r600_shader.c revision e973221538d5edfad62abedf5b37a4fb774d71fc
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 0x00000000, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 0x00000000, 0xFFFFFFFF, shader->bo); 84} 85 86int r600_find_vs_semantic_index(struct r600_shader *vs, 87 struct r600_shader *ps, int id) 88{ 89 struct r600_shader_io *input = &ps->input[id]; 90 91 for (int i = 0; i < vs->noutput; i++) { 92 if (input->name == vs->output[i].name && 93 input->sid == vs->output[i].sid) { 94 return i - 1; 95 } 96 } 97 return 0; 98} 99 100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 101{ 102 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 103 struct r600_pipe_state *rstate = &shader->rstate; 104 struct r600_shader *rshader = &shader->shader; 105 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; 106 boolean have_pos = FALSE, have_face = FALSE; 107 108 /* clear previous register */ 109 rstate->nregs = 0; 110 111 for (i = 0; i < rshader->ninput; i++) { 112 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 113 tmp |= S_028644_SEL_CENTROID(1); 114 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 115 have_pos = TRUE; 116 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 117 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 118 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 119 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 120 } 121 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 122 have_face = TRUE; 123 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 124 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 125 tmp |= S_028644_PT_SPRITE_TEX(1); 126 } 127 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 128 } 129 for (i = 0; i < rshader->noutput; i++) { 130 r600_pipe_state_add_reg(rstate, 131 R_02880C_DB_SHADER_CONTROL, 132 S_02880C_Z_EXPORT_ENABLE(1), 133 S_02880C_Z_EXPORT_ENABLE(1), NULL); 134 } 135 136 exports_ps = 0; 137 num_cout = 0; 138 for (i = 0; i < rshader->noutput; i++) { 139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 140 exports_ps |= 1; 141 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 142 num_cout++; 143 } 144 } 145 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 146 if (!exports_ps) { 147 /* always at least export 1 component per pixel */ 148 exports_ps = 2; 149 } 150 151 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 152 S_0286CC_PERSP_GRADIENT_ENA(1); 153 spi_input_z = 0; 154 if (have_pos) { 155 spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | 156 S_0286CC_BARYC_SAMPLE_CNTL(1); 157 spi_input_z |= 1; 158 } 159 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 160 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); 161 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 162 r600_pipe_state_add_reg(rstate, 163 R_028840_SQ_PGM_START_PS, 164 0x00000000, 0xFFFFFFFF, shader->bo); 165 r600_pipe_state_add_reg(rstate, 166 R_028850_SQ_PGM_RESOURCES_PS, 167 S_028868_NUM_GPRS(rshader->bc.ngpr) | 168 S_028868_STACK_SIZE(rshader->bc.nstack), 169 0xFFFFFFFF, NULL); 170 r600_pipe_state_add_reg(rstate, 171 R_028854_SQ_PGM_EXPORTS_PS, 172 exports_ps, 0xFFFFFFFF, NULL); 173 r600_pipe_state_add_reg(rstate, 174 R_0288CC_SQ_PGM_CF_OFFSET_PS, 175 0x00000000, 0xFFFFFFFF, NULL); 176 177 if (rshader->uses_kill) { 178 /* only set some bits here, the other bits are set in the dsa state */ 179 r600_pipe_state_add_reg(rstate, 180 R_02880C_DB_SHADER_CONTROL, 181 S_02880C_KILL_ENABLE(1), 182 S_02880C_KILL_ENABLE(1), NULL); 183 } 184} 185 186static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 187{ 188 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 189 struct r600_shader *rshader = &shader->shader; 190 void *ptr; 191 192 /* copy new shader */ 193 if (shader->bo == NULL) { 194 shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); 195 if (shader->bo == NULL) { 196 return -ENOMEM; 197 } 198 ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL); 199 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 200 radeon_ws_bo_unmap(rctx->radeon, shader->bo); 201 } 202 /* build state */ 203 rshader->flat_shade = rctx->flatshade; 204 switch (rshader->processor_type) { 205 case TGSI_PROCESSOR_VERTEX: 206 if (rshader->family >= CHIP_CEDAR) { 207 evergreen_pipe_shader_vs(ctx, shader); 208 } else { 209 r600_pipe_shader_vs(ctx, shader); 210 } 211 break; 212 case TGSI_PROCESSOR_FRAGMENT: 213 if (rshader->family >= CHIP_CEDAR) { 214 evergreen_pipe_shader_ps(ctx, shader); 215 } else { 216 r600_pipe_shader_ps(ctx, shader); 217 } 218 break; 219 default: 220 return -EINVAL; 221 } 222 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 223 return 0; 224} 225 226static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 227{ 228 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 229 struct r600_shader *shader = &rshader->shader; 230 const struct util_format_description *desc; 231 enum pipe_format resource_format[160]; 232 unsigned i, nresources = 0; 233 struct r600_bc *bc = &shader->bc; 234 struct r600_bc_cf *cf; 235 struct r600_bc_vtx *vtx; 236 237 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 238 return 0; 239 if (!memcmp(&rshader->vertex_elements, rctx->vertex_elements, sizeof(struct r600_vertex_element))) { 240 return 0; 241 } 242 rshader->vertex_elements = *rctx->vertex_elements; 243 for (i = 0; i < rctx->vertex_elements->count; i++) { 244 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 245 } 246 radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL); 247 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 248 switch (cf->inst) { 249 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 250 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 251 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 252 desc = util_format_description(resource_format[vtx->buffer_id]); 253 if (desc == NULL) { 254 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 255 return -EINVAL; 256 } 257 vtx->dst_sel_x = desc->swizzle[0]; 258 vtx->dst_sel_y = desc->swizzle[1]; 259 vtx->dst_sel_z = desc->swizzle[2]; 260 vtx->dst_sel_w = desc->swizzle[3]; 261 } 262 break; 263 default: 264 break; 265 } 266 } 267 return r600_bc_build(&shader->bc); 268} 269 270int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 271{ 272 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 273 int r; 274 275 if (shader == NULL) 276 return -EINVAL; 277 /* there should be enough input */ 278 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 279 R600_ERR("%d resources provided, expecting %d\n", 280 rctx->vertex_elements->count, shader->shader.bc.nresource); 281 return -EINVAL; 282 } 283 r = r600_shader_update(ctx, shader); 284 if (r) 285 return r; 286 return r600_pipe_shader(ctx, shader); 287} 288 289int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 290int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 291{ 292 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 293 int r; 294 295//fprintf(stderr, "--------------------------------------------------------------\n"); 296//tgsi_dump(tokens, 0); 297 shader->shader.family = r600_get_family(rctx->radeon); 298 r = r600_shader_from_tgsi(tokens, &shader->shader); 299 if (r) { 300 R600_ERR("translation from TGSI failed !\n"); 301 return r; 302 } 303 r = r600_bc_build(&shader->shader.bc); 304 if (r) { 305 R600_ERR("building bytecode failed !\n"); 306 return r; 307 } 308//fprintf(stderr, "______________________________________________________________\n"); 309 return 0; 310} 311 312/* 313 * tgsi -> r600 shader 314 */ 315struct r600_shader_tgsi_instruction; 316 317struct r600_shader_ctx { 318 struct tgsi_shader_info info; 319 struct tgsi_parse_context parse; 320 const struct tgsi_token *tokens; 321 unsigned type; 322 unsigned file_offset[TGSI_FILE_COUNT]; 323 unsigned temp_reg; 324 struct r600_shader_tgsi_instruction *inst_info; 325 struct r600_bc *bc; 326 struct r600_shader *shader; 327 u32 value[4]; 328 u32 *literals; 329 u32 nliterals; 330 u32 max_driver_temp_used; 331}; 332 333struct r600_shader_tgsi_instruction { 334 unsigned tgsi_opcode; 335 unsigned is_op3; 336 unsigned r600_opcode; 337 int (*process)(struct r600_shader_ctx *ctx); 338}; 339 340static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 341static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 342 343static int tgsi_is_supported(struct r600_shader_ctx *ctx) 344{ 345 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 346 int j; 347 348 if (i->Instruction.NumDstRegs > 1) { 349 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 350 return -EINVAL; 351 } 352 if (i->Instruction.Predicate) { 353 R600_ERR("predicate unsupported\n"); 354 return -EINVAL; 355 } 356#if 0 357 if (i->Instruction.Label) { 358 R600_ERR("label unsupported\n"); 359 return -EINVAL; 360 } 361#endif 362 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 363 if (i->Src[j].Register.Dimension || 364 i->Src[j].Register.Absolute) { 365 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 366 i->Src[j].Register.Dimension, 367 i->Src[j].Register.Absolute); 368 return -EINVAL; 369 } 370 } 371 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 372 if (i->Dst[j].Register.Dimension) { 373 R600_ERR("unsupported dst (dimension)\n"); 374 return -EINVAL; 375 } 376 } 377 return 0; 378} 379 380static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) 381{ 382 int i, r; 383 struct r600_bc_alu alu; 384 385 for (i = 0; i < 8; i++) { 386 memset(&alu, 0, sizeof(struct r600_bc_alu)); 387 388 if (i < 4) 389 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 390 else 391 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 392 393 if ((i > 1) && (i < 6)) { 394 alu.dst.sel = ctx->shader->input[gpr].gpr; 395 alu.dst.write = 1; 396 } 397 398 alu.dst.chan = i % 4; 399 alu.src[0].chan = (1 - (i % 2)); 400 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; 401 402 alu.bank_swizzle_force = SQ_ALU_VEC_210; 403 if ((i % 4) == 3) 404 alu.last = 1; 405 r = r600_bc_add_alu(ctx->bc, &alu); 406 if (r) 407 return r; 408 } 409 return 0; 410} 411 412 413static int tgsi_declaration(struct r600_shader_ctx *ctx) 414{ 415 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 416 struct r600_bc_vtx vtx; 417 unsigned i; 418 int r; 419 420 switch (d->Declaration.File) { 421 case TGSI_FILE_INPUT: 422 i = ctx->shader->ninput++; 423 ctx->shader->input[i].name = d->Semantic.Name; 424 ctx->shader->input[i].sid = d->Semantic.Index; 425 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 426 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 427 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 428 /* turn input into fetch */ 429 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 430 vtx.inst = 0; 431 vtx.fetch_type = 0; 432 vtx.buffer_id = i; 433 /* register containing the index into the buffer */ 434 vtx.src_gpr = 0; 435 vtx.src_sel_x = 0; 436 vtx.mega_fetch_count = 0x1F; 437 vtx.dst_gpr = ctx->shader->input[i].gpr; 438 vtx.dst_sel_x = 0; 439 vtx.dst_sel_y = 1; 440 vtx.dst_sel_z = 2; 441 vtx.dst_sel_w = 3; 442 vtx.use_const_fields = 1; 443 r = r600_bc_add_vtx(ctx->bc, &vtx); 444 if (r) 445 return r; 446 } 447 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 448 /* turn input into interpolate on EG */ 449 evergreen_interp_alu(ctx, i); 450 } 451 break; 452 case TGSI_FILE_OUTPUT: 453 i = ctx->shader->noutput++; 454 ctx->shader->output[i].name = d->Semantic.Name; 455 ctx->shader->output[i].sid = d->Semantic.Index; 456 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 457 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 458 break; 459 case TGSI_FILE_CONSTANT: 460 case TGSI_FILE_TEMPORARY: 461 case TGSI_FILE_SAMPLER: 462 case TGSI_FILE_ADDRESS: 463 break; 464 default: 465 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 466 return -EINVAL; 467 } 468 return 0; 469} 470 471static int r600_get_temp(struct r600_shader_ctx *ctx) 472{ 473 return ctx->temp_reg + ctx->max_driver_temp_used++; 474} 475 476int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 477{ 478 struct tgsi_full_immediate *immediate; 479 struct r600_shader_ctx ctx; 480 struct r600_bc_output output[32]; 481 unsigned output_done, noutput; 482 unsigned opcode; 483 int i, r = 0, pos0; 484 485 ctx.bc = &shader->bc; 486 ctx.shader = shader; 487 r = r600_bc_init(ctx.bc, shader->family); 488 if (r) 489 return r; 490 ctx.bc->use_mem_constant = shader->use_mem_constant; 491 ctx.tokens = tokens; 492 tgsi_scan_shader(tokens, &ctx.info); 493 tgsi_parse_init(&ctx.parse, tokens); 494 ctx.type = ctx.parse.FullHeader.Processor.Processor; 495 shader->processor_type = ctx.type; 496 497 /* register allocations */ 498 /* Values [0,127] correspond to GPR[0..127]. 499 * Values [128,159] correspond to constant buffer bank 0 500 * Values [160,191] correspond to constant buffer bank 1 501 * Values [256,511] correspond to cfile constants c[0..255]. 502 * Other special values are shown in the list below. 503 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 504 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 505 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 506 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 507 * 248 SQ_ALU_SRC_0: special constant 0.0. 508 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 509 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 510 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 511 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 512 * 253 SQ_ALU_SRC_LITERAL: literal constant. 513 * 254 SQ_ALU_SRC_PV: previous vector result. 514 * 255 SQ_ALU_SRC_PS: previous scalar result. 515 */ 516 for (i = 0; i < TGSI_FILE_COUNT; i++) { 517 ctx.file_offset[i] = 0; 518 } 519 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 520 ctx.file_offset[TGSI_FILE_INPUT] = 1; 521 } 522 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 523 ctx.info.file_count[TGSI_FILE_INPUT]; 524 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 525 ctx.info.file_count[TGSI_FILE_OUTPUT]; 526 if (ctx.shader->use_mem_constant) 527 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 528 else 529 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 530 531 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 532 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 533 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 534 535 ctx.nliterals = 0; 536 ctx.literals = NULL; 537 538 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 539 tgsi_parse_token(&ctx.parse); 540 switch (ctx.parse.FullToken.Token.Type) { 541 case TGSI_TOKEN_TYPE_IMMEDIATE: 542 immediate = &ctx.parse.FullToken.FullImmediate; 543 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 544 if(ctx.literals == NULL) { 545 r = -ENOMEM; 546 goto out_err; 547 } 548 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 549 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 550 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 551 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 552 ctx.nliterals++; 553 break; 554 case TGSI_TOKEN_TYPE_DECLARATION: 555 r = tgsi_declaration(&ctx); 556 if (r) 557 goto out_err; 558 break; 559 case TGSI_TOKEN_TYPE_INSTRUCTION: 560 r = tgsi_is_supported(&ctx); 561 if (r) 562 goto out_err; 563 ctx.max_driver_temp_used = 0; 564 /* reserve first tmp for everyone */ 565 r600_get_temp(&ctx); 566 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 567 if (ctx.bc->chiprev == 2) 568 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 569 else 570 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 571 r = ctx.inst_info->process(&ctx); 572 if (r) 573 goto out_err; 574 r = r600_bc_add_literal(ctx.bc, ctx.value); 575 if (r) 576 goto out_err; 577 break; 578 default: 579 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 580 r = -EINVAL; 581 goto out_err; 582 } 583 } 584 /* export output */ 585 noutput = shader->noutput; 586 for (i = 0, pos0 = 0; i < noutput; i++) { 587 memset(&output[i], 0, sizeof(struct r600_bc_output)); 588 output[i].gpr = shader->output[i].gpr; 589 output[i].elem_size = 3; 590 output[i].swizzle_x = 0; 591 output[i].swizzle_y = 1; 592 output[i].swizzle_z = 2; 593 output[i].swizzle_w = 3; 594 output[i].barrier = 1; 595 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 596 output[i].array_base = i - pos0; 597 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 598 switch (ctx.type) { 599 case TGSI_PROCESSOR_VERTEX: 600 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 601 output[i].array_base = 60; 602 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 603 /* position doesn't count in array_base */ 604 pos0++; 605 } 606 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 607 output[i].array_base = 61; 608 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 609 /* position doesn't count in array_base */ 610 pos0++; 611 } 612 break; 613 case TGSI_PROCESSOR_FRAGMENT: 614 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 615 output[i].array_base = shader->output[i].sid; 616 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 617 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 618 output[i].array_base = 61; 619 output[i].swizzle_x = 2; 620 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; 621 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 622 } else { 623 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 624 r = -EINVAL; 625 goto out_err; 626 } 627 break; 628 default: 629 R600_ERR("unsupported processor type %d\n", ctx.type); 630 r = -EINVAL; 631 goto out_err; 632 } 633 } 634 /* add fake param output for vertex shader if no param is exported */ 635 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 636 for (i = 0, pos0 = 0; i < noutput; i++) { 637 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 638 pos0 = 1; 639 break; 640 } 641 } 642 if (!pos0) { 643 memset(&output[i], 0, sizeof(struct r600_bc_output)); 644 output[i].gpr = 0; 645 output[i].elem_size = 3; 646 output[i].swizzle_x = 0; 647 output[i].swizzle_y = 1; 648 output[i].swizzle_z = 2; 649 output[i].swizzle_w = 3; 650 output[i].barrier = 1; 651 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 652 output[i].array_base = 0; 653 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 654 noutput++; 655 } 656 } 657 /* add fake pixel export */ 658 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 659 memset(&output[0], 0, sizeof(struct r600_bc_output)); 660 output[0].gpr = 0; 661 output[0].elem_size = 3; 662 output[0].swizzle_x = 7; 663 output[0].swizzle_y = 7; 664 output[0].swizzle_z = 7; 665 output[0].swizzle_w = 7; 666 output[0].barrier = 1; 667 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 668 output[0].array_base = 0; 669 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 670 noutput++; 671 } 672 /* set export done on last export of each type */ 673 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 674 if (i == (noutput - 1)) { 675 output[i].end_of_program = 1; 676 } 677 if (!(output_done & (1 << output[i].type))) { 678 output_done |= (1 << output[i].type); 679 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 680 } 681 } 682 /* add output to bytecode */ 683 for (i = 0; i < noutput; i++) { 684 r = r600_bc_add_output(ctx.bc, &output[i]); 685 if (r) 686 goto out_err; 687 } 688 free(ctx.literals); 689 tgsi_parse_free(&ctx.parse); 690 return 0; 691out_err: 692 free(ctx.literals); 693 tgsi_parse_free(&ctx.parse); 694 return r; 695} 696 697static int tgsi_unsupported(struct r600_shader_ctx *ctx) 698{ 699 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 700 return -EINVAL; 701} 702 703static int tgsi_end(struct r600_shader_ctx *ctx) 704{ 705 return 0; 706} 707 708static int tgsi_src(struct r600_shader_ctx *ctx, 709 const struct tgsi_full_src_register *tgsi_src, 710 struct r600_bc_alu_src *r600_src) 711{ 712 int index; 713 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 714 r600_src->sel = tgsi_src->Register.Index; 715 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 716 r600_src->sel = 0; 717 index = tgsi_src->Register.Index; 718 ctx->value[0] = ctx->literals[index * 4 + 0]; 719 ctx->value[1] = ctx->literals[index * 4 + 1]; 720 ctx->value[2] = ctx->literals[index * 4 + 2]; 721 ctx->value[3] = ctx->literals[index * 4 + 3]; 722 } 723 if (tgsi_src->Register.Indirect) 724 r600_src->rel = V_SQ_REL_RELATIVE; 725 r600_src->neg = tgsi_src->Register.Negate; 726 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 727 return 0; 728} 729 730static int tgsi_dst(struct r600_shader_ctx *ctx, 731 const struct tgsi_full_dst_register *tgsi_dst, 732 unsigned swizzle, 733 struct r600_bc_alu_dst *r600_dst) 734{ 735 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 736 737 r600_dst->sel = tgsi_dst->Register.Index; 738 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 739 r600_dst->chan = swizzle; 740 r600_dst->write = 1; 741 if (tgsi_dst->Register.Indirect) 742 r600_dst->rel = V_SQ_REL_RELATIVE; 743 if (inst->Instruction.Saturate) { 744 r600_dst->clamp = 1; 745 } 746 return 0; 747} 748 749static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 750{ 751 switch (swizzle) { 752 case 0: 753 return tgsi_src->Register.SwizzleX; 754 case 1: 755 return tgsi_src->Register.SwizzleY; 756 case 2: 757 return tgsi_src->Register.SwizzleZ; 758 case 3: 759 return tgsi_src->Register.SwizzleW; 760 default: 761 return 0; 762 } 763} 764 765static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 766{ 767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 768 struct r600_bc_alu alu; 769 int i, j, k, nconst, r; 770 771 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 772 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 773 nconst++; 774 } 775 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 776 if (r) { 777 return r; 778 } 779 } 780 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 781 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 782 int treg = r600_get_temp(ctx); 783 for (k = 0; k < 4; k++) { 784 memset(&alu, 0, sizeof(struct r600_bc_alu)); 785 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 786 alu.src[0].sel = r600_src[i].sel; 787 alu.src[0].chan = k; 788 alu.dst.sel = treg; 789 alu.dst.chan = k; 790 alu.dst.write = 1; 791 if (k == 3) 792 alu.last = 1; 793 r = r600_bc_add_alu(ctx->bc, &alu); 794 if (r) 795 return r; 796 } 797 r600_src[i].sel = treg; 798 j--; 799 } 800 } 801 return 0; 802} 803 804/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 805static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 806{ 807 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 808 struct r600_bc_alu alu; 809 int i, j, k, nliteral, r, index; 810 811 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 812 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 813 nliteral++; 814 } 815 } 816 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 817 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 818 int treg = r600_get_temp(ctx); 819 for (k = 0; k < 4; k++) { 820 memset(&alu, 0, sizeof(struct r600_bc_alu)); 821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 822 alu.src[0].sel = r600_src[i].sel; 823 alu.src[0].chan = k; 824 alu.dst.sel = treg; 825 alu.dst.chan = k; 826 alu.dst.write = 1; 827 if (k == 3) 828 alu.last = 1; 829 r = r600_bc_add_alu(ctx->bc, &alu); 830 if (r) 831 return r; 832 } 833 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 834 if (r) 835 return r; 836 r600_src[i].sel = treg; 837 j--; 838 } 839 } 840 return 0; 841} 842 843static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 844{ 845 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 846 struct r600_bc_alu_src r600_src[3]; 847 struct r600_bc_alu alu; 848 int i, j, r; 849 int lasti = 0; 850 851 for (i = 0; i < 4; i++) { 852 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 853 lasti = i; 854 } 855 } 856 857 r = tgsi_split_constant(ctx, r600_src); 858 if (r) 859 return r; 860 r = tgsi_split_literal_constant(ctx, r600_src); 861 if (r) 862 return r; 863 for (i = 0; i < lasti + 1; i++) { 864 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 865 continue; 866 867 memset(&alu, 0, sizeof(struct r600_bc_alu)); 868 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 869 if (r) 870 return r; 871 872 alu.inst = ctx->inst_info->r600_opcode; 873 if (!swap) { 874 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 875 alu.src[j] = r600_src[j]; 876 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 877 } 878 } else { 879 alu.src[0] = r600_src[1]; 880 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 881 882 alu.src[1] = r600_src[0]; 883 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 884 } 885 /* handle some special cases */ 886 switch (ctx->inst_info->tgsi_opcode) { 887 case TGSI_OPCODE_SUB: 888 alu.src[1].neg = 1; 889 break; 890 case TGSI_OPCODE_ABS: 891 alu.src[0].abs = 1; 892 break; 893 default: 894 break; 895 } 896 if (i == lasti) { 897 alu.last = 1; 898 } 899 r = r600_bc_add_alu(ctx->bc, &alu); 900 if (r) 901 return r; 902 } 903 return 0; 904} 905 906static int tgsi_op2(struct r600_shader_ctx *ctx) 907{ 908 return tgsi_op2_s(ctx, 0); 909} 910 911static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 912{ 913 return tgsi_op2_s(ctx, 1); 914} 915 916/* 917 * r600 - trunc to -PI..PI range 918 * r700 - normalize by dividing by 2PI 919 * see fdo bug 27901 920 */ 921static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 922 struct r600_bc_alu_src r600_src[3]) 923{ 924 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 925 int r; 926 uint32_t lit_vals[4]; 927 struct r600_bc_alu alu; 928 929 memset(lit_vals, 0, 4*4); 930 r = tgsi_split_constant(ctx, r600_src); 931 if (r) 932 return r; 933 r = tgsi_split_literal_constant(ctx, r600_src); 934 if (r) 935 return r; 936 937 r = tgsi_split_literal_constant(ctx, r600_src); 938 if (r) 939 return r; 940 941 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 942 lit_vals[1] = fui(0.5f); 943 944 memset(&alu, 0, sizeof(struct r600_bc_alu)); 945 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 946 alu.is_op3 = 1; 947 948 alu.dst.chan = 0; 949 alu.dst.sel = ctx->temp_reg; 950 alu.dst.write = 1; 951 952 alu.src[0] = r600_src[0]; 953 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 954 955 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 956 alu.src[1].chan = 0; 957 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 958 alu.src[2].chan = 1; 959 alu.last = 1; 960 r = r600_bc_add_alu(ctx->bc, &alu); 961 if (r) 962 return r; 963 r = r600_bc_add_literal(ctx->bc, lit_vals); 964 if (r) 965 return r; 966 967 memset(&alu, 0, sizeof(struct r600_bc_alu)); 968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 969 970 alu.dst.chan = 0; 971 alu.dst.sel = ctx->temp_reg; 972 alu.dst.write = 1; 973 974 alu.src[0].sel = ctx->temp_reg; 975 alu.src[0].chan = 0; 976 alu.last = 1; 977 r = r600_bc_add_alu(ctx->bc, &alu); 978 if (r) 979 return r; 980 981 if (ctx->bc->chiprev == 0) { 982 lit_vals[0] = fui(3.1415926535897f * 2.0f); 983 lit_vals[1] = fui(-3.1415926535897f); 984 } else { 985 lit_vals[0] = fui(1.0f); 986 lit_vals[1] = fui(-0.5f); 987 } 988 989 memset(&alu, 0, sizeof(struct r600_bc_alu)); 990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 991 alu.is_op3 = 1; 992 993 alu.dst.chan = 0; 994 alu.dst.sel = ctx->temp_reg; 995 alu.dst.write = 1; 996 997 alu.src[0].sel = ctx->temp_reg; 998 alu.src[0].chan = 0; 999 1000 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1001 alu.src[1].chan = 0; 1002 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1003 alu.src[2].chan = 1; 1004 alu.last = 1; 1005 r = r600_bc_add_alu(ctx->bc, &alu); 1006 if (r) 1007 return r; 1008 r = r600_bc_add_literal(ctx->bc, lit_vals); 1009 if (r) 1010 return r; 1011 return 0; 1012} 1013 1014static int tgsi_trig(struct r600_shader_ctx *ctx) 1015{ 1016 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1017 struct r600_bc_alu_src r600_src[3]; 1018 struct r600_bc_alu alu; 1019 int i, r; 1020 int lasti = 0; 1021 1022 r = tgsi_setup_trig(ctx, r600_src); 1023 if (r) 1024 return r; 1025 1026 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1027 alu.inst = ctx->inst_info->r600_opcode; 1028 alu.dst.chan = 0; 1029 alu.dst.sel = ctx->temp_reg; 1030 alu.dst.write = 1; 1031 1032 alu.src[0].sel = ctx->temp_reg; 1033 alu.src[0].chan = 0; 1034 alu.last = 1; 1035 r = r600_bc_add_alu(ctx->bc, &alu); 1036 if (r) 1037 return r; 1038 1039 /* replicate result */ 1040 for (i = 0; i < 4; i++) { 1041 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1042 lasti = i; 1043 } 1044 for (i = 0; i < lasti + 1; i++) { 1045 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1046 continue; 1047 1048 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1049 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1050 1051 alu.src[0].sel = ctx->temp_reg; 1052 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1053 if (r) 1054 return r; 1055 if (i == lasti) 1056 alu.last = 1; 1057 r = r600_bc_add_alu(ctx->bc, &alu); 1058 if (r) 1059 return r; 1060 } 1061 return 0; 1062} 1063 1064static int tgsi_scs(struct r600_shader_ctx *ctx) 1065{ 1066 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1067 struct r600_bc_alu_src r600_src[3]; 1068 struct r600_bc_alu alu; 1069 int r; 1070 1071 /* We'll only need the trig stuff if we are going to write to the 1072 * X or Y components of the destination vector. 1073 */ 1074 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1075 r = tgsi_setup_trig(ctx, r600_src); 1076 if (r) 1077 return r; 1078 } 1079 1080 /* dst.x = COS */ 1081 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1082 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1083 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1084 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1085 if (r) 1086 return r; 1087 1088 alu.src[0].sel = ctx->temp_reg; 1089 alu.src[0].chan = 0; 1090 alu.last = 1; 1091 r = r600_bc_add_alu(ctx->bc, &alu); 1092 if (r) 1093 return r; 1094 } 1095 1096 /* dst.y = SIN */ 1097 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1098 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1099 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1100 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1101 if (r) 1102 return r; 1103 1104 alu.src[0].sel = ctx->temp_reg; 1105 alu.src[0].chan = 0; 1106 alu.last = 1; 1107 r = r600_bc_add_alu(ctx->bc, &alu); 1108 if (r) 1109 return r; 1110 } 1111 1112 /* dst.z = 0.0; */ 1113 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1114 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1115 1116 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1117 1118 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1119 if (r) 1120 return r; 1121 1122 alu.src[0].sel = V_SQ_ALU_SRC_0; 1123 alu.src[0].chan = 0; 1124 1125 alu.last = 1; 1126 1127 r = r600_bc_add_alu(ctx->bc, &alu); 1128 if (r) 1129 return r; 1130 1131 r = r600_bc_add_literal(ctx->bc, ctx->value); 1132 if (r) 1133 return r; 1134 } 1135 1136 /* dst.w = 1.0; */ 1137 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1138 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1139 1140 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1141 1142 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1143 if (r) 1144 return r; 1145 1146 alu.src[0].sel = V_SQ_ALU_SRC_1; 1147 alu.src[0].chan = 0; 1148 1149 alu.last = 1; 1150 1151 r = r600_bc_add_alu(ctx->bc, &alu); 1152 if (r) 1153 return r; 1154 1155 r = r600_bc_add_literal(ctx->bc, ctx->value); 1156 if (r) 1157 return r; 1158 } 1159 1160 return 0; 1161} 1162 1163static int tgsi_kill(struct r600_shader_ctx *ctx) 1164{ 1165 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1166 struct r600_bc_alu alu; 1167 int i, r; 1168 1169 for (i = 0; i < 4; i++) { 1170 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1171 alu.inst = ctx->inst_info->r600_opcode; 1172 1173 alu.dst.chan = i; 1174 1175 alu.src[0].sel = V_SQ_ALU_SRC_0; 1176 1177 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1178 alu.src[1].sel = V_SQ_ALU_SRC_1; 1179 alu.src[1].neg = 1; 1180 } else { 1181 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1182 if (r) 1183 return r; 1184 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1185 } 1186 if (i == 3) { 1187 alu.last = 1; 1188 } 1189 r = r600_bc_add_alu(ctx->bc, &alu); 1190 if (r) 1191 return r; 1192 } 1193 r = r600_bc_add_literal(ctx->bc, ctx->value); 1194 if (r) 1195 return r; 1196 1197 /* kill must be last in ALU */ 1198 ctx->bc->force_add_cf = 1; 1199 ctx->shader->uses_kill = TRUE; 1200 return 0; 1201} 1202 1203static int tgsi_lit(struct r600_shader_ctx *ctx) 1204{ 1205 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1206 struct r600_bc_alu alu; 1207 struct r600_bc_alu_src r600_src[3]; 1208 int r; 1209 1210 r = tgsi_split_constant(ctx, r600_src); 1211 if (r) 1212 return r; 1213 r = tgsi_split_literal_constant(ctx, r600_src); 1214 if (r) 1215 return r; 1216 1217 /* dst.x, <- 1.0 */ 1218 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1220 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1221 alu.src[0].chan = 0; 1222 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1223 if (r) 1224 return r; 1225 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1226 r = r600_bc_add_alu(ctx->bc, &alu); 1227 if (r) 1228 return r; 1229 1230 /* dst.y = max(src.x, 0.0) */ 1231 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1233 alu.src[0] = r600_src[0]; 1234 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1235 alu.src[1].chan = 0; 1236 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1237 if (r) 1238 return r; 1239 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1240 r = r600_bc_add_alu(ctx->bc, &alu); 1241 if (r) 1242 return r; 1243 1244 /* dst.w, <- 1.0 */ 1245 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1246 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1247 alu.src[0].sel = V_SQ_ALU_SRC_1; 1248 alu.src[0].chan = 0; 1249 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1250 if (r) 1251 return r; 1252 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1253 alu.last = 1; 1254 r = r600_bc_add_alu(ctx->bc, &alu); 1255 if (r) 1256 return r; 1257 1258 r = r600_bc_add_literal(ctx->bc, ctx->value); 1259 if (r) 1260 return r; 1261 1262 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1263 { 1264 int chan; 1265 int sel; 1266 1267 /* dst.z = log(src.y) */ 1268 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1269 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1270 alu.src[0] = r600_src[0]; 1271 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1272 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1273 if (r) 1274 return r; 1275 alu.last = 1; 1276 r = r600_bc_add_alu(ctx->bc, &alu); 1277 if (r) 1278 return r; 1279 1280 r = r600_bc_add_literal(ctx->bc, ctx->value); 1281 if (r) 1282 return r; 1283 1284 chan = alu.dst.chan; 1285 sel = alu.dst.sel; 1286 1287 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1288 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1290 alu.src[0] = r600_src[0]; 1291 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1292 alu.src[1].sel = sel; 1293 alu.src[1].chan = chan; 1294 1295 alu.src[2] = r600_src[0]; 1296 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1297 alu.dst.sel = ctx->temp_reg; 1298 alu.dst.chan = 0; 1299 alu.dst.write = 1; 1300 alu.is_op3 = 1; 1301 alu.last = 1; 1302 r = r600_bc_add_alu(ctx->bc, &alu); 1303 if (r) 1304 return r; 1305 1306 r = r600_bc_add_literal(ctx->bc, ctx->value); 1307 if (r) 1308 return r; 1309 /* dst.z = exp(tmp.x) */ 1310 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1312 alu.src[0].sel = ctx->temp_reg; 1313 alu.src[0].chan = 0; 1314 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1315 if (r) 1316 return r; 1317 alu.last = 1; 1318 r = r600_bc_add_alu(ctx->bc, &alu); 1319 if (r) 1320 return r; 1321 } 1322 return 0; 1323} 1324 1325static int tgsi_rsq(struct r600_shader_ctx *ctx) 1326{ 1327 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1328 struct r600_bc_alu alu; 1329 int i, r; 1330 1331 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1332 1333 /* FIXME: 1334 * For state trackers other than OpenGL, we'll want to use 1335 * _RECIPSQRT_IEEE instead. 1336 */ 1337 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1338 1339 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1340 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1341 if (r) 1342 return r; 1343 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1344 alu.src[i].abs = 1; 1345 } 1346 alu.dst.sel = ctx->temp_reg; 1347 alu.dst.write = 1; 1348 alu.last = 1; 1349 r = r600_bc_add_alu(ctx->bc, &alu); 1350 if (r) 1351 return r; 1352 r = r600_bc_add_literal(ctx->bc, ctx->value); 1353 if (r) 1354 return r; 1355 /* replicate result */ 1356 return tgsi_helper_tempx_replicate(ctx); 1357} 1358 1359static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1360{ 1361 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1362 struct r600_bc_alu alu; 1363 int i, r; 1364 1365 for (i = 0; i < 4; i++) { 1366 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1367 alu.src[0].sel = ctx->temp_reg; 1368 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1369 alu.dst.chan = i; 1370 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1371 if (r) 1372 return r; 1373 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1374 if (i == 3) 1375 alu.last = 1; 1376 r = r600_bc_add_alu(ctx->bc, &alu); 1377 if (r) 1378 return r; 1379 } 1380 return 0; 1381} 1382 1383static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1384{ 1385 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1386 struct r600_bc_alu alu; 1387 int i, r; 1388 1389 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1390 alu.inst = ctx->inst_info->r600_opcode; 1391 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1392 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1393 if (r) 1394 return r; 1395 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1396 } 1397 alu.dst.sel = ctx->temp_reg; 1398 alu.dst.write = 1; 1399 alu.last = 1; 1400 r = r600_bc_add_alu(ctx->bc, &alu); 1401 if (r) 1402 return r; 1403 r = r600_bc_add_literal(ctx->bc, ctx->value); 1404 if (r) 1405 return r; 1406 /* replicate result */ 1407 return tgsi_helper_tempx_replicate(ctx); 1408} 1409 1410static int tgsi_pow(struct r600_shader_ctx *ctx) 1411{ 1412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1413 struct r600_bc_alu alu; 1414 int r; 1415 1416 /* LOG2(a) */ 1417 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1419 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1420 if (r) 1421 return r; 1422 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1423 alu.dst.sel = ctx->temp_reg; 1424 alu.dst.write = 1; 1425 alu.last = 1; 1426 r = r600_bc_add_alu(ctx->bc, &alu); 1427 if (r) 1428 return r; 1429 r = r600_bc_add_literal(ctx->bc,ctx->value); 1430 if (r) 1431 return r; 1432 /* b * LOG2(a) */ 1433 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1435 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1436 if (r) 1437 return r; 1438 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1439 alu.src[1].sel = ctx->temp_reg; 1440 alu.dst.sel = ctx->temp_reg; 1441 alu.dst.write = 1; 1442 alu.last = 1; 1443 r = r600_bc_add_alu(ctx->bc, &alu); 1444 if (r) 1445 return r; 1446 r = r600_bc_add_literal(ctx->bc,ctx->value); 1447 if (r) 1448 return r; 1449 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1450 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1451 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1452 alu.src[0].sel = ctx->temp_reg; 1453 alu.dst.sel = ctx->temp_reg; 1454 alu.dst.write = 1; 1455 alu.last = 1; 1456 r = r600_bc_add_alu(ctx->bc, &alu); 1457 if (r) 1458 return r; 1459 r = r600_bc_add_literal(ctx->bc,ctx->value); 1460 if (r) 1461 return r; 1462 return tgsi_helper_tempx_replicate(ctx); 1463} 1464 1465static int tgsi_ssg(struct r600_shader_ctx *ctx) 1466{ 1467 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1468 struct r600_bc_alu alu; 1469 struct r600_bc_alu_src r600_src[3]; 1470 int i, r; 1471 1472 r = tgsi_split_constant(ctx, r600_src); 1473 if (r) 1474 return r; 1475 r = tgsi_split_literal_constant(ctx, r600_src); 1476 if (r) 1477 return r; 1478 1479 /* tmp = (src > 0 ? 1 : src) */ 1480 for (i = 0; i < 4; i++) { 1481 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1482 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1483 alu.is_op3 = 1; 1484 1485 alu.dst.sel = ctx->temp_reg; 1486 alu.dst.chan = i; 1487 1488 alu.src[0] = r600_src[0]; 1489 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1490 1491 alu.src[1].sel = V_SQ_ALU_SRC_1; 1492 1493 alu.src[2] = r600_src[0]; 1494 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1495 if (i == 3) 1496 alu.last = 1; 1497 r = r600_bc_add_alu(ctx->bc, &alu); 1498 if (r) 1499 return r; 1500 } 1501 r = r600_bc_add_literal(ctx->bc, ctx->value); 1502 if (r) 1503 return r; 1504 1505 /* dst = (-tmp > 0 ? -1 : tmp) */ 1506 for (i = 0; i < 4; i++) { 1507 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1509 alu.is_op3 = 1; 1510 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1511 if (r) 1512 return r; 1513 1514 alu.src[0].sel = ctx->temp_reg; 1515 alu.src[0].chan = i; 1516 alu.src[0].neg = 1; 1517 1518 alu.src[1].sel = V_SQ_ALU_SRC_1; 1519 alu.src[1].neg = 1; 1520 1521 alu.src[2].sel = ctx->temp_reg; 1522 alu.src[2].chan = i; 1523 1524 if (i == 3) 1525 alu.last = 1; 1526 r = r600_bc_add_alu(ctx->bc, &alu); 1527 if (r) 1528 return r; 1529 } 1530 return 0; 1531} 1532 1533static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1534{ 1535 struct r600_bc_alu alu; 1536 int i, r; 1537 1538 r = r600_bc_add_literal(ctx->bc, ctx->value); 1539 if (r) 1540 return r; 1541 for (i = 0; i < 4; i++) { 1542 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1543 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1545 alu.dst.chan = i; 1546 } else { 1547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1548 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1549 if (r) 1550 return r; 1551 alu.src[0].sel = ctx->temp_reg; 1552 alu.src[0].chan = i; 1553 } 1554 if (i == 3) { 1555 alu.last = 1; 1556 } 1557 r = r600_bc_add_alu(ctx->bc, &alu); 1558 if (r) 1559 return r; 1560 } 1561 return 0; 1562} 1563 1564static int tgsi_op3(struct r600_shader_ctx *ctx) 1565{ 1566 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1567 struct r600_bc_alu_src r600_src[3]; 1568 struct r600_bc_alu alu; 1569 int i, j, r; 1570 1571 r = tgsi_split_constant(ctx, r600_src); 1572 if (r) 1573 return r; 1574 r = tgsi_split_literal_constant(ctx, r600_src); 1575 if (r) 1576 return r; 1577 /* do it in 2 step as op3 doesn't support writemask */ 1578 for (i = 0; i < 4; i++) { 1579 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1580 alu.inst = ctx->inst_info->r600_opcode; 1581 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1582 alu.src[j] = r600_src[j]; 1583 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1584 } 1585 alu.dst.sel = ctx->temp_reg; 1586 alu.dst.chan = i; 1587 alu.dst.write = 1; 1588 alu.is_op3 = 1; 1589 if (i == 3) { 1590 alu.last = 1; 1591 } 1592 r = r600_bc_add_alu(ctx->bc, &alu); 1593 if (r) 1594 return r; 1595 } 1596 return tgsi_helper_copy(ctx, inst); 1597} 1598 1599static int tgsi_dp(struct r600_shader_ctx *ctx) 1600{ 1601 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1602 struct r600_bc_alu_src r600_src[3]; 1603 struct r600_bc_alu alu; 1604 int i, j, r; 1605 1606 r = tgsi_split_constant(ctx, r600_src); 1607 if (r) 1608 return r; 1609 r = tgsi_split_literal_constant(ctx, r600_src); 1610 if (r) 1611 return r; 1612 for (i = 0; i < 4; i++) { 1613 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1614 alu.inst = ctx->inst_info->r600_opcode; 1615 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1616 alu.src[j] = r600_src[j]; 1617 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1618 } 1619 alu.dst.sel = ctx->temp_reg; 1620 alu.dst.chan = i; 1621 alu.dst.write = 1; 1622 /* handle some special cases */ 1623 switch (ctx->inst_info->tgsi_opcode) { 1624 case TGSI_OPCODE_DP2: 1625 if (i > 1) { 1626 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1627 alu.src[0].chan = alu.src[1].chan = 0; 1628 } 1629 break; 1630 case TGSI_OPCODE_DP3: 1631 if (i > 2) { 1632 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1633 alu.src[0].chan = alu.src[1].chan = 0; 1634 } 1635 break; 1636 case TGSI_OPCODE_DPH: 1637 if (i == 3) { 1638 alu.src[0].sel = V_SQ_ALU_SRC_1; 1639 alu.src[0].chan = 0; 1640 alu.src[0].neg = 0; 1641 } 1642 break; 1643 default: 1644 break; 1645 } 1646 if (i == 3) { 1647 alu.last = 1; 1648 } 1649 r = r600_bc_add_alu(ctx->bc, &alu); 1650 if (r) 1651 return r; 1652 } 1653 return tgsi_helper_copy(ctx, inst); 1654} 1655 1656static int tgsi_tex(struct r600_shader_ctx *ctx) 1657{ 1658 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1659 struct r600_bc_tex tex; 1660 struct r600_bc_alu alu; 1661 unsigned src_gpr; 1662 int r, i; 1663 int opcode; 1664 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1665 uint32_t lit_vals[4]; 1666 1667 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1668 1669 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1670 /* Add perspective divide */ 1671 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1673 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1674 if (r) 1675 return r; 1676 1677 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1678 alu.dst.sel = ctx->temp_reg; 1679 alu.dst.chan = 3; 1680 alu.last = 1; 1681 alu.dst.write = 1; 1682 r = r600_bc_add_alu(ctx->bc, &alu); 1683 if (r) 1684 return r; 1685 1686 for (i = 0; i < 3; i++) { 1687 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1689 alu.src[0].sel = ctx->temp_reg; 1690 alu.src[0].chan = 3; 1691 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1692 if (r) 1693 return r; 1694 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1695 alu.dst.sel = ctx->temp_reg; 1696 alu.dst.chan = i; 1697 alu.dst.write = 1; 1698 r = r600_bc_add_alu(ctx->bc, &alu); 1699 if (r) 1700 return r; 1701 } 1702 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1704 alu.src[0].sel = V_SQ_ALU_SRC_1; 1705 alu.src[0].chan = 0; 1706 alu.dst.sel = ctx->temp_reg; 1707 alu.dst.chan = 3; 1708 alu.last = 1; 1709 alu.dst.write = 1; 1710 r = r600_bc_add_alu(ctx->bc, &alu); 1711 if (r) 1712 return r; 1713 src_not_temp = FALSE; 1714 src_gpr = ctx->temp_reg; 1715 } 1716 1717 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1718 int src_chan, src2_chan; 1719 1720 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1721 for (i = 0; i < 4; i++) { 1722 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1723 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1724 switch (i) { 1725 case 0: 1726 src_chan = 2; 1727 src2_chan = 1; 1728 break; 1729 case 1: 1730 src_chan = 2; 1731 src2_chan = 0; 1732 break; 1733 case 2: 1734 src_chan = 0; 1735 src2_chan = 2; 1736 break; 1737 case 3: 1738 src_chan = 1; 1739 src2_chan = 2; 1740 break; 1741 default: 1742 assert(0); 1743 src_chan = 0; 1744 src2_chan = 0; 1745 break; 1746 } 1747 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1748 if (r) 1749 return r; 1750 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1751 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1752 if (r) 1753 return r; 1754 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1755 alu.dst.sel = ctx->temp_reg; 1756 alu.dst.chan = i; 1757 if (i == 3) 1758 alu.last = 1; 1759 alu.dst.write = 1; 1760 r = r600_bc_add_alu(ctx->bc, &alu); 1761 if (r) 1762 return r; 1763 } 1764 1765 /* tmp1.z = RCP_e(|tmp1.z|) */ 1766 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1768 alu.src[0].sel = ctx->temp_reg; 1769 alu.src[0].chan = 2; 1770 alu.src[0].abs = 1; 1771 alu.dst.sel = ctx->temp_reg; 1772 alu.dst.chan = 2; 1773 alu.dst.write = 1; 1774 alu.last = 1; 1775 r = r600_bc_add_alu(ctx->bc, &alu); 1776 if (r) 1777 return r; 1778 1779 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1780 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1781 * muladd has no writemask, have to use another temp 1782 */ 1783 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1784 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1785 alu.is_op3 = 1; 1786 1787 alu.src[0].sel = ctx->temp_reg; 1788 alu.src[0].chan = 0; 1789 alu.src[1].sel = ctx->temp_reg; 1790 alu.src[1].chan = 2; 1791 1792 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1793 alu.src[2].chan = 0; 1794 1795 alu.dst.sel = ctx->temp_reg; 1796 alu.dst.chan = 0; 1797 alu.dst.write = 1; 1798 1799 r = r600_bc_add_alu(ctx->bc, &alu); 1800 if (r) 1801 return r; 1802 1803 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1804 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1805 alu.is_op3 = 1; 1806 1807 alu.src[0].sel = ctx->temp_reg; 1808 alu.src[0].chan = 1; 1809 alu.src[1].sel = ctx->temp_reg; 1810 alu.src[1].chan = 2; 1811 1812 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1813 alu.src[2].chan = 0; 1814 1815 alu.dst.sel = ctx->temp_reg; 1816 alu.dst.chan = 1; 1817 alu.dst.write = 1; 1818 1819 alu.last = 1; 1820 r = r600_bc_add_alu(ctx->bc, &alu); 1821 if (r) 1822 return r; 1823 1824 lit_vals[0] = fui(1.5f); 1825 1826 r = r600_bc_add_literal(ctx->bc, lit_vals); 1827 if (r) 1828 return r; 1829 src_not_temp = FALSE; 1830 src_gpr = ctx->temp_reg; 1831 } 1832 1833 if (src_not_temp) { 1834 for (i = 0; i < 4; i++) { 1835 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1837 alu.src[0].sel = src_gpr; 1838 alu.src[0].chan = i; 1839 alu.dst.sel = ctx->temp_reg; 1840 alu.dst.chan = i; 1841 if (i == 3) 1842 alu.last = 1; 1843 alu.dst.write = 1; 1844 r = r600_bc_add_alu(ctx->bc, &alu); 1845 if (r) 1846 return r; 1847 } 1848 src_gpr = ctx->temp_reg; 1849 } 1850 1851 opcode = ctx->inst_info->r600_opcode; 1852 if (opcode == SQ_TEX_INST_SAMPLE && 1853 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1854 opcode = SQ_TEX_INST_SAMPLE_C; 1855 1856 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1857 tex.inst = opcode; 1858 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1859 tex.sampler_id = tex.resource_id; 1860 tex.src_gpr = src_gpr; 1861 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1862 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1863 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1864 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1865 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 1866 tex.src_sel_x = 0; 1867 tex.src_sel_y = 1; 1868 tex.src_sel_z = 2; 1869 tex.src_sel_w = 3; 1870 1871 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1872 tex.src_sel_x = 1; 1873 tex.src_sel_y = 0; 1874 tex.src_sel_z = 3; 1875 tex.src_sel_w = 1; 1876 } 1877 1878 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1879 tex.coord_type_x = 1; 1880 tex.coord_type_y = 1; 1881 tex.coord_type_z = 1; 1882 tex.coord_type_w = 1; 1883 } 1884 1885 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1886 tex.src_sel_w = 2; 1887 1888 r = r600_bc_add_tex(ctx->bc, &tex); 1889 if (r) 1890 return r; 1891 1892 /* add shadow ambient support - gallium doesn't do it yet */ 1893 return 0; 1894 1895} 1896 1897static int tgsi_lrp(struct r600_shader_ctx *ctx) 1898{ 1899 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1900 struct r600_bc_alu_src r600_src[3]; 1901 struct r600_bc_alu alu; 1902 unsigned i; 1903 int r; 1904 1905 r = tgsi_split_constant(ctx, r600_src); 1906 if (r) 1907 return r; 1908 r = tgsi_split_literal_constant(ctx, r600_src); 1909 if (r) 1910 return r; 1911 /* 1 - src0 */ 1912 for (i = 0; i < 4; i++) { 1913 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1914 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1915 alu.src[0].sel = V_SQ_ALU_SRC_1; 1916 alu.src[0].chan = 0; 1917 alu.src[1] = r600_src[0]; 1918 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1919 alu.src[1].neg = 1; 1920 alu.dst.sel = ctx->temp_reg; 1921 alu.dst.chan = i; 1922 if (i == 3) { 1923 alu.last = 1; 1924 } 1925 alu.dst.write = 1; 1926 r = r600_bc_add_alu(ctx->bc, &alu); 1927 if (r) 1928 return r; 1929 } 1930 r = r600_bc_add_literal(ctx->bc, ctx->value); 1931 if (r) 1932 return r; 1933 1934 /* (1 - src0) * src2 */ 1935 for (i = 0; i < 4; i++) { 1936 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1938 alu.src[0].sel = ctx->temp_reg; 1939 alu.src[0].chan = i; 1940 alu.src[1] = r600_src[2]; 1941 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1942 alu.dst.sel = ctx->temp_reg; 1943 alu.dst.chan = i; 1944 if (i == 3) { 1945 alu.last = 1; 1946 } 1947 alu.dst.write = 1; 1948 r = r600_bc_add_alu(ctx->bc, &alu); 1949 if (r) 1950 return r; 1951 } 1952 r = r600_bc_add_literal(ctx->bc, ctx->value); 1953 if (r) 1954 return r; 1955 1956 /* src0 * src1 + (1 - src0) * src2 */ 1957 for (i = 0; i < 4; i++) { 1958 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1959 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1960 alu.is_op3 = 1; 1961 alu.src[0] = r600_src[0]; 1962 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1963 alu.src[1] = r600_src[1]; 1964 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1965 alu.src[2].sel = ctx->temp_reg; 1966 alu.src[2].chan = i; 1967 alu.dst.sel = ctx->temp_reg; 1968 alu.dst.chan = i; 1969 if (i == 3) { 1970 alu.last = 1; 1971 } 1972 r = r600_bc_add_alu(ctx->bc, &alu); 1973 if (r) 1974 return r; 1975 } 1976 return tgsi_helper_copy(ctx, inst); 1977} 1978 1979static int tgsi_cmp(struct r600_shader_ctx *ctx) 1980{ 1981 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1982 struct r600_bc_alu_src r600_src[3]; 1983 struct r600_bc_alu alu; 1984 int use_temp = 0; 1985 int i, r; 1986 1987 r = tgsi_split_constant(ctx, r600_src); 1988 if (r) 1989 return r; 1990 r = tgsi_split_literal_constant(ctx, r600_src); 1991 if (r) 1992 return r; 1993 1994 if (inst->Dst[0].Register.WriteMask != 0xf) 1995 use_temp = 1; 1996 1997 for (i = 0; i < 4; i++) { 1998 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1999 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2000 alu.src[0] = r600_src[0]; 2001 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2002 2003 alu.src[1] = r600_src[2]; 2004 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2005 2006 alu.src[2] = r600_src[1]; 2007 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2008 2009 if (use_temp) 2010 alu.dst.sel = ctx->temp_reg; 2011 else { 2012 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2013 if (r) 2014 return r; 2015 } 2016 alu.dst.chan = i; 2017 alu.dst.write = 1; 2018 alu.is_op3 = 1; 2019 if (i == 3) 2020 alu.last = 1; 2021 r = r600_bc_add_alu(ctx->bc, &alu); 2022 if (r) 2023 return r; 2024 } 2025 if (use_temp) 2026 return tgsi_helper_copy(ctx, inst); 2027 return 0; 2028} 2029 2030static int tgsi_xpd(struct r600_shader_ctx *ctx) 2031{ 2032 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2033 struct r600_bc_alu_src r600_src[3]; 2034 struct r600_bc_alu alu; 2035 uint32_t use_temp = 0; 2036 int i, r; 2037 2038 if (inst->Dst[0].Register.WriteMask != 0xf) 2039 use_temp = 1; 2040 2041 r = tgsi_split_constant(ctx, r600_src); 2042 if (r) 2043 return r; 2044 r = tgsi_split_literal_constant(ctx, r600_src); 2045 if (r) 2046 return r; 2047 2048 for (i = 0; i < 4; i++) { 2049 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2050 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2051 2052 alu.src[0] = r600_src[0]; 2053 switch (i) { 2054 case 0: 2055 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2056 break; 2057 case 1: 2058 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2059 break; 2060 case 2: 2061 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2062 break; 2063 case 3: 2064 alu.src[0].sel = V_SQ_ALU_SRC_0; 2065 alu.src[0].chan = i; 2066 } 2067 2068 alu.src[1] = r600_src[1]; 2069 switch (i) { 2070 case 0: 2071 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2072 break; 2073 case 1: 2074 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2075 break; 2076 case 2: 2077 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2078 break; 2079 case 3: 2080 alu.src[1].sel = V_SQ_ALU_SRC_0; 2081 alu.src[1].chan = i; 2082 } 2083 2084 alu.dst.sel = ctx->temp_reg; 2085 alu.dst.chan = i; 2086 alu.dst.write = 1; 2087 2088 if (i == 3) 2089 alu.last = 1; 2090 r = r600_bc_add_alu(ctx->bc, &alu); 2091 if (r) 2092 return r; 2093 2094 r = r600_bc_add_literal(ctx->bc, ctx->value); 2095 if (r) 2096 return r; 2097 } 2098 2099 for (i = 0; i < 4; i++) { 2100 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2102 2103 alu.src[0] = r600_src[0]; 2104 switch (i) { 2105 case 0: 2106 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2107 break; 2108 case 1: 2109 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2110 break; 2111 case 2: 2112 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2113 break; 2114 case 3: 2115 alu.src[0].sel = V_SQ_ALU_SRC_0; 2116 alu.src[0].chan = i; 2117 } 2118 2119 alu.src[1] = r600_src[1]; 2120 switch (i) { 2121 case 0: 2122 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2123 break; 2124 case 1: 2125 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2126 break; 2127 case 2: 2128 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2129 break; 2130 case 3: 2131 alu.src[1].sel = V_SQ_ALU_SRC_0; 2132 alu.src[1].chan = i; 2133 } 2134 2135 alu.src[2].sel = ctx->temp_reg; 2136 alu.src[2].neg = 1; 2137 alu.src[2].chan = i; 2138 2139 if (use_temp) 2140 alu.dst.sel = ctx->temp_reg; 2141 else { 2142 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2143 if (r) 2144 return r; 2145 } 2146 alu.dst.chan = i; 2147 alu.dst.write = 1; 2148 alu.is_op3 = 1; 2149 if (i == 3) 2150 alu.last = 1; 2151 r = r600_bc_add_alu(ctx->bc, &alu); 2152 if (r) 2153 return r; 2154 2155 r = r600_bc_add_literal(ctx->bc, ctx->value); 2156 if (r) 2157 return r; 2158 } 2159 if (use_temp) 2160 return tgsi_helper_copy(ctx, inst); 2161 return 0; 2162} 2163 2164static int tgsi_exp(struct r600_shader_ctx *ctx) 2165{ 2166 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2167 struct r600_bc_alu_src r600_src[3]; 2168 struct r600_bc_alu alu; 2169 int r; 2170 2171 /* result.x = 2^floor(src); */ 2172 if (inst->Dst[0].Register.WriteMask & 1) { 2173 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2174 2175 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2176 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2177 if (r) 2178 return r; 2179 2180 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2181 2182 alu.dst.sel = ctx->temp_reg; 2183 alu.dst.chan = 0; 2184 alu.dst.write = 1; 2185 alu.last = 1; 2186 r = r600_bc_add_alu(ctx->bc, &alu); 2187 if (r) 2188 return r; 2189 2190 r = r600_bc_add_literal(ctx->bc, ctx->value); 2191 if (r) 2192 return r; 2193 2194 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2195 alu.src[0].sel = ctx->temp_reg; 2196 alu.src[0].chan = 0; 2197 2198 alu.dst.sel = ctx->temp_reg; 2199 alu.dst.chan = 0; 2200 alu.dst.write = 1; 2201 alu.last = 1; 2202 r = r600_bc_add_alu(ctx->bc, &alu); 2203 if (r) 2204 return r; 2205 2206 r = r600_bc_add_literal(ctx->bc, ctx->value); 2207 if (r) 2208 return r; 2209 } 2210 2211 /* result.y = tmp - floor(tmp); */ 2212 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2213 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2214 2215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2216 alu.src[0] = r600_src[0]; 2217 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2218 if (r) 2219 return r; 2220 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2221 2222 alu.dst.sel = ctx->temp_reg; 2223// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2224// if (r) 2225// return r; 2226 alu.dst.write = 1; 2227 alu.dst.chan = 1; 2228 2229 alu.last = 1; 2230 2231 r = r600_bc_add_alu(ctx->bc, &alu); 2232 if (r) 2233 return r; 2234 r = r600_bc_add_literal(ctx->bc, ctx->value); 2235 if (r) 2236 return r; 2237 } 2238 2239 /* result.z = RoughApprox2ToX(tmp);*/ 2240 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2241 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2243 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2244 if (r) 2245 return r; 2246 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2247 2248 alu.dst.sel = ctx->temp_reg; 2249 alu.dst.write = 1; 2250 alu.dst.chan = 2; 2251 2252 alu.last = 1; 2253 2254 r = r600_bc_add_alu(ctx->bc, &alu); 2255 if (r) 2256 return r; 2257 r = r600_bc_add_literal(ctx->bc, ctx->value); 2258 if (r) 2259 return r; 2260 } 2261 2262 /* result.w = 1.0;*/ 2263 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2264 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2265 2266 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2267 alu.src[0].sel = V_SQ_ALU_SRC_1; 2268 alu.src[0].chan = 0; 2269 2270 alu.dst.sel = ctx->temp_reg; 2271 alu.dst.chan = 3; 2272 alu.dst.write = 1; 2273 alu.last = 1; 2274 r = r600_bc_add_alu(ctx->bc, &alu); 2275 if (r) 2276 return r; 2277 r = r600_bc_add_literal(ctx->bc, ctx->value); 2278 if (r) 2279 return r; 2280 } 2281 return tgsi_helper_copy(ctx, inst); 2282} 2283 2284static int tgsi_log(struct r600_shader_ctx *ctx) 2285{ 2286 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2287 struct r600_bc_alu alu; 2288 int r; 2289 2290 /* result.x = floor(log2(src)); */ 2291 if (inst->Dst[0].Register.WriteMask & 1) { 2292 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2293 2294 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2295 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2296 if (r) 2297 return r; 2298 2299 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2300 2301 alu.dst.sel = ctx->temp_reg; 2302 alu.dst.chan = 0; 2303 alu.dst.write = 1; 2304 alu.last = 1; 2305 r = r600_bc_add_alu(ctx->bc, &alu); 2306 if (r) 2307 return r; 2308 2309 r = r600_bc_add_literal(ctx->bc, ctx->value); 2310 if (r) 2311 return r; 2312 2313 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2314 alu.src[0].sel = ctx->temp_reg; 2315 alu.src[0].chan = 0; 2316 2317 alu.dst.sel = ctx->temp_reg; 2318 alu.dst.chan = 0; 2319 alu.dst.write = 1; 2320 alu.last = 1; 2321 2322 r = r600_bc_add_alu(ctx->bc, &alu); 2323 if (r) 2324 return r; 2325 2326 r = r600_bc_add_literal(ctx->bc, ctx->value); 2327 if (r) 2328 return r; 2329 } 2330 2331 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2332 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2333 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2334 2335 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2336 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2337 if (r) 2338 return r; 2339 2340 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2341 2342 alu.dst.sel = ctx->temp_reg; 2343 alu.dst.chan = 1; 2344 alu.dst.write = 1; 2345 alu.last = 1; 2346 2347 r = r600_bc_add_alu(ctx->bc, &alu); 2348 if (r) 2349 return r; 2350 2351 r = r600_bc_add_literal(ctx->bc, ctx->value); 2352 if (r) 2353 return r; 2354 2355 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2356 2357 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2358 alu.src[0].sel = ctx->temp_reg; 2359 alu.src[0].chan = 1; 2360 2361 alu.dst.sel = ctx->temp_reg; 2362 alu.dst.chan = 1; 2363 alu.dst.write = 1; 2364 alu.last = 1; 2365 2366 r = r600_bc_add_alu(ctx->bc, &alu); 2367 if (r) 2368 return r; 2369 2370 r = r600_bc_add_literal(ctx->bc, ctx->value); 2371 if (r) 2372 return r; 2373 2374 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2375 2376 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2377 alu.src[0].sel = ctx->temp_reg; 2378 alu.src[0].chan = 1; 2379 2380 alu.dst.sel = ctx->temp_reg; 2381 alu.dst.chan = 1; 2382 alu.dst.write = 1; 2383 alu.last = 1; 2384 2385 r = r600_bc_add_alu(ctx->bc, &alu); 2386 if (r) 2387 return r; 2388 2389 r = r600_bc_add_literal(ctx->bc, ctx->value); 2390 if (r) 2391 return r; 2392 2393 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2394 2395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2396 alu.src[0].sel = ctx->temp_reg; 2397 alu.src[0].chan = 1; 2398 2399 alu.dst.sel = ctx->temp_reg; 2400 alu.dst.chan = 1; 2401 alu.dst.write = 1; 2402 alu.last = 1; 2403 2404 r = r600_bc_add_alu(ctx->bc, &alu); 2405 if (r) 2406 return r; 2407 2408 r = r600_bc_add_literal(ctx->bc, ctx->value); 2409 if (r) 2410 return r; 2411 2412 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2413 2414 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2415 2416 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2417 if (r) 2418 return r; 2419 2420 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2421 2422 alu.src[1].sel = ctx->temp_reg; 2423 alu.src[1].chan = 1; 2424 2425 alu.dst.sel = ctx->temp_reg; 2426 alu.dst.chan = 1; 2427 alu.dst.write = 1; 2428 alu.last = 1; 2429 2430 r = r600_bc_add_alu(ctx->bc, &alu); 2431 if (r) 2432 return r; 2433 2434 r = r600_bc_add_literal(ctx->bc, ctx->value); 2435 if (r) 2436 return r; 2437 } 2438 2439 /* result.z = log2(src);*/ 2440 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2441 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2442 2443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2444 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2445 if (r) 2446 return r; 2447 2448 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2449 2450 alu.dst.sel = ctx->temp_reg; 2451 alu.dst.write = 1; 2452 alu.dst.chan = 2; 2453 alu.last = 1; 2454 2455 r = r600_bc_add_alu(ctx->bc, &alu); 2456 if (r) 2457 return r; 2458 2459 r = r600_bc_add_literal(ctx->bc, ctx->value); 2460 if (r) 2461 return r; 2462 } 2463 2464 /* result.w = 1.0; */ 2465 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2466 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2467 2468 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2469 alu.src[0].sel = V_SQ_ALU_SRC_1; 2470 alu.src[0].chan = 0; 2471 2472 alu.dst.sel = ctx->temp_reg; 2473 alu.dst.chan = 3; 2474 alu.dst.write = 1; 2475 alu.last = 1; 2476 2477 r = r600_bc_add_alu(ctx->bc, &alu); 2478 if (r) 2479 return r; 2480 2481 r = r600_bc_add_literal(ctx->bc, ctx->value); 2482 if (r) 2483 return r; 2484 } 2485 2486 return tgsi_helper_copy(ctx, inst); 2487} 2488 2489/* r6/7 only for now */ 2490static int tgsi_arl(struct r600_shader_ctx *ctx) 2491{ 2492 /* TODO from r600c, ar values don't persist between clauses */ 2493 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2494 struct r600_bc_alu alu; 2495 int r; 2496 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2497 2498 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2499 2500 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2501 if (r) 2502 return r; 2503 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2504 2505 alu.last = 1; 2506 2507 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2508 if (r) 2509 return r; 2510 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2511 return 0; 2512} 2513 2514static int tgsi_opdst(struct r600_shader_ctx *ctx) 2515{ 2516 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2517 struct r600_bc_alu alu; 2518 int i, r = 0; 2519 2520 for (i = 0; i < 4; i++) { 2521 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2522 2523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2524 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2525 if (r) 2526 return r; 2527 2528 if (i == 0 || i == 3) { 2529 alu.src[0].sel = V_SQ_ALU_SRC_1; 2530 } else { 2531 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2532 if (r) 2533 return r; 2534 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2535 } 2536 2537 if (i == 0 || i == 2) { 2538 alu.src[1].sel = V_SQ_ALU_SRC_1; 2539 } else { 2540 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2541 if (r) 2542 return r; 2543 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2544 } 2545 if (i == 3) 2546 alu.last = 1; 2547 r = r600_bc_add_alu(ctx->bc, &alu); 2548 if (r) 2549 return r; 2550 } 2551 return 0; 2552} 2553 2554static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2555{ 2556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2557 struct r600_bc_alu alu; 2558 int r; 2559 2560 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2561 alu.inst = opcode; 2562 alu.predicate = 1; 2563 2564 alu.dst.sel = ctx->temp_reg; 2565 alu.dst.write = 1; 2566 alu.dst.chan = 0; 2567 2568 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2569 if (r) 2570 return r; 2571 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2572 alu.src[1].sel = V_SQ_ALU_SRC_0; 2573 alu.src[1].chan = 0; 2574 2575 alu.last = 1; 2576 2577 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2578 if (r) 2579 return r; 2580 return 0; 2581} 2582 2583static int pops(struct r600_shader_ctx *ctx, int pops) 2584{ 2585 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2586 ctx->bc->cf_last->pop_count = pops; 2587 return 0; 2588} 2589 2590static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2591{ 2592 switch(reason) { 2593 case FC_PUSH_VPM: 2594 ctx->bc->callstack[ctx->bc->call_sp].current--; 2595 break; 2596 case FC_PUSH_WQM: 2597 case FC_LOOP: 2598 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2599 break; 2600 case FC_REP: 2601 /* TOODO : for 16 vp asic should -= 2; */ 2602 ctx->bc->callstack[ctx->bc->call_sp].current --; 2603 break; 2604 } 2605} 2606 2607static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2608{ 2609 if (check_max_only) { 2610 int diff; 2611 switch (reason) { 2612 case FC_PUSH_VPM: 2613 diff = 1; 2614 break; 2615 case FC_PUSH_WQM: 2616 diff = 4; 2617 break; 2618 default: 2619 assert(0); 2620 diff = 0; 2621 } 2622 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2623 ctx->bc->callstack[ctx->bc->call_sp].max) { 2624 ctx->bc->callstack[ctx->bc->call_sp].max = 2625 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2626 } 2627 return; 2628 } 2629 switch (reason) { 2630 case FC_PUSH_VPM: 2631 ctx->bc->callstack[ctx->bc->call_sp].current++; 2632 break; 2633 case FC_PUSH_WQM: 2634 case FC_LOOP: 2635 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2636 break; 2637 case FC_REP: 2638 ctx->bc->callstack[ctx->bc->call_sp].current++; 2639 break; 2640 } 2641 2642 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2643 ctx->bc->callstack[ctx->bc->call_sp].max) { 2644 ctx->bc->callstack[ctx->bc->call_sp].max = 2645 ctx->bc->callstack[ctx->bc->call_sp].current; 2646 } 2647} 2648 2649static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2650{ 2651 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2652 2653 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2654 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2655 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2656 sp->num_mid++; 2657} 2658 2659static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2660{ 2661 ctx->bc->fc_sp++; 2662 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2663 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2664} 2665 2666static void fc_poplevel(struct r600_shader_ctx *ctx) 2667{ 2668 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2669 if (sp->mid) { 2670 free(sp->mid); 2671 sp->mid = NULL; 2672 } 2673 sp->num_mid = 0; 2674 sp->start = NULL; 2675 sp->type = 0; 2676 ctx->bc->fc_sp--; 2677} 2678 2679#if 0 2680static int emit_return(struct r600_shader_ctx *ctx) 2681{ 2682 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2683 return 0; 2684} 2685 2686static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2687{ 2688 2689 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2690 ctx->bc->cf_last->pop_count = pops; 2691 /* TODO work out offset */ 2692 return 0; 2693} 2694 2695static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2696{ 2697 return 0; 2698} 2699 2700static void emit_testflag(struct r600_shader_ctx *ctx) 2701{ 2702 2703} 2704 2705static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2706{ 2707 emit_testflag(ctx); 2708 emit_jump_to_offset(ctx, 1, 4); 2709 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2710 pops(ctx, ifidx + 1); 2711 emit_return(ctx); 2712} 2713 2714static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2715{ 2716 emit_testflag(ctx); 2717 2718 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2719 ctx->bc->cf_last->pop_count = 1; 2720 2721 fc_set_mid(ctx, fc_sp); 2722 2723 pops(ctx, 1); 2724} 2725#endif 2726 2727static int tgsi_if(struct r600_shader_ctx *ctx) 2728{ 2729 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2730 2731 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2732 2733 fc_pushlevel(ctx, FC_IF); 2734 2735 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2736 return 0; 2737} 2738 2739static int tgsi_else(struct r600_shader_ctx *ctx) 2740{ 2741 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2742 ctx->bc->cf_last->pop_count = 1; 2743 2744 fc_set_mid(ctx, ctx->bc->fc_sp); 2745 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2746 return 0; 2747} 2748 2749static int tgsi_endif(struct r600_shader_ctx *ctx) 2750{ 2751 pops(ctx, 1); 2752 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2753 R600_ERR("if/endif unbalanced in shader\n"); 2754 return -1; 2755 } 2756 2757 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2758 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2759 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2760 } else { 2761 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2762 } 2763 fc_poplevel(ctx); 2764 2765 callstack_decrease_current(ctx, FC_PUSH_VPM); 2766 return 0; 2767} 2768 2769static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2770{ 2771 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2772 2773 fc_pushlevel(ctx, FC_LOOP); 2774 2775 /* check stack depth */ 2776 callstack_check_depth(ctx, FC_LOOP, 0); 2777 return 0; 2778} 2779 2780static int tgsi_endloop(struct r600_shader_ctx *ctx) 2781{ 2782 int i; 2783 2784 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2785 2786 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2787 R600_ERR("loop/endloop in shader code are not paired.\n"); 2788 return -EINVAL; 2789 } 2790 2791 /* fixup loop pointers - from r600isa 2792 LOOP END points to CF after LOOP START, 2793 LOOP START point to CF after LOOP END 2794 BRK/CONT point to LOOP END CF 2795 */ 2796 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2797 2798 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2799 2800 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2801 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2802 } 2803 /* TODO add LOOPRET support */ 2804 fc_poplevel(ctx); 2805 callstack_decrease_current(ctx, FC_LOOP); 2806 return 0; 2807} 2808 2809static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2810{ 2811 unsigned int fscp; 2812 2813 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2814 { 2815 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2816 break; 2817 } 2818 2819 if (fscp == 0) { 2820 R600_ERR("Break not inside loop/endloop pair\n"); 2821 return -EINVAL; 2822 } 2823 2824 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2825 ctx->bc->cf_last->pop_count = 1; 2826 2827 fc_set_mid(ctx, fscp); 2828 2829 pops(ctx, 1); 2830 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2831 return 0; 2832} 2833 2834static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2835 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2836 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2837 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2838 2839 /* FIXME: 2840 * For state trackers other than OpenGL, we'll want to use 2841 * _RECIP_IEEE instead. 2842 */ 2843 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2844 2845 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2846 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2847 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2848 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2849 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2850 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2851 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2852 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2853 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2854 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2855 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2856 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2857 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2858 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2859 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2860 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2861 /* gap */ 2862 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2863 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2864 /* gap */ 2865 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2866 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2867 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2868 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2869 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2870 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2871 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2872 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2873 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2874 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2875 /* gap */ 2876 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2877 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2878 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2879 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2880 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2881 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2882 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2883 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2884 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2885 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2887 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2889 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2890 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2892 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2893 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2894 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2895 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2896 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2897 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2898 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2899 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2900 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2901 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2902 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2903 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2904 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2905 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2907 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2908 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2909 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2910 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2911 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2912 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2913 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2914 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2915 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2916 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2917 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2918 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2919 /* gap */ 2920 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2921 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2922 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2923 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2924 /* gap */ 2925 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2926 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2930 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2932 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2933 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 /* gap */ 2935 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2938 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2940 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2941 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2942 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2943 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2944 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2945 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2946 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2947 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2948 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2949 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2950 /* gap */ 2951 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2952 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2954 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2955 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2956 /* gap */ 2957 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2958 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2959 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2960 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2961 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2962 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2963 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2964 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2965 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2966 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2967 /* gap */ 2968 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2969 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2970 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2971 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2972 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2973 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2974 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2975 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2976 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2977 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2978 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2979 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2980 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2981 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2982 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2983 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2984 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2985 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2986 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2987 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2988 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2989 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2990 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2991 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2992 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2993 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2994 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2995 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2996}; 2997 2998static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2999 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3000 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3001 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3002 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3003 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3004 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3005 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3007 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3008 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3009 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3010 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3011 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3012 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3013 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3014 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3015 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3016 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3017 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3018 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3019 /* gap */ 3020 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3021 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3022 /* gap */ 3023 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3024 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3025 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3026 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3027 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3028 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3029 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3030 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3031 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3032 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3033 /* gap */ 3034 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3035 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3036 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3037 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3038 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3039 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3040 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3041 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3042 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3043 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3044 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3045 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3046 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3047 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3048 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3049 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3050 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3051 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3052 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3053 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3054 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3055 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3057 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3058 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3059 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3060 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3061 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3062 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3063 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3065 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3066 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3067 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3068 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3069 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3070 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3071 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3072 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3073 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3074 {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3075 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3076 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3077 /* gap */ 3078 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3079 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3080 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3081 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3082 /* gap */ 3083 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3084 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3086 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3089 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3091 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3092 /* gap */ 3093 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3096 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3097 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3099 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3101 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3102 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3103 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3104 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3105 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3107 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3108 /* gap */ 3109 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3110 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3111 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3114 /* gap */ 3115 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3116 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3117 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3118 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3119 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3120 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3124 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3125 /* gap */ 3126 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3134 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3135 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3136 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3140 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3141 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3142 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3144 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3146 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154}; 3155