/* r600_shader.c revision 7be5455796facbe35cf1f1bdbefa83759b2e3b58 */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 /* FIXME better to move this in config things so they get emited 48 * only one time per cs 49 */ 50 for (i = 0; i < 10; i++) { 51 spi_vs_out_id[i] = 0; 52 } 53 for (i = 0; i < 32; i++) { 54 tmp = i << ((i & 3) * 8); 55 spi_vs_out_id[i / 4] |= tmp; 56 } 57 for (i = 0; i < 10; i++) { 58 r600_pipe_state_add_reg(rstate, 59 R_028614_SPI_VS_OUT_ID_0 + i * 4, 60 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 61 } 62 63 r600_pipe_state_add_reg(rstate, 64 R_0286C4_SPI_VS_OUT_CONFIG, 65 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 66 0xFFFFFFFF, NULL); 67 r600_pipe_state_add_reg(rstate, 68 R_028868_SQ_PGM_RESOURCES_VS, 69 S_028868_NUM_GPRS(rshader->bc.ngpr) | 70 S_028868_STACK_SIZE(rshader->bc.nstack), 71 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_028858_SQ_PGM_START_VS, 77 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 78 79 r600_pipe_state_add_reg(rstate, 80 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 81 0xFFFFFFFF, NULL); 82 83} 84 85int r600_find_vs_semantic_index(struct r600_shader *vs, 86 struct r600_shader *ps, int id) 87{ 88 struct r600_shader_io *input = &ps->input[id]; 89 90 for (int i = 0; i < vs->noutput; i++) { 91 if (input->name == vs->output[i].name 
&& 92 input->sid == vs->output[i].sid) { 93 return i - 1; 94 } 95 } 96 return 0; 97} 98 99static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 100{ 101 struct r600_pipe_state *rstate = &shader->rstate; 102 struct r600_shader *rshader = &shader->shader; 103 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 104 int pos_index = -1, face_index = -1; 105 106 rstate->nregs = 0; 107 108 for (i = 0; i < rshader->ninput; i++) { 109 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 110 pos_index = i; 111 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 112 face_index = i; 113 } 114 115 for (i = 0; i < rshader->noutput; i++) { 116 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 117 r600_pipe_state_add_reg(rstate, 118 R_02880C_DB_SHADER_CONTROL, 119 S_02880C_Z_EXPORT_ENABLE(1), 120 S_02880C_Z_EXPORT_ENABLE(1), NULL); 121 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 122 r600_pipe_state_add_reg(rstate, 123 R_02880C_DB_SHADER_CONTROL, 124 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 126 } 127 128 exports_ps = 0; 129 num_cout = 0; 130 for (i = 0; i < rshader->noutput; i++) { 131 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 132 exports_ps |= 1; 133 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 134 num_cout++; 135 } 136 } 137 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 138 if (!exports_ps) { 139 /* always at least export 1 component per pixel */ 140 exports_ps = 2; 141 } 142 143 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 144 S_0286CC_PERSP_GRADIENT_ENA(1); 145 spi_input_z = 0; 146 if (pos_index != -1) { 147 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 148 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 149 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 150 S_0286CC_BARYC_SAMPLE_CNTL(1)); 151 spi_input_z |= 1; 152 } 
153 154 spi_ps_in_control_1 = 0; 155 if (face_index != -1) { 156 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 157 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 158 } 159 160 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 161 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 162 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 163 r600_pipe_state_add_reg(rstate, 164 R_028840_SQ_PGM_START_PS, 165 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 166 r600_pipe_state_add_reg(rstate, 167 R_028850_SQ_PGM_RESOURCES_PS, 168 S_028868_NUM_GPRS(rshader->bc.ngpr) | 169 S_028868_STACK_SIZE(rshader->bc.nstack), 170 0xFFFFFFFF, NULL); 171 r600_pipe_state_add_reg(rstate, 172 R_028854_SQ_PGM_EXPORTS_PS, 173 exports_ps, 0xFFFFFFFF, NULL); 174 r600_pipe_state_add_reg(rstate, 175 R_0288CC_SQ_PGM_CF_OFFSET_PS, 176 0x00000000, 0xFFFFFFFF, NULL); 177 178 if (rshader->uses_kill) { 179 /* only set some bits here, the other bits are set in the dsa state */ 180 r600_pipe_state_add_reg(rstate, 181 R_02880C_DB_SHADER_CONTROL, 182 S_02880C_KILL_ENABLE(1), 183 S_02880C_KILL_ENABLE(1), NULL); 184 } 185 r600_pipe_state_add_reg(rstate, 186 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 187 0xFFFFFFFF, NULL); 188} 189 190int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 191{ 192 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 193 struct r600_shader *rshader = &shader->shader; 194 void *ptr; 195 196 /* copy new shader */ 197 if (shader->bo == NULL) { 198 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 199 if (shader->bo == NULL) { 200 return -ENOMEM; 201 } 202 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 203 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 204 r600_bo_unmap(rctx->radeon, shader->bo); 205 } 206 /* build state */ 207 switch 
(rshader->processor_type) { 208 case TGSI_PROCESSOR_VERTEX: 209 if (rshader->family >= CHIP_CEDAR) { 210 evergreen_pipe_shader_vs(ctx, shader); 211 } else { 212 r600_pipe_shader_vs(ctx, shader); 213 } 214 break; 215 case TGSI_PROCESSOR_FRAGMENT: 216 if (rshader->family >= CHIP_CEDAR) { 217 evergreen_pipe_shader_ps(ctx, shader); 218 } else { 219 r600_pipe_shader_ps(ctx, shader); 220 } 221 break; 222 default: 223 return -EINVAL; 224 } 225 return 0; 226} 227 228int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 229int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 230{ 231 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 232 int r; 233 234//fprintf(stderr, "--------------------------------------------------------------\n"); 235//tgsi_dump(tokens, 0); 236 shader->shader.family = r600_get_family(rctx->radeon); 237 r = r600_shader_from_tgsi(tokens, &shader->shader); 238 if (r) { 239 R600_ERR("translation from TGSI failed !\n"); 240 return r; 241 } 242 r = r600_bc_build(&shader->shader.bc); 243 if (r) { 244 R600_ERR("building bytecode failed !\n"); 245 return r; 246 } 247//r600_bc_dump(&shader->shader.bc); 248//fprintf(stderr, "______________________________________________________________\n"); 249 return r600_pipe_shader(ctx, shader); 250} 251 252void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 253{ 254 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 255 256 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 257 r600_bc_clear(&shader->shader.bc); 258} 259 260/* 261 * tgsi -> r600 shader 262 */ 263struct r600_shader_tgsi_instruction; 264 265struct r600_shader_ctx { 266 struct tgsi_shader_info info; 267 struct tgsi_parse_context parse; 268 const struct tgsi_token *tokens; 269 unsigned type; 270 unsigned file_offset[TGSI_FILE_COUNT]; 271 unsigned temp_reg; 272 struct r600_shader_tgsi_instruction 
*inst_info; 273 struct r600_bc *bc; 274 struct r600_shader *shader; 275 u32 value[4]; 276 u32 *literals; 277 u32 nliterals; 278 u32 max_driver_temp_used; 279 /* needed for evergreen interpolation */ 280 boolean input_centroid; 281 boolean input_linear; 282 boolean input_perspective; 283 int num_interp_gpr; 284}; 285 286struct r600_shader_tgsi_instruction { 287 unsigned tgsi_opcode; 288 unsigned is_op3; 289 unsigned r600_opcode; 290 int (*process)(struct r600_shader_ctx *ctx); 291}; 292 293static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 294static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 295 296static int tgsi_is_supported(struct r600_shader_ctx *ctx) 297{ 298 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 299 int j; 300 301 if (i->Instruction.NumDstRegs > 1) { 302 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 303 return -EINVAL; 304 } 305 if (i->Instruction.Predicate) { 306 R600_ERR("predicate unsupported\n"); 307 return -EINVAL; 308 } 309#if 0 310 if (i->Instruction.Label) { 311 R600_ERR("label unsupported\n"); 312 return -EINVAL; 313 } 314#endif 315 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 316 if (i->Src[j].Register.Dimension) { 317 R600_ERR("unsupported src %d (dimension %d)\n", j, 318 i->Src[j].Register.Dimension); 319 return -EINVAL; 320 } 321 } 322 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 323 if (i->Dst[j].Register.Dimension) { 324 R600_ERR("unsupported dst (dimension)\n"); 325 return -EINVAL; 326 } 327 } 328 return 0; 329} 330 331static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 332{ 333 int i, r; 334 struct r600_bc_alu alu; 335 int gpr = 0, base_chan = 0; 336 int ij_index = 0; 337 338 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 339 ij_index = 0; 340 if (ctx->shader->input[input].centroid) 341 ij_index++; 342 } else if (ctx->shader->input[input].interpolate == 
TGSI_INTERPOLATE_LINEAR) { 343 ij_index = 0; 344 /* if we have perspective add one */ 345 if (ctx->input_perspective) { 346 ij_index++; 347 /* if we have perspective centroid */ 348 if (ctx->input_centroid) 349 ij_index++; 350 } 351 if (ctx->shader->input[input].centroid) 352 ij_index++; 353 } 354 355 /* work out gpr and base_chan from index */ 356 gpr = ij_index / 2; 357 base_chan = (2 * (ij_index % 2)) + 1; 358 359 for (i = 0; i < 8; i++) { 360 memset(&alu, 0, sizeof(struct r600_bc_alu)); 361 362 if (i < 4) 363 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 364 else 365 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 366 367 if ((i > 1) && (i < 6)) { 368 alu.dst.sel = ctx->shader->input[input].gpr; 369 alu.dst.write = 1; 370 } 371 372 alu.dst.chan = i % 4; 373 374 alu.src[0].sel = gpr; 375 alu.src[0].chan = (base_chan - (i % 2)); 376 377 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 378 379 alu.bank_swizzle_force = SQ_ALU_VEC_210; 380 if ((i % 4) == 3) 381 alu.last = 1; 382 r = r600_bc_add_alu(ctx->bc, &alu); 383 if (r) 384 return r; 385 } 386 return 0; 387} 388 389 390static int tgsi_declaration(struct r600_shader_ctx *ctx) 391{ 392 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 393 unsigned i; 394 395 switch (d->Declaration.File) { 396 case TGSI_FILE_INPUT: 397 i = ctx->shader->ninput++; 398 ctx->shader->input[i].name = d->Semantic.Name; 399 ctx->shader->input[i].sid = d->Semantic.Index; 400 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 401 ctx->shader->input[i].centroid = d->Declaration.Centroid; 402 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 403 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 404 /* turn input into interpolate on EG */ 405 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 406 if (ctx->shader->input[i].interpolate > 0) { 407 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 408 
evergreen_interp_alu(ctx, i); 409 } 410 } 411 } 412 break; 413 case TGSI_FILE_OUTPUT: 414 i = ctx->shader->noutput++; 415 ctx->shader->output[i].name = d->Semantic.Name; 416 ctx->shader->output[i].sid = d->Semantic.Index; 417 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 418 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 419 break; 420 case TGSI_FILE_CONSTANT: 421 case TGSI_FILE_TEMPORARY: 422 case TGSI_FILE_SAMPLER: 423 case TGSI_FILE_ADDRESS: 424 break; 425 default: 426 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 427 return -EINVAL; 428 } 429 return 0; 430} 431 432static int r600_get_temp(struct r600_shader_ctx *ctx) 433{ 434 return ctx->temp_reg + ctx->max_driver_temp_used++; 435} 436 437/* 438 * for evergreen we need to scan the shader to find the number of GPRs we need to 439 * reserve for interpolation. 440 * 441 * we need to know if we are going to emit 442 * any centroid inputs 443 * if perspective and linear are required 444*/ 445static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 446{ 447 int i; 448 int num_baryc; 449 450 ctx->input_linear = FALSE; 451 ctx->input_perspective = FALSE; 452 ctx->input_centroid = FALSE; 453 ctx->num_interp_gpr = 1; 454 455 /* any centroid inputs */ 456 for (i = 0; i < ctx->info.num_inputs; i++) { 457 /* skip position/face */ 458 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 459 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 460 continue; 461 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 462 ctx->input_linear = TRUE; 463 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 464 ctx->input_perspective = TRUE; 465 if (ctx->info.input_centroid[i]) 466 ctx->input_centroid = TRUE; 467 } 468 469 num_baryc = 0; 470 /* ignoring sample for now */ 471 if (ctx->input_perspective) 472 num_baryc++; 473 if (ctx->input_linear) 474 num_baryc++; 475 if (ctx->input_centroid) 476 num_baryc *= 2; 477 478 
ctx->num_interp_gpr += (num_baryc + 1) >> 1; 479 480 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 481 return ctx->num_interp_gpr; 482} 483 484int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 485{ 486 struct tgsi_full_immediate *immediate; 487 struct r600_shader_ctx ctx; 488 struct r600_bc_output output[32]; 489 unsigned output_done, noutput; 490 unsigned opcode; 491 int i, r = 0, pos0; 492 493 ctx.bc = &shader->bc; 494 ctx.shader = shader; 495 r = r600_bc_init(ctx.bc, shader->family); 496 if (r) 497 return r; 498 ctx.tokens = tokens; 499 tgsi_scan_shader(tokens, &ctx.info); 500 tgsi_parse_init(&ctx.parse, tokens); 501 ctx.type = ctx.parse.FullHeader.Processor.Processor; 502 shader->processor_type = ctx.type; 503 ctx.bc->type = shader->processor_type; 504 505 /* register allocations */ 506 /* Values [0,127] correspond to GPR[0..127]. 507 * Values [128,159] correspond to constant buffer bank 0 508 * Values [160,191] correspond to constant buffer bank 1 509 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 510 * Values [256,287] correspond to constant buffer bank 2 (EG) 511 * Values [288,319] correspond to constant buffer bank 3 (EG) 512 * Other special values are shown in the list below. 513 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 514 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 515 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 516 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 517 * 248 SQ_ALU_SRC_0: special constant 0.0. 518 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 519 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 520 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 521 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 522 * 253 SQ_ALU_SRC_LITERAL: literal constant. 523 * 254 SQ_ALU_SRC_PV: previous vector result. 524 * 255 SQ_ALU_SRC_PS: previous scalar result. 
525 */ 526 for (i = 0; i < TGSI_FILE_COUNT; i++) { 527 ctx.file_offset[i] = 0; 528 } 529 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 530 ctx.file_offset[TGSI_FILE_INPUT] = 1; 531 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 532 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 533 } else { 534 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 535 } 536 } 537 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 538 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 539 } 540 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 541 ctx.info.file_count[TGSI_FILE_INPUT]; 542 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 543 ctx.info.file_count[TGSI_FILE_OUTPUT]; 544 545 /* Outside the GPR range. This will be translated to one of the 546 * kcache banks later. */ 547 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 548 549 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 550 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 551 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 552 553 ctx.nliterals = 0; 554 ctx.literals = NULL; 555 556 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 557 tgsi_parse_token(&ctx.parse); 558 switch (ctx.parse.FullToken.Token.Type) { 559 case TGSI_TOKEN_TYPE_IMMEDIATE: 560 immediate = &ctx.parse.FullToken.FullImmediate; 561 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 562 if(ctx.literals == NULL) { 563 r = -ENOMEM; 564 goto out_err; 565 } 566 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 567 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 568 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 569 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 570 ctx.nliterals++; 571 break; 572 case TGSI_TOKEN_TYPE_DECLARATION: 573 r = tgsi_declaration(&ctx); 574 if (r) 575 goto out_err; 576 break; 577 case TGSI_TOKEN_TYPE_INSTRUCTION: 578 r = tgsi_is_supported(&ctx); 579 if (r) 580 goto out_err; 581 
ctx.max_driver_temp_used = 0; 582 /* reserve first tmp for everyone */ 583 r600_get_temp(&ctx); 584 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 585 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 586 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 587 else 588 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 589 r = ctx.inst_info->process(&ctx); 590 if (r) 591 goto out_err; 592 r = r600_bc_add_literal(ctx.bc, ctx.value); 593 if (r) 594 goto out_err; 595 break; 596 case TGSI_TOKEN_TYPE_PROPERTY: 597 break; 598 default: 599 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 600 r = -EINVAL; 601 goto out_err; 602 } 603 } 604 /* export output */ 605 noutput = shader->noutput; 606 for (i = 0, pos0 = 0; i < noutput; i++) { 607 memset(&output[i], 0, sizeof(struct r600_bc_output)); 608 output[i].gpr = shader->output[i].gpr; 609 output[i].elem_size = 3; 610 output[i].swizzle_x = 0; 611 output[i].swizzle_y = 1; 612 output[i].swizzle_z = 2; 613 output[i].swizzle_w = 3; 614 output[i].barrier = 1; 615 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 616 output[i].array_base = i - pos0; 617 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 618 switch (ctx.type) { 619 case TGSI_PROCESSOR_VERTEX: 620 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 621 output[i].array_base = 60; 622 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 623 /* position doesn't count in array_base */ 624 pos0++; 625 } 626 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 627 output[i].array_base = 61; 628 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 629 /* position doesn't count in array_base */ 630 pos0++; 631 } 632 break; 633 case TGSI_PROCESSOR_FRAGMENT: 634 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 635 output[i].array_base = shader->output[i].sid; 636 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 637 } else if (shader->output[i].name == 
TGSI_SEMANTIC_POSITION) { 638 output[i].array_base = 61; 639 output[i].swizzle_x = 2; 640 output[i].swizzle_y = 7; 641 output[i].swizzle_z = output[i].swizzle_w = 7; 642 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 643 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 644 output[i].array_base = 61; 645 output[i].swizzle_x = 7; 646 output[i].swizzle_y = 1; 647 output[i].swizzle_z = output[i].swizzle_w = 7; 648 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 649 } else { 650 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 651 r = -EINVAL; 652 goto out_err; 653 } 654 break; 655 default: 656 R600_ERR("unsupported processor type %d\n", ctx.type); 657 r = -EINVAL; 658 goto out_err; 659 } 660 } 661 /* add fake param output for vertex shader if no param is exported */ 662 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 663 for (i = 0, pos0 = 0; i < noutput; i++) { 664 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 665 pos0 = 1; 666 break; 667 } 668 } 669 if (!pos0) { 670 memset(&output[i], 0, sizeof(struct r600_bc_output)); 671 output[i].gpr = 0; 672 output[i].elem_size = 3; 673 output[i].swizzle_x = 0; 674 output[i].swizzle_y = 1; 675 output[i].swizzle_z = 2; 676 output[i].swizzle_w = 3; 677 output[i].barrier = 1; 678 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 679 output[i].array_base = 0; 680 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 681 noutput++; 682 } 683 } 684 /* add fake pixel export */ 685 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 686 memset(&output[0], 0, sizeof(struct r600_bc_output)); 687 output[0].gpr = 0; 688 output[0].elem_size = 3; 689 output[0].swizzle_x = 7; 690 output[0].swizzle_y = 7; 691 output[0].swizzle_z = 7; 692 output[0].swizzle_w = 7; 693 output[0].barrier = 1; 694 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 695 output[0].array_base = 0; 696 output[0].inst = BC_INST(ctx.bc, 
V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 697 noutput++; 698 } 699 /* set export done on last export of each type */ 700 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 701 if (i == (noutput - 1)) { 702 output[i].end_of_program = 1; 703 } 704 if (!(output_done & (1 << output[i].type))) { 705 output_done |= (1 << output[i].type); 706 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 707 } 708 } 709 /* add output to bytecode */ 710 for (i = 0; i < noutput; i++) { 711 r = r600_bc_add_output(ctx.bc, &output[i]); 712 if (r) 713 goto out_err; 714 } 715 free(ctx.literals); 716 tgsi_parse_free(&ctx.parse); 717 return 0; 718out_err: 719 free(ctx.literals); 720 tgsi_parse_free(&ctx.parse); 721 return r; 722} 723 724static int tgsi_unsupported(struct r600_shader_ctx *ctx) 725{ 726 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 727 return -EINVAL; 728} 729 730static int tgsi_end(struct r600_shader_ctx *ctx) 731{ 732 return 0; 733} 734 735static int tgsi_src(struct r600_shader_ctx *ctx, 736 const struct tgsi_full_src_register *tgsi_src, 737 struct r600_bc_alu_src *r600_src) 738{ 739 int index; 740 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 741 r600_src->sel = tgsi_src->Register.Index; 742 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 743 r600_src->sel = 0; 744 index = tgsi_src->Register.Index; 745 ctx->value[0] = ctx->literals[index * 4 + 0]; 746 ctx->value[1] = ctx->literals[index * 4 + 1]; 747 ctx->value[2] = ctx->literals[index * 4 + 2]; 748 ctx->value[3] = ctx->literals[index * 4 + 3]; 749 } 750 if (tgsi_src->Register.Indirect) 751 r600_src->rel = V_SQ_REL_RELATIVE; 752 r600_src->neg = tgsi_src->Register.Negate; 753 r600_src->abs = tgsi_src->Register.Absolute; 754 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 755 return 0; 756} 757 758static int tgsi_dst(struct r600_shader_ctx *ctx, 759 const struct tgsi_full_dst_register *tgsi_dst, 760 unsigned swizzle, 761 struct 
r600_bc_alu_dst *r600_dst) 762{ 763 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 764 765 r600_dst->sel = tgsi_dst->Register.Index; 766 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 767 r600_dst->chan = swizzle; 768 r600_dst->write = 1; 769 if (tgsi_dst->Register.Indirect) 770 r600_dst->rel = V_SQ_REL_RELATIVE; 771 if (inst->Instruction.Saturate) { 772 r600_dst->clamp = 1; 773 } 774 return 0; 775} 776 777static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 778{ 779 switch (swizzle) { 780 case 0: 781 return tgsi_src->Register.SwizzleX; 782 case 1: 783 return tgsi_src->Register.SwizzleY; 784 case 2: 785 return tgsi_src->Register.SwizzleZ; 786 case 3: 787 return tgsi_src->Register.SwizzleW; 788 default: 789 return 0; 790 } 791} 792 793static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 794{ 795 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 796 struct r600_bc_alu alu; 797 int i, j, k, nconst, r; 798 799 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 800 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 801 nconst++; 802 } 803 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 804 if (r) { 805 return r; 806 } 807 } 808 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 809 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 810 int treg = r600_get_temp(ctx); 811 for (k = 0; k < 4; k++) { 812 memset(&alu, 0, sizeof(struct r600_bc_alu)); 813 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 814 alu.src[0].sel = r600_src[i].sel; 815 alu.src[0].chan = k; 816 alu.src[0].rel = r600_src[i].rel; 817 alu.dst.sel = treg; 818 alu.dst.chan = k; 819 alu.dst.write = 1; 820 if (k == 3) 821 alu.last = 1; 822 r = r600_bc_add_alu(ctx->bc, &alu); 823 if (r) 824 return r; 825 } 826 r600_src[i].sel = treg; 827 r600_src[i].rel =0; 828 j--; 829 } 830 } 831 return 0; 832} 833 834/* 
need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 835static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 836{ 837 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 838 struct r600_bc_alu alu; 839 int i, j, k, nliteral, r; 840 841 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 842 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 843 nliteral++; 844 } 845 } 846 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 847 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 848 int treg = r600_get_temp(ctx); 849 for (k = 0; k < 4; k++) { 850 memset(&alu, 0, sizeof(struct r600_bc_alu)); 851 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 852 alu.src[0].sel = r600_src[i].sel; 853 alu.src[0].chan = k; 854 alu.dst.sel = treg; 855 alu.dst.chan = k; 856 alu.dst.write = 1; 857 if (k == 3) 858 alu.last = 1; 859 r = r600_bc_add_alu(ctx->bc, &alu); 860 if (r) 861 return r; 862 } 863 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 864 if (r) 865 return r; 866 r600_src[i].sel = treg; 867 j--; 868 } 869 } 870 return 0; 871} 872 873static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 874{ 875 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 876 struct r600_bc_alu_src r600_src[3]; 877 struct r600_bc_alu alu; 878 int i, j, r; 879 int lasti = 0; 880 881 for (i = 0; i < 4; i++) { 882 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 883 lasti = i; 884 } 885 } 886 887 r = tgsi_split_constant(ctx, r600_src); 888 if (r) 889 return r; 890 r = tgsi_split_literal_constant(ctx, r600_src); 891 if (r) 892 return r; 893 for (i = 0; i < lasti + 1; i++) { 894 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 895 continue; 896 897 memset(&alu, 0, sizeof(struct r600_bc_alu)); 898 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 899 if (r) 900 return 
r; 901 902 alu.inst = ctx->inst_info->r600_opcode; 903 if (!swap) { 904 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 905 alu.src[j] = r600_src[j]; 906 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 907 } 908 } else { 909 alu.src[0] = r600_src[1]; 910 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 911 912 alu.src[1] = r600_src[0]; 913 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 914 } 915 /* handle some special cases */ 916 switch (ctx->inst_info->tgsi_opcode) { 917 case TGSI_OPCODE_SUB: 918 alu.src[1].neg = 1; 919 break; 920 case TGSI_OPCODE_ABS: 921 alu.src[0].abs = 1; 922 break; 923 default: 924 break; 925 } 926 if (i == lasti) { 927 alu.last = 1; 928 } 929 r = r600_bc_add_alu(ctx->bc, &alu); 930 if (r) 931 return r; 932 } 933 return 0; 934} 935 936static int tgsi_op2(struct r600_shader_ctx *ctx) 937{ 938 return tgsi_op2_s(ctx, 0); 939} 940 941static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 942{ 943 return tgsi_op2_s(ctx, 1); 944} 945 946/* 947 * r600 - trunc to -PI..PI range 948 * r700 - normalize by dividing by 2PI 949 * see fdo bug 27901 950 */ 951static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 952 struct r600_bc_alu_src r600_src[3]) 953{ 954 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 955 int r, src0_chan; 956 uint32_t lit_vals[4]; 957 struct r600_bc_alu alu; 958 959 memset(lit_vals, 0, 4*4); 960 r = tgsi_split_constant(ctx, r600_src); 961 if (r) 962 return r; 963 r = tgsi_split_literal_constant(ctx, r600_src); 964 if (r) 965 return r; 966 967 src0_chan = tgsi_chan(&inst->Src[0], 0); 968 969 /* We are going to feed two literals to the MAD below, 970 * which means that if the first operand is a literal as well, 971 * we need to copy its value manually. 
972 */ 973 if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) { 974 unsigned index = inst->Src[0].Register.Index; 975 976 lit_vals[2] = ctx->literals[index * 4 + src0_chan]; 977 src0_chan = 2; 978 } 979 980 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 981 lit_vals[1] = fui(0.5f); 982 983 memset(&alu, 0, sizeof(struct r600_bc_alu)); 984 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 985 alu.is_op3 = 1; 986 987 alu.dst.chan = 0; 988 alu.dst.sel = ctx->temp_reg; 989 alu.dst.write = 1; 990 991 alu.src[0] = r600_src[0]; 992 alu.src[0].chan = src0_chan; 993 994 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 995 alu.src[1].chan = 0; 996 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 997 alu.src[2].chan = 1; 998 alu.last = 1; 999 r = r600_bc_add_alu(ctx->bc, &alu); 1000 if (r) 1001 return r; 1002 r = r600_bc_add_literal(ctx->bc, lit_vals); 1003 if (r) 1004 return r; 1005 1006 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1007 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1008 1009 alu.dst.chan = 0; 1010 alu.dst.sel = ctx->temp_reg; 1011 alu.dst.write = 1; 1012 1013 alu.src[0].sel = ctx->temp_reg; 1014 alu.src[0].chan = 0; 1015 alu.last = 1; 1016 r = r600_bc_add_alu(ctx->bc, &alu); 1017 if (r) 1018 return r; 1019 1020 if (ctx->bc->chiprev == CHIPREV_R600) { 1021 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1022 lit_vals[1] = fui(-3.1415926535897f); 1023 } else { 1024 lit_vals[0] = fui(1.0f); 1025 lit_vals[1] = fui(-0.5f); 1026 } 1027 1028 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1030 alu.is_op3 = 1; 1031 1032 alu.dst.chan = 0; 1033 alu.dst.sel = ctx->temp_reg; 1034 alu.dst.write = 1; 1035 1036 alu.src[0].sel = ctx->temp_reg; 1037 alu.src[0].chan = 0; 1038 1039 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1040 alu.src[1].chan = 0; 1041 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1042 alu.src[2].chan = 1; 1043 alu.last = 1; 1044 r = r600_bc_add_alu(ctx->bc, &alu); 1045 if (r) 1046 return r; 1047 r = 
r600_bc_add_literal(ctx->bc, lit_vals); 1048 if (r) 1049 return r; 1050 return 0; 1051} 1052 1053static int tgsi_trig(struct r600_shader_ctx *ctx) 1054{ 1055 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1056 struct r600_bc_alu_src r600_src[3]; 1057 struct r600_bc_alu alu; 1058 int i, r; 1059 int lasti = 0; 1060 1061 r = tgsi_setup_trig(ctx, r600_src); 1062 if (r) 1063 return r; 1064 1065 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1066 alu.inst = ctx->inst_info->r600_opcode; 1067 alu.dst.chan = 0; 1068 alu.dst.sel = ctx->temp_reg; 1069 alu.dst.write = 1; 1070 1071 alu.src[0].sel = ctx->temp_reg; 1072 alu.src[0].chan = 0; 1073 alu.last = 1; 1074 r = r600_bc_add_alu(ctx->bc, &alu); 1075 if (r) 1076 return r; 1077 1078 /* replicate result */ 1079 for (i = 0; i < 4; i++) { 1080 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1081 lasti = i; 1082 } 1083 for (i = 0; i < lasti + 1; i++) { 1084 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1085 continue; 1086 1087 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1089 1090 alu.src[0].sel = ctx->temp_reg; 1091 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1092 if (r) 1093 return r; 1094 if (i == lasti) 1095 alu.last = 1; 1096 r = r600_bc_add_alu(ctx->bc, &alu); 1097 if (r) 1098 return r; 1099 } 1100 return 0; 1101} 1102 1103static int tgsi_scs(struct r600_shader_ctx *ctx) 1104{ 1105 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1106 struct r600_bc_alu_src r600_src[3]; 1107 struct r600_bc_alu alu; 1108 int r; 1109 1110 /* We'll only need the trig stuff if we are going to write to the 1111 * X or Y components of the destination vector. 
1112 */ 1113 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1114 r = tgsi_setup_trig(ctx, r600_src); 1115 if (r) 1116 return r; 1117 } 1118 1119 /* dst.x = COS */ 1120 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1121 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1123 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1124 if (r) 1125 return r; 1126 1127 alu.src[0].sel = ctx->temp_reg; 1128 alu.src[0].chan = 0; 1129 alu.last = 1; 1130 r = r600_bc_add_alu(ctx->bc, &alu); 1131 if (r) 1132 return r; 1133 } 1134 1135 /* dst.y = SIN */ 1136 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1137 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1138 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1139 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1140 if (r) 1141 return r; 1142 1143 alu.src[0].sel = ctx->temp_reg; 1144 alu.src[0].chan = 0; 1145 alu.last = 1; 1146 r = r600_bc_add_alu(ctx->bc, &alu); 1147 if (r) 1148 return r; 1149 } 1150 1151 /* dst.z = 0.0; */ 1152 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1153 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1154 1155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1156 1157 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1158 if (r) 1159 return r; 1160 1161 alu.src[0].sel = V_SQ_ALU_SRC_0; 1162 alu.src[0].chan = 0; 1163 1164 alu.last = 1; 1165 1166 r = r600_bc_add_alu(ctx->bc, &alu); 1167 if (r) 1168 return r; 1169 1170 r = r600_bc_add_literal(ctx->bc, ctx->value); 1171 if (r) 1172 return r; 1173 } 1174 1175 /* dst.w = 1.0; */ 1176 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1177 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1178 1179 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1180 1181 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1182 if (r) 1183 return r; 1184 1185 alu.src[0].sel = V_SQ_ALU_SRC_1; 1186 alu.src[0].chan = 0; 1187 1188 alu.last = 1; 1189 1190 r = 
r600_bc_add_alu(ctx->bc, &alu); 1191 if (r) 1192 return r; 1193 1194 r = r600_bc_add_literal(ctx->bc, ctx->value); 1195 if (r) 1196 return r; 1197 } 1198 1199 return 0; 1200} 1201 1202static int tgsi_kill(struct r600_shader_ctx *ctx) 1203{ 1204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1205 struct r600_bc_alu alu; 1206 int i, r; 1207 1208 for (i = 0; i < 4; i++) { 1209 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1210 alu.inst = ctx->inst_info->r600_opcode; 1211 1212 alu.dst.chan = i; 1213 1214 alu.src[0].sel = V_SQ_ALU_SRC_0; 1215 1216 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1217 alu.src[1].sel = V_SQ_ALU_SRC_1; 1218 alu.src[1].neg = 1; 1219 } else { 1220 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1221 if (r) 1222 return r; 1223 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1224 } 1225 if (i == 3) { 1226 alu.last = 1; 1227 } 1228 r = r600_bc_add_alu(ctx->bc, &alu); 1229 if (r) 1230 return r; 1231 } 1232 r = r600_bc_add_literal(ctx->bc, ctx->value); 1233 if (r) 1234 return r; 1235 1236 /* kill must be last in ALU */ 1237 ctx->bc->force_add_cf = 1; 1238 ctx->shader->uses_kill = TRUE; 1239 return 0; 1240} 1241 1242static int tgsi_lit(struct r600_shader_ctx *ctx) 1243{ 1244 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1245 struct r600_bc_alu alu; 1246 struct r600_bc_alu_src r600_src[3]; 1247 int r; 1248 1249 r = tgsi_split_constant(ctx, r600_src); 1250 if (r) 1251 return r; 1252 r = tgsi_split_literal_constant(ctx, r600_src); 1253 if (r) 1254 return r; 1255 1256 /* dst.x, <- 1.0 */ 1257 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1258 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1259 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1260 alu.src[0].chan = 0; 1261 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1262 if (r) 1263 return r; 1264 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1265 r = r600_bc_add_alu(ctx->bc, &alu); 1266 if (r) 1267 return r; 1268 1269 /* 
dst.y = max(src.x, 0.0) */ 1270 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1272 alu.src[0] = r600_src[0]; 1273 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1274 alu.src[1].chan = 0; 1275 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1276 if (r) 1277 return r; 1278 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1279 r = r600_bc_add_alu(ctx->bc, &alu); 1280 if (r) 1281 return r; 1282 1283 /* dst.w, <- 1.0 */ 1284 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1286 alu.src[0].sel = V_SQ_ALU_SRC_1; 1287 alu.src[0].chan = 0; 1288 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1289 if (r) 1290 return r; 1291 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1292 alu.last = 1; 1293 r = r600_bc_add_alu(ctx->bc, &alu); 1294 if (r) 1295 return r; 1296 1297 r = r600_bc_add_literal(ctx->bc, ctx->value); 1298 if (r) 1299 return r; 1300 1301 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1302 { 1303 int chan; 1304 int sel; 1305 1306 /* dst.z = log(src.y) */ 1307 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1309 alu.src[0] = r600_src[0]; 1310 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1311 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1312 if (r) 1313 return r; 1314 alu.last = 1; 1315 r = r600_bc_add_alu(ctx->bc, &alu); 1316 if (r) 1317 return r; 1318 1319 r = r600_bc_add_literal(ctx->bc, ctx->value); 1320 if (r) 1321 return r; 1322 1323 chan = alu.dst.chan; 1324 sel = alu.dst.sel; 1325 1326 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1327 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1329 alu.src[0] = r600_src[0]; 1330 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1331 alu.src[1].sel = sel; 1332 alu.src[1].chan = chan; 1333 1334 alu.src[2] = r600_src[0]; 1335 alu.src[2].chan = 
tgsi_chan(&inst->Src[0], 0); 1336 alu.dst.sel = ctx->temp_reg; 1337 alu.dst.chan = 0; 1338 alu.dst.write = 1; 1339 alu.is_op3 = 1; 1340 alu.last = 1; 1341 r = r600_bc_add_alu(ctx->bc, &alu); 1342 if (r) 1343 return r; 1344 1345 r = r600_bc_add_literal(ctx->bc, ctx->value); 1346 if (r) 1347 return r; 1348 /* dst.z = exp(tmp.x) */ 1349 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1351 alu.src[0].sel = ctx->temp_reg; 1352 alu.src[0].chan = 0; 1353 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1354 if (r) 1355 return r; 1356 alu.last = 1; 1357 r = r600_bc_add_alu(ctx->bc, &alu); 1358 if (r) 1359 return r; 1360 } 1361 return 0; 1362} 1363 1364static int tgsi_rsq(struct r600_shader_ctx *ctx) 1365{ 1366 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1367 struct r600_bc_alu alu; 1368 int i, r; 1369 1370 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1371 1372 /* FIXME: 1373 * For state trackers other than OpenGL, we'll want to use 1374 * _RECIPSQRT_IEEE instead. 
1375 */ 1376 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1377 1378 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1379 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1380 if (r) 1381 return r; 1382 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1383 alu.src[i].abs = 1; 1384 } 1385 alu.dst.sel = ctx->temp_reg; 1386 alu.dst.write = 1; 1387 alu.last = 1; 1388 r = r600_bc_add_alu(ctx->bc, &alu); 1389 if (r) 1390 return r; 1391 r = r600_bc_add_literal(ctx->bc, ctx->value); 1392 if (r) 1393 return r; 1394 /* replicate result */ 1395 return tgsi_helper_tempx_replicate(ctx); 1396} 1397 1398static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1399{ 1400 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1401 struct r600_bc_alu alu; 1402 int i, r; 1403 1404 for (i = 0; i < 4; i++) { 1405 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1406 alu.src[0].sel = ctx->temp_reg; 1407 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1408 alu.dst.chan = i; 1409 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1410 if (r) 1411 return r; 1412 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1413 if (i == 3) 1414 alu.last = 1; 1415 r = r600_bc_add_alu(ctx->bc, &alu); 1416 if (r) 1417 return r; 1418 } 1419 return 0; 1420} 1421 1422static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1423{ 1424 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1425 struct r600_bc_alu alu; 1426 int i, r; 1427 1428 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1429 alu.inst = ctx->inst_info->r600_opcode; 1430 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1431 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1432 if (r) 1433 return r; 1434 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1435 } 1436 alu.dst.sel = ctx->temp_reg; 1437 alu.dst.write = 1; 1438 alu.last = 1; 1439 r = r600_bc_add_alu(ctx->bc, &alu); 1440 if (r) 1441 return r; 1442 r = r600_bc_add_literal(ctx->bc, 
ctx->value); 1443 if (r) 1444 return r; 1445 /* replicate result */ 1446 return tgsi_helper_tempx_replicate(ctx); 1447} 1448 1449static int tgsi_pow(struct r600_shader_ctx *ctx) 1450{ 1451 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1452 struct r600_bc_alu alu; 1453 int r; 1454 1455 /* LOG2(a) */ 1456 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1457 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1458 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1459 if (r) 1460 return r; 1461 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1462 alu.dst.sel = ctx->temp_reg; 1463 alu.dst.write = 1; 1464 alu.last = 1; 1465 r = r600_bc_add_alu(ctx->bc, &alu); 1466 if (r) 1467 return r; 1468 r = r600_bc_add_literal(ctx->bc,ctx->value); 1469 if (r) 1470 return r; 1471 /* b * LOG2(a) */ 1472 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1473 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1474 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1475 if (r) 1476 return r; 1477 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1478 alu.src[1].sel = ctx->temp_reg; 1479 alu.dst.sel = ctx->temp_reg; 1480 alu.dst.write = 1; 1481 alu.last = 1; 1482 r = r600_bc_add_alu(ctx->bc, &alu); 1483 if (r) 1484 return r; 1485 r = r600_bc_add_literal(ctx->bc,ctx->value); 1486 if (r) 1487 return r; 1488 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1489 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1491 alu.src[0].sel = ctx->temp_reg; 1492 alu.dst.sel = ctx->temp_reg; 1493 alu.dst.write = 1; 1494 alu.last = 1; 1495 r = r600_bc_add_alu(ctx->bc, &alu); 1496 if (r) 1497 return r; 1498 r = r600_bc_add_literal(ctx->bc,ctx->value); 1499 if (r) 1500 return r; 1501 return tgsi_helper_tempx_replicate(ctx); 1502} 1503 1504static int tgsi_ssg(struct r600_shader_ctx *ctx) 1505{ 1506 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1507 struct r600_bc_alu alu; 1508 struct r600_bc_alu_src 
r600_src[3]; 1509 int i, r; 1510 1511 r = tgsi_split_constant(ctx, r600_src); 1512 if (r) 1513 return r; 1514 r = tgsi_split_literal_constant(ctx, r600_src); 1515 if (r) 1516 return r; 1517 1518 /* tmp = (src > 0 ? 1 : src) */ 1519 for (i = 0; i < 4; i++) { 1520 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1521 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1522 alu.is_op3 = 1; 1523 1524 alu.dst.sel = ctx->temp_reg; 1525 alu.dst.chan = i; 1526 1527 alu.src[0] = r600_src[0]; 1528 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1529 1530 alu.src[1].sel = V_SQ_ALU_SRC_1; 1531 1532 alu.src[2] = r600_src[0]; 1533 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1534 if (i == 3) 1535 alu.last = 1; 1536 r = r600_bc_add_alu(ctx->bc, &alu); 1537 if (r) 1538 return r; 1539 } 1540 r = r600_bc_add_literal(ctx->bc, ctx->value); 1541 if (r) 1542 return r; 1543 1544 /* dst = (-tmp > 0 ? -1 : tmp) */ 1545 for (i = 0; i < 4; i++) { 1546 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1548 alu.is_op3 = 1; 1549 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1550 if (r) 1551 return r; 1552 1553 alu.src[0].sel = ctx->temp_reg; 1554 alu.src[0].chan = i; 1555 alu.src[0].neg = 1; 1556 1557 alu.src[1].sel = V_SQ_ALU_SRC_1; 1558 alu.src[1].neg = 1; 1559 1560 alu.src[2].sel = ctx->temp_reg; 1561 alu.src[2].chan = i; 1562 1563 if (i == 3) 1564 alu.last = 1; 1565 r = r600_bc_add_alu(ctx->bc, &alu); 1566 if (r) 1567 return r; 1568 } 1569 return 0; 1570} 1571 1572static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1573{ 1574 struct r600_bc_alu alu; 1575 int i, r; 1576 1577 r = r600_bc_add_literal(ctx->bc, ctx->value); 1578 if (r) 1579 return r; 1580 for (i = 0; i < 4; i++) { 1581 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1582 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1584 alu.dst.chan = i; 1585 } else { 1586 
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1587 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1588 if (r) 1589 return r; 1590 alu.src[0].sel = ctx->temp_reg; 1591 alu.src[0].chan = i; 1592 } 1593 if (i == 3) { 1594 alu.last = 1; 1595 } 1596 r = r600_bc_add_alu(ctx->bc, &alu); 1597 if (r) 1598 return r; 1599 } 1600 return 0; 1601} 1602 1603static int tgsi_op3(struct r600_shader_ctx *ctx) 1604{ 1605 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1606 struct r600_bc_alu_src r600_src[3]; 1607 struct r600_bc_alu alu; 1608 int i, j, r; 1609 int lasti = 0; 1610 1611 for (i = 0; i < 4; i++) { 1612 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1613 lasti = i; 1614 } 1615 } 1616 1617 r = tgsi_split_constant(ctx, r600_src); 1618 if (r) 1619 return r; 1620 r = tgsi_split_literal_constant(ctx, r600_src); 1621 if (r) 1622 return r; 1623 for (i = 0; i < lasti + 1; i++) { 1624 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1625 continue; 1626 1627 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1628 alu.inst = ctx->inst_info->r600_opcode; 1629 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1630 alu.src[j] = r600_src[j]; 1631 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1632 } 1633 1634 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1635 if (r) 1636 return r; 1637 1638 alu.dst.chan = i; 1639 alu.dst.write = 1; 1640 alu.is_op3 = 1; 1641 if (i == lasti) { 1642 alu.last = 1; 1643 } 1644 r = r600_bc_add_alu(ctx->bc, &alu); 1645 if (r) 1646 return r; 1647 } 1648 return 0; 1649} 1650 1651static int tgsi_dp(struct r600_shader_ctx *ctx) 1652{ 1653 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1654 struct r600_bc_alu_src r600_src[3]; 1655 struct r600_bc_alu alu; 1656 int i, j, r; 1657 1658 r = tgsi_split_constant(ctx, r600_src); 1659 if (r) 1660 return r; 1661 r = tgsi_split_literal_constant(ctx, r600_src); 1662 if (r) 1663 return r; 1664 for (i = 0; i < 4; i++) { 1665 memset(&alu, 0, sizeof(struct 
r600_bc_alu)); 1666 alu.inst = ctx->inst_info->r600_opcode; 1667 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1668 alu.src[j] = r600_src[j]; 1669 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1670 } 1671 if(inst->Dst[0].Register.WriteMask & (1 << i)) { 1672 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1673 if (r) 1674 return r; 1675 } else { 1676 alu.dst.sel = ctx->temp_reg; 1677 } 1678 alu.dst.chan = i; 1679 alu.dst.write = 1; 1680 /* handle some special cases */ 1681 switch (ctx->inst_info->tgsi_opcode) { 1682 case TGSI_OPCODE_DP2: 1683 if (i > 1) { 1684 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1685 alu.src[0].chan = alu.src[1].chan = 0; 1686 } 1687 break; 1688 case TGSI_OPCODE_DP3: 1689 if (i > 2) { 1690 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1691 alu.src[0].chan = alu.src[1].chan = 0; 1692 } 1693 break; 1694 case TGSI_OPCODE_DPH: 1695 if (i == 3) { 1696 alu.src[0].sel = V_SQ_ALU_SRC_1; 1697 alu.src[0].chan = 0; 1698 alu.src[0].neg = 0; 1699 } 1700 break; 1701 default: 1702 break; 1703 } 1704 if (i == 3) { 1705 alu.last = 1; 1706 } 1707 r = r600_bc_add_alu(ctx->bc, &alu); 1708 if (r) 1709 return r; 1710 } 1711 return 0; 1712} 1713 1714static int tgsi_tex(struct r600_shader_ctx *ctx) 1715{ 1716 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1717 struct r600_bc_tex tex; 1718 struct r600_bc_alu alu; 1719 unsigned src_gpr; 1720 int r, i; 1721 int opcode; 1722 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1723 uint32_t lit_vals[4]; 1724 1725 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1726 1727 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1728 /* Add perspective divide */ 1729 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1730 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1731 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1732 if (r) 1733 return r; 1734 1735 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1736 
alu.dst.sel = ctx->temp_reg; 1737 alu.dst.chan = 3; 1738 alu.last = 1; 1739 alu.dst.write = 1; 1740 r = r600_bc_add_alu(ctx->bc, &alu); 1741 if (r) 1742 return r; 1743 1744 for (i = 0; i < 3; i++) { 1745 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1746 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1747 alu.src[0].sel = ctx->temp_reg; 1748 alu.src[0].chan = 3; 1749 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1750 if (r) 1751 return r; 1752 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1753 alu.dst.sel = ctx->temp_reg; 1754 alu.dst.chan = i; 1755 alu.dst.write = 1; 1756 r = r600_bc_add_alu(ctx->bc, &alu); 1757 if (r) 1758 return r; 1759 } 1760 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1761 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1762 alu.src[0].sel = V_SQ_ALU_SRC_1; 1763 alu.src[0].chan = 0; 1764 alu.dst.sel = ctx->temp_reg; 1765 alu.dst.chan = 3; 1766 alu.last = 1; 1767 alu.dst.write = 1; 1768 r = r600_bc_add_alu(ctx->bc, &alu); 1769 if (r) 1770 return r; 1771 src_not_temp = FALSE; 1772 src_gpr = ctx->temp_reg; 1773 } 1774 1775 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1776 int src_chan, src2_chan; 1777 1778 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1779 for (i = 0; i < 4; i++) { 1780 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1781 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1782 switch (i) { 1783 case 0: 1784 src_chan = 2; 1785 src2_chan = 1; 1786 break; 1787 case 1: 1788 src_chan = 2; 1789 src2_chan = 0; 1790 break; 1791 case 2: 1792 src_chan = 0; 1793 src2_chan = 2; 1794 break; 1795 case 3: 1796 src_chan = 1; 1797 src2_chan = 2; 1798 break; 1799 default: 1800 assert(0); 1801 src_chan = 0; 1802 src2_chan = 0; 1803 break; 1804 } 1805 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1806 if (r) 1807 return r; 1808 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1809 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1810 if (r) 1811 return r; 1812 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1813 
alu.dst.sel = ctx->temp_reg; 1814 alu.dst.chan = i; 1815 if (i == 3) 1816 alu.last = 1; 1817 alu.dst.write = 1; 1818 r = r600_bc_add_alu(ctx->bc, &alu); 1819 if (r) 1820 return r; 1821 } 1822 1823 /* tmp1.z = RCP_e(|tmp1.z|) */ 1824 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1825 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1826 alu.src[0].sel = ctx->temp_reg; 1827 alu.src[0].chan = 2; 1828 alu.src[0].abs = 1; 1829 alu.dst.sel = ctx->temp_reg; 1830 alu.dst.chan = 2; 1831 alu.dst.write = 1; 1832 alu.last = 1; 1833 r = r600_bc_add_alu(ctx->bc, &alu); 1834 if (r) 1835 return r; 1836 1837 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1838 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1839 * muladd has no writemask, have to use another temp 1840 */ 1841 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1843 alu.is_op3 = 1; 1844 1845 alu.src[0].sel = ctx->temp_reg; 1846 alu.src[0].chan = 0; 1847 alu.src[1].sel = ctx->temp_reg; 1848 alu.src[1].chan = 2; 1849 1850 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1851 alu.src[2].chan = 0; 1852 1853 alu.dst.sel = ctx->temp_reg; 1854 alu.dst.chan = 0; 1855 alu.dst.write = 1; 1856 1857 r = r600_bc_add_alu(ctx->bc, &alu); 1858 if (r) 1859 return r; 1860 1861 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1862 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1863 alu.is_op3 = 1; 1864 1865 alu.src[0].sel = ctx->temp_reg; 1866 alu.src[0].chan = 1; 1867 alu.src[1].sel = ctx->temp_reg; 1868 alu.src[1].chan = 2; 1869 1870 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1871 alu.src[2].chan = 0; 1872 1873 alu.dst.sel = ctx->temp_reg; 1874 alu.dst.chan = 1; 1875 alu.dst.write = 1; 1876 1877 alu.last = 1; 1878 r = r600_bc_add_alu(ctx->bc, &alu); 1879 if (r) 1880 return r; 1881 1882 lit_vals[0] = fui(1.5f); 1883 1884 r = r600_bc_add_literal(ctx->bc, lit_vals); 1885 if (r) 1886 return r; 1887 src_not_temp = FALSE; 1888 src_gpr = ctx->temp_reg; 1889 } 1890 1891 if 
(src_not_temp) { 1892 for (i = 0; i < 4; i++) { 1893 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1894 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1895 alu.src[0].sel = src_gpr; 1896 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1897 alu.dst.sel = ctx->temp_reg; 1898 alu.dst.chan = i; 1899 if (i == 3) 1900 alu.last = 1; 1901 alu.dst.write = 1; 1902 r = r600_bc_add_alu(ctx->bc, &alu); 1903 if (r) 1904 return r; 1905 } 1906 src_gpr = ctx->temp_reg; 1907 } 1908 1909 opcode = ctx->inst_info->r600_opcode; 1910 if (opcode == SQ_TEX_INST_SAMPLE && 1911 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1912 opcode = SQ_TEX_INST_SAMPLE_C; 1913 1914 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1915 tex.inst = opcode; 1916 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1917 tex.resource_id = tex.sampler_id; 1918 tex.src_gpr = src_gpr; 1919 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1920 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1921 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1922 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1923 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 1924 tex.src_sel_x = 0; 1925 tex.src_sel_y = 1; 1926 tex.src_sel_z = 2; 1927 tex.src_sel_w = 3; 1928 1929 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1930 tex.src_sel_x = 1; 1931 tex.src_sel_y = 0; 1932 tex.src_sel_z = 3; 1933 tex.src_sel_w = 1; 1934 } 1935 1936 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1937 tex.coord_type_x = 1; 1938 tex.coord_type_y = 1; 1939 tex.coord_type_z = 1; 1940 tex.coord_type_w = 1; 1941 } 1942 1943 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1944 tex.src_sel_w = 2; 1945 1946 r = r600_bc_add_tex(ctx->bc, &tex); 1947 if (r) 1948 return r; 1949 1950 /* add shadow ambient support - gallium doesn't do it yet */ 1951 return 0; 1952} 1953 1954static int tgsi_lrp(struct r600_shader_ctx *ctx) 1955{ 1956 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1957 struct r600_bc_alu_src r600_src[3]; 1958 struct r600_bc_alu alu; 1959 unsigned i; 1960 int r; 1961 1962 r = tgsi_split_constant(ctx, r600_src); 1963 if (r) 1964 return r; 1965 r = tgsi_split_literal_constant(ctx, r600_src); 1966 if (r) 1967 return r; 1968 /* 1 - src0 */ 1969 for (i = 0; i < 4; i++) { 1970 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1972 alu.src[0].sel = V_SQ_ALU_SRC_1; 1973 alu.src[0].chan = 0; 1974 alu.src[1] = r600_src[0]; 1975 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1976 alu.src[1].neg = 1; 1977 alu.dst.sel = ctx->temp_reg; 1978 alu.dst.chan = i; 1979 if (i == 3) { 1980 alu.last = 1; 1981 } 1982 alu.dst.write = 1; 1983 r = r600_bc_add_alu(ctx->bc, &alu); 1984 if (r) 1985 return r; 1986 } 1987 r = r600_bc_add_literal(ctx->bc, ctx->value); 1988 if (r) 1989 return r; 1990 1991 /* (1 - src0) * src2 */ 1992 for (i = 0; i < 4; i++) { 1993 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1995 alu.src[0].sel = ctx->temp_reg; 1996 alu.src[0].chan = i; 
1997 alu.src[1] = r600_src[2]; 1998 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1999 alu.dst.sel = ctx->temp_reg; 2000 alu.dst.chan = i; 2001 if (i == 3) { 2002 alu.last = 1; 2003 } 2004 alu.dst.write = 1; 2005 r = r600_bc_add_alu(ctx->bc, &alu); 2006 if (r) 2007 return r; 2008 } 2009 r = r600_bc_add_literal(ctx->bc, ctx->value); 2010 if (r) 2011 return r; 2012 2013 /* src0 * src1 + (1 - src0) * src2 */ 2014 for (i = 0; i < 4; i++) { 2015 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2016 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2017 alu.is_op3 = 1; 2018 alu.src[0] = r600_src[0]; 2019 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2020 alu.src[1] = r600_src[1]; 2021 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2022 alu.src[2].sel = ctx->temp_reg; 2023 alu.src[2].chan = i; 2024 alu.dst.sel = ctx->temp_reg; 2025 alu.dst.chan = i; 2026 if (i == 3) { 2027 alu.last = 1; 2028 } 2029 r = r600_bc_add_alu(ctx->bc, &alu); 2030 if (r) 2031 return r; 2032 } 2033 return tgsi_helper_copy(ctx, inst); 2034} 2035 2036static int tgsi_cmp(struct r600_shader_ctx *ctx) 2037{ 2038 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2039 struct r600_bc_alu_src r600_src[3]; 2040 struct r600_bc_alu alu; 2041 int i, r; 2042 int lasti = 0; 2043 2044 for (i = 0; i < 4; i++) { 2045 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 2046 lasti = i; 2047 } 2048 } 2049 2050 r = tgsi_split_constant(ctx, r600_src); 2051 if (r) 2052 return r; 2053 r = tgsi_split_literal_constant(ctx, r600_src); 2054 if (r) 2055 return r; 2056 2057 for (i = 0; i < lasti + 1; i++) { 2058 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2059 continue; 2060 2061 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2062 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2063 alu.src[0] = r600_src[0]; 2064 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2065 2066 alu.src[1] = r600_src[2]; 2067 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2068 2069 alu.src[2] = r600_src[1]; 
2070 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2071 2072 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2073 if (r) 2074 return r; 2075 2076 alu.dst.chan = i; 2077 alu.dst.write = 1; 2078 alu.is_op3 = 1; 2079 if (i == lasti) 2080 alu.last = 1; 2081 r = r600_bc_add_alu(ctx->bc, &alu); 2082 if (r) 2083 return r; 2084 } 2085 return 0; 2086} 2087 2088static int tgsi_xpd(struct r600_shader_ctx *ctx) 2089{ 2090 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2091 struct r600_bc_alu_src r600_src[3]; 2092 struct r600_bc_alu alu; 2093 uint32_t use_temp = 0; 2094 int i, r; 2095 2096 if (inst->Dst[0].Register.WriteMask != 0xf) 2097 use_temp = 1; 2098 2099 r = tgsi_split_constant(ctx, r600_src); 2100 if (r) 2101 return r; 2102 r = tgsi_split_literal_constant(ctx, r600_src); 2103 if (r) 2104 return r; 2105 2106 for (i = 0; i < 4; i++) { 2107 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2108 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2109 2110 alu.src[0] = r600_src[0]; 2111 switch (i) { 2112 case 0: 2113 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2114 break; 2115 case 1: 2116 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2117 break; 2118 case 2: 2119 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2120 break; 2121 case 3: 2122 alu.src[0].sel = V_SQ_ALU_SRC_0; 2123 alu.src[0].chan = i; 2124 } 2125 2126 alu.src[1] = r600_src[1]; 2127 switch (i) { 2128 case 0: 2129 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2130 break; 2131 case 1: 2132 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2133 break; 2134 case 2: 2135 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2136 break; 2137 case 3: 2138 alu.src[1].sel = V_SQ_ALU_SRC_0; 2139 alu.src[1].chan = i; 2140 } 2141 2142 alu.dst.sel = ctx->temp_reg; 2143 alu.dst.chan = i; 2144 alu.dst.write = 1; 2145 2146 if (i == 3) 2147 alu.last = 1; 2148 r = r600_bc_add_alu(ctx->bc, &alu); 2149 if (r) 2150 return r; 2151 2152 r = r600_bc_add_literal(ctx->bc, ctx->value); 2153 if (r) 2154 return r; 
2155 } 2156 2157 for (i = 0; i < 4; i++) { 2158 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2159 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2160 2161 alu.src[0] = r600_src[0]; 2162 switch (i) { 2163 case 0: 2164 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2165 break; 2166 case 1: 2167 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2168 break; 2169 case 2: 2170 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2171 break; 2172 case 3: 2173 alu.src[0].sel = V_SQ_ALU_SRC_0; 2174 alu.src[0].chan = i; 2175 } 2176 2177 alu.src[1] = r600_src[1]; 2178 switch (i) { 2179 case 0: 2180 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2181 break; 2182 case 1: 2183 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2184 break; 2185 case 2: 2186 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2187 break; 2188 case 3: 2189 alu.src[1].sel = V_SQ_ALU_SRC_0; 2190 alu.src[1].chan = i; 2191 } 2192 2193 alu.src[2].sel = ctx->temp_reg; 2194 alu.src[2].neg = 1; 2195 alu.src[2].chan = i; 2196 2197 if (use_temp) 2198 alu.dst.sel = ctx->temp_reg; 2199 else { 2200 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2201 if (r) 2202 return r; 2203 } 2204 alu.dst.chan = i; 2205 alu.dst.write = 1; 2206 alu.is_op3 = 1; 2207 if (i == 3) 2208 alu.last = 1; 2209 r = r600_bc_add_alu(ctx->bc, &alu); 2210 if (r) 2211 return r; 2212 2213 r = r600_bc_add_literal(ctx->bc, ctx->value); 2214 if (r) 2215 return r; 2216 } 2217 if (use_temp) 2218 return tgsi_helper_copy(ctx, inst); 2219 return 0; 2220} 2221 2222static int tgsi_exp(struct r600_shader_ctx *ctx) 2223{ 2224 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2225 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2226 struct r600_bc_alu alu; 2227 int r; 2228 2229 /* result.x = 2^floor(src); */ 2230 if (inst->Dst[0].Register.WriteMask & 1) { 2231 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2232 2233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2234 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2235 if (r) 
2236 return r; 2237 2238 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2239 2240 alu.dst.sel = ctx->temp_reg; 2241 alu.dst.chan = 0; 2242 alu.dst.write = 1; 2243 alu.last = 1; 2244 r = r600_bc_add_alu(ctx->bc, &alu); 2245 if (r) 2246 return r; 2247 2248 r = r600_bc_add_literal(ctx->bc, ctx->value); 2249 if (r) 2250 return r; 2251 2252 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2253 alu.src[0].sel = ctx->temp_reg; 2254 alu.src[0].chan = 0; 2255 2256 alu.dst.sel = ctx->temp_reg; 2257 alu.dst.chan = 0; 2258 alu.dst.write = 1; 2259 alu.last = 1; 2260 r = r600_bc_add_alu(ctx->bc, &alu); 2261 if (r) 2262 return r; 2263 2264 r = r600_bc_add_literal(ctx->bc, ctx->value); 2265 if (r) 2266 return r; 2267 } 2268 2269 /* result.y = tmp - floor(tmp); */ 2270 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2271 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2272 2273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2274 alu.src[0] = r600_src[0]; 2275 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2276 if (r) 2277 return r; 2278 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2279 2280 alu.dst.sel = ctx->temp_reg; 2281// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2282// if (r) 2283// return r; 2284 alu.dst.write = 1; 2285 alu.dst.chan = 1; 2286 2287 alu.last = 1; 2288 2289 r = r600_bc_add_alu(ctx->bc, &alu); 2290 if (r) 2291 return r; 2292 r = r600_bc_add_literal(ctx->bc, ctx->value); 2293 if (r) 2294 return r; 2295 } 2296 2297 /* result.z = RoughApprox2ToX(tmp);*/ 2298 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2299 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2301 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2302 if (r) 2303 return r; 2304 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2305 2306 alu.dst.sel = ctx->temp_reg; 2307 alu.dst.write = 1; 2308 alu.dst.chan = 2; 2309 2310 alu.last = 1; 2311 2312 r = r600_bc_add_alu(ctx->bc, &alu); 2313 if (r) 2314 return r; 
2315 r = r600_bc_add_literal(ctx->bc, ctx->value); 2316 if (r) 2317 return r; 2318 } 2319 2320 /* result.w = 1.0;*/ 2321 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2322 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2323 2324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2325 alu.src[0].sel = V_SQ_ALU_SRC_1; 2326 alu.src[0].chan = 0; 2327 2328 alu.dst.sel = ctx->temp_reg; 2329 alu.dst.chan = 3; 2330 alu.dst.write = 1; 2331 alu.last = 1; 2332 r = r600_bc_add_alu(ctx->bc, &alu); 2333 if (r) 2334 return r; 2335 r = r600_bc_add_literal(ctx->bc, ctx->value); 2336 if (r) 2337 return r; 2338 } 2339 return tgsi_helper_copy(ctx, inst); 2340} 2341 2342static int tgsi_log(struct r600_shader_ctx *ctx) 2343{ 2344 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2345 struct r600_bc_alu alu; 2346 int r; 2347 2348 /* result.x = floor(log2(src)); */ 2349 if (inst->Dst[0].Register.WriteMask & 1) { 2350 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2351 2352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2353 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2354 if (r) 2355 return r; 2356 2357 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2358 2359 alu.dst.sel = ctx->temp_reg; 2360 alu.dst.chan = 0; 2361 alu.dst.write = 1; 2362 alu.last = 1; 2363 r = r600_bc_add_alu(ctx->bc, &alu); 2364 if (r) 2365 return r; 2366 2367 r = r600_bc_add_literal(ctx->bc, ctx->value); 2368 if (r) 2369 return r; 2370 2371 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2372 alu.src[0].sel = ctx->temp_reg; 2373 alu.src[0].chan = 0; 2374 2375 alu.dst.sel = ctx->temp_reg; 2376 alu.dst.chan = 0; 2377 alu.dst.write = 1; 2378 alu.last = 1; 2379 2380 r = r600_bc_add_alu(ctx->bc, &alu); 2381 if (r) 2382 return r; 2383 2384 r = r600_bc_add_literal(ctx->bc, ctx->value); 2385 if (r) 2386 return r; 2387 } 2388 2389 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2390 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2391 memset(&alu, 0, 
sizeof(struct r600_bc_alu)); 2392 2393 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2394 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2395 if (r) 2396 return r; 2397 2398 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2399 2400 alu.dst.sel = ctx->temp_reg; 2401 alu.dst.chan = 1; 2402 alu.dst.write = 1; 2403 alu.last = 1; 2404 2405 r = r600_bc_add_alu(ctx->bc, &alu); 2406 if (r) 2407 return r; 2408 2409 r = r600_bc_add_literal(ctx->bc, ctx->value); 2410 if (r) 2411 return r; 2412 2413 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2414 2415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2416 alu.src[0].sel = ctx->temp_reg; 2417 alu.src[0].chan = 1; 2418 2419 alu.dst.sel = ctx->temp_reg; 2420 alu.dst.chan = 1; 2421 alu.dst.write = 1; 2422 alu.last = 1; 2423 2424 r = r600_bc_add_alu(ctx->bc, &alu); 2425 if (r) 2426 return r; 2427 2428 r = r600_bc_add_literal(ctx->bc, ctx->value); 2429 if (r) 2430 return r; 2431 2432 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2433 2434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2435 alu.src[0].sel = ctx->temp_reg; 2436 alu.src[0].chan = 1; 2437 2438 alu.dst.sel = ctx->temp_reg; 2439 alu.dst.chan = 1; 2440 alu.dst.write = 1; 2441 alu.last = 1; 2442 2443 r = r600_bc_add_alu(ctx->bc, &alu); 2444 if (r) 2445 return r; 2446 2447 r = r600_bc_add_literal(ctx->bc, ctx->value); 2448 if (r) 2449 return r; 2450 2451 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2452 2453 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2454 alu.src[0].sel = ctx->temp_reg; 2455 alu.src[0].chan = 1; 2456 2457 alu.dst.sel = ctx->temp_reg; 2458 alu.dst.chan = 1; 2459 alu.dst.write = 1; 2460 alu.last = 1; 2461 2462 r = r600_bc_add_alu(ctx->bc, &alu); 2463 if (r) 2464 return r; 2465 2466 r = r600_bc_add_literal(ctx->bc, ctx->value); 2467 if (r) 2468 return r; 2469 2470 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2471 2472 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2473 2474 r = tgsi_src(ctx, 
&inst->Src[0], &alu.src[0]); 2475 if (r) 2476 return r; 2477 2478 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2479 2480 alu.src[1].sel = ctx->temp_reg; 2481 alu.src[1].chan = 1; 2482 2483 alu.dst.sel = ctx->temp_reg; 2484 alu.dst.chan = 1; 2485 alu.dst.write = 1; 2486 alu.last = 1; 2487 2488 r = r600_bc_add_alu(ctx->bc, &alu); 2489 if (r) 2490 return r; 2491 2492 r = r600_bc_add_literal(ctx->bc, ctx->value); 2493 if (r) 2494 return r; 2495 } 2496 2497 /* result.z = log2(src);*/ 2498 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2499 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2500 2501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2502 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2503 if (r) 2504 return r; 2505 2506 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2507 2508 alu.dst.sel = ctx->temp_reg; 2509 alu.dst.write = 1; 2510 alu.dst.chan = 2; 2511 alu.last = 1; 2512 2513 r = r600_bc_add_alu(ctx->bc, &alu); 2514 if (r) 2515 return r; 2516 2517 r = r600_bc_add_literal(ctx->bc, ctx->value); 2518 if (r) 2519 return r; 2520 } 2521 2522 /* result.w = 1.0; */ 2523 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2524 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2525 2526 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2527 alu.src[0].sel = V_SQ_ALU_SRC_1; 2528 alu.src[0].chan = 0; 2529 2530 alu.dst.sel = ctx->temp_reg; 2531 alu.dst.chan = 3; 2532 alu.dst.write = 1; 2533 alu.last = 1; 2534 2535 r = r600_bc_add_alu(ctx->bc, &alu); 2536 if (r) 2537 return r; 2538 2539 r = r600_bc_add_literal(ctx->bc, ctx->value); 2540 if (r) 2541 return r; 2542 } 2543 2544 return tgsi_helper_copy(ctx, inst); 2545} 2546 2547static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2548{ 2549 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2550 struct r600_bc_alu alu; 2551 int r; 2552 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2553 2554 switch (inst->Instruction.Opcode) { 2555 case TGSI_OPCODE_ARL: 2556 alu.inst = 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2557 break; 2558 case TGSI_OPCODE_ARR: 2559 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2560 break; 2561 default: 2562 assert(0); 2563 return -1; 2564 } 2565 2566 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2567 if (r) 2568 return r; 2569 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2570 alu.last = 1; 2571 alu.dst.chan = 0; 2572 alu.dst.sel = ctx->temp_reg; 2573 alu.dst.write = 1; 2574 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2575 if (r) 2576 return r; 2577 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2578 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2579 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2580 if (r) 2581 return r; 2582 alu.src[0].sel = ctx->temp_reg; 2583 alu.src[0].chan = 0; 2584 alu.last = 1; 2585 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2586 if (r) 2587 return r; 2588 return 0; 2589} 2590static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2591{ 2592 /* TODO from r600c, ar values don't persist between clauses */ 2593 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2594 struct r600_bc_alu alu; 2595 int r; 2596 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2597 2598 switch (inst->Instruction.Opcode) { 2599 case TGSI_OPCODE_ARL: 2600 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2601 break; 2602 case TGSI_OPCODE_ARR: 2603 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; 2604 break; 2605 default: 2606 assert(0); 2607 return -1; 2608 } 2609 2610 2611 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2612 if (r) 2613 return r; 2614 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2615 2616 alu.last = 1; 2617 2618 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2619 if (r) 2620 return r; 2621 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2622 return 0; 2623} 2624 2625static int tgsi_opdst(struct r600_shader_ctx *ctx) 2626{ 2627 
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2628 struct r600_bc_alu alu; 2629 int i, r = 0; 2630 2631 for (i = 0; i < 4; i++) { 2632 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2633 2634 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2635 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2636 if (r) 2637 return r; 2638 2639 if (i == 0 || i == 3) { 2640 alu.src[0].sel = V_SQ_ALU_SRC_1; 2641 } else { 2642 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2643 if (r) 2644 return r; 2645 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2646 } 2647 2648 if (i == 0 || i == 2) { 2649 alu.src[1].sel = V_SQ_ALU_SRC_1; 2650 } else { 2651 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2652 if (r) 2653 return r; 2654 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2655 } 2656 if (i == 3) 2657 alu.last = 1; 2658 r = r600_bc_add_alu(ctx->bc, &alu); 2659 if (r) 2660 return r; 2661 } 2662 return 0; 2663} 2664 2665static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2666{ 2667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2668 struct r600_bc_alu alu; 2669 int r; 2670 2671 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2672 alu.inst = opcode; 2673 alu.predicate = 1; 2674 2675 alu.dst.sel = ctx->temp_reg; 2676 alu.dst.write = 1; 2677 alu.dst.chan = 0; 2678 2679 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2680 if (r) 2681 return r; 2682 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2683 alu.src[1].sel = V_SQ_ALU_SRC_0; 2684 alu.src[1].chan = 0; 2685 2686 alu.last = 1; 2687 2688 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2689 if (r) 2690 return r; 2691 return 0; 2692} 2693 2694static int pops(struct r600_shader_ctx *ctx, int pops) 2695{ 2696 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2697 ctx->bc->cf_last->pop_count = pops; 2698 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2699 return 0; 2700} 2701 2702static inline 
void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2703{ 2704 switch(reason) { 2705 case FC_PUSH_VPM: 2706 ctx->bc->callstack[ctx->bc->call_sp].current--; 2707 break; 2708 case FC_PUSH_WQM: 2709 case FC_LOOP: 2710 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2711 break; 2712 case FC_REP: 2713 /* TOODO : for 16 vp asic should -= 2; */ 2714 ctx->bc->callstack[ctx->bc->call_sp].current --; 2715 break; 2716 } 2717} 2718 2719static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2720{ 2721 if (check_max_only) { 2722 int diff; 2723 switch (reason) { 2724 case FC_PUSH_VPM: 2725 diff = 1; 2726 break; 2727 case FC_PUSH_WQM: 2728 diff = 4; 2729 break; 2730 default: 2731 assert(0); 2732 diff = 0; 2733 } 2734 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2735 ctx->bc->callstack[ctx->bc->call_sp].max) { 2736 ctx->bc->callstack[ctx->bc->call_sp].max = 2737 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2738 } 2739 return; 2740 } 2741 switch (reason) { 2742 case FC_PUSH_VPM: 2743 ctx->bc->callstack[ctx->bc->call_sp].current++; 2744 break; 2745 case FC_PUSH_WQM: 2746 case FC_LOOP: 2747 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2748 break; 2749 case FC_REP: 2750 ctx->bc->callstack[ctx->bc->call_sp].current++; 2751 break; 2752 } 2753 2754 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2755 ctx->bc->callstack[ctx->bc->call_sp].max) { 2756 ctx->bc->callstack[ctx->bc->call_sp].max = 2757 ctx->bc->callstack[ctx->bc->call_sp].current; 2758 } 2759} 2760 2761static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2762{ 2763 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2764 2765 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2766 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2767 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2768 sp->num_mid++; 2769} 2770 2771static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2772{ 2773 
ctx->bc->fc_sp++; 2774 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2775 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2776} 2777 2778static void fc_poplevel(struct r600_shader_ctx *ctx) 2779{ 2780 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2781 if (sp->mid) { 2782 free(sp->mid); 2783 sp->mid = NULL; 2784 } 2785 sp->num_mid = 0; 2786 sp->start = NULL; 2787 sp->type = 0; 2788 ctx->bc->fc_sp--; 2789} 2790 2791#if 0 2792static int emit_return(struct r600_shader_ctx *ctx) 2793{ 2794 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2795 return 0; 2796} 2797 2798static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2799{ 2800 2801 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2802 ctx->bc->cf_last->pop_count = pops; 2803 /* TODO work out offset */ 2804 return 0; 2805} 2806 2807static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2808{ 2809 return 0; 2810} 2811 2812static void emit_testflag(struct r600_shader_ctx *ctx) 2813{ 2814 2815} 2816 2817static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2818{ 2819 emit_testflag(ctx); 2820 emit_jump_to_offset(ctx, 1, 4); 2821 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2822 pops(ctx, ifidx + 1); 2823 emit_return(ctx); 2824} 2825 2826static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2827{ 2828 emit_testflag(ctx); 2829 2830 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2831 ctx->bc->cf_last->pop_count = 1; 2832 2833 fc_set_mid(ctx, fc_sp); 2834 2835 pops(ctx, 1); 2836} 2837#endif 2838 2839static int tgsi_if(struct r600_shader_ctx *ctx) 2840{ 2841 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2842 2843 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2844 2845 fc_pushlevel(ctx, FC_IF); 2846 2847 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2848 return 0; 2849} 2850 2851static int 
tgsi_else(struct r600_shader_ctx *ctx) 2852{ 2853 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2854 ctx->bc->cf_last->pop_count = 1; 2855 2856 fc_set_mid(ctx, ctx->bc->fc_sp); 2857 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2858 return 0; 2859} 2860 2861static int tgsi_endif(struct r600_shader_ctx *ctx) 2862{ 2863 pops(ctx, 1); 2864 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2865 R600_ERR("if/endif unbalanced in shader\n"); 2866 return -1; 2867 } 2868 2869 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2870 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2871 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2872 } else { 2873 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2874 } 2875 fc_poplevel(ctx); 2876 2877 callstack_decrease_current(ctx, FC_PUSH_VPM); 2878 return 0; 2879} 2880 2881static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2882{ 2883 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2884 2885 fc_pushlevel(ctx, FC_LOOP); 2886 2887 /* check stack depth */ 2888 callstack_check_depth(ctx, FC_LOOP, 0); 2889 return 0; 2890} 2891 2892static int tgsi_endloop(struct r600_shader_ctx *ctx) 2893{ 2894 int i; 2895 2896 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2897 2898 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2899 R600_ERR("loop/endloop in shader code are not paired.\n"); 2900 return -EINVAL; 2901 } 2902 2903 /* fixup loop pointers - from r600isa 2904 LOOP END points to CF after LOOP START, 2905 LOOP START point to CF after LOOP END 2906 BRK/CONT point to LOOP END CF 2907 */ 2908 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2909 2910 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2911 2912 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2913 
ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2914 } 2915 /* TODO add LOOPRET support */ 2916 fc_poplevel(ctx); 2917 callstack_decrease_current(ctx, FC_LOOP); 2918 return 0; 2919} 2920 2921static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2922{ 2923 unsigned int fscp; 2924 2925 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2926 { 2927 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2928 break; 2929 } 2930 2931 if (fscp == 0) { 2932 R600_ERR("Break not inside loop/endloop pair\n"); 2933 return -EINVAL; 2934 } 2935 2936 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2937 ctx->bc->cf_last->pop_count = 1; 2938 2939 fc_set_mid(ctx, fscp); 2940 2941 pops(ctx, 1); 2942 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2943 return 0; 2944} 2945 2946static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2947 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2948 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2949 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2950 2951 /* FIXME: 2952 * For state trackers other than OpenGL, we'll want to use 2953 * _RECIP_IEEE instead. 
*/
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* the only entry with the second field set: MULADD is an OP3 opcode */
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK and CONT carry a CF opcode; tgsi_loop_brk_cont reads it from inst_info->r600_opcode */
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};

/*
 * Second translation table with the same entry layout, built from the
 * EG_-prefixed opcode constants; ARL is handled by tgsi_eg_arl here.
 */
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP,
tgsi_exp}, 3117 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3118 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3119 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3120 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3121 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3122 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3123 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3124 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3125 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3126 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3127 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3128 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3129 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3130 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 /* gap */ 3132 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3134 /* gap */ 3135 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3136 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3138 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3140 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3141 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3142 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3143 {TGSI_OPCODE_POW, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3144 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3145 /* gap */ 3146 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3148 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3150 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3151 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3152 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3153 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3154 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3156 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3159 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3160 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3162 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3163 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3164 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3165 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3167 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3168 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3169 {TGSI_OPCODE_UP2H, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3170 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3171 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3172 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3173 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3175 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3176 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3177 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3178 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3179 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3180 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3181 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3182 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3183 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3184 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3185 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3186 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3187 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3188 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3189 /* gap */ 3190 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3191 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3192 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3193 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3194 /* gap */ 3195 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {80, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3197 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3198 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3199 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3200 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3201 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3203 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 /* gap */ 3205 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3206 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3207 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3208 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3209 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3210 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3213 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3214 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3215 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3216 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3217 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3218 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3219 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3220 /* gap */ 3221 {103, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3222 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3223 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3224 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3225 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3226 /* gap */ 3227 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3228 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3229 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3231 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3232 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3233 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3234 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3235 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3236 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3237 /* gap */ 3238 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3239 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3241 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3242 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3244 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3247 {TGSI_OPCODE_F2U, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3249 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3250 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3251 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3252 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3254 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3255 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3257 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3260 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3263 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3266}; 3267