r600_shader.c revision 0ab7dcddb35560626c1aab4e8e6181dc4b4703a6
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_formats.h" 32#include "r600_opcodes.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36 37static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 38{ 39 struct r600_pipe_state *rstate = &shader->rstate; 40 struct r600_shader *rshader = &shader->shader; 41 unsigned spi_vs_out_id[10]; 42 unsigned i, tmp; 43 44 /* clear previous register */ 45 rstate->nregs = 0; 46 47 /* so far never got proper semantic id from tgsi */ 48 /* FIXME better to move this in config things so they get emited 49 * only one time per cs 50 */ 51 for (i = 0; i < 10; i++) { 52 spi_vs_out_id[i] = 0; 53 } 54 for (i = 0; i < 32; i++) { 55 tmp = i << ((i & 3) * 8); 56 spi_vs_out_id[i / 4] |= tmp; 57 } 58 for (i = 0; i < 10; i++) { 59 r600_pipe_state_add_reg(rstate, 60 R_028614_SPI_VS_OUT_ID_0 + i * 4, 61 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 62 } 63 64 r600_pipe_state_add_reg(rstate, 65 R_0286C4_SPI_VS_OUT_CONFIG, 66 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 67 0xFFFFFFFF, NULL); 68 r600_pipe_state_add_reg(rstate, 69 R_028868_SQ_PGM_RESOURCES_VS, 70 S_028868_NUM_GPRS(rshader->bc.ngpr) | 71 S_028868_STACK_SIZE(rshader->bc.nstack), 72 0xFFFFFFFF, NULL); 73 r600_pipe_state_add_reg(rstate, 74 R_0288D0_SQ_PGM_CF_OFFSET_VS, 75 0x00000000, 0xFFFFFFFF, NULL); 76 r600_pipe_state_add_reg(rstate, 77 R_028858_SQ_PGM_START_VS, 78 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 79 80 r600_pipe_state_add_reg(rstate, 81 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 82 0xFFFFFFFF, NULL); 83 84} 85 86int r600_find_vs_semantic_index(struct r600_shader *vs, 87 struct r600_shader *ps, int id) 88{ 89 struct r600_shader_io *input = &ps->input[id]; 90 91 for (int i = 0; i < vs->noutput; i++) { 92 if (input->name == vs->output[i].name && 93 input->sid == vs->output[i].sid) { 94 return i - 1; 95 } 96 } 97 return 0; 98} 99 100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 101{ 102 struct r600_pipe_state *rstate = &shader->rstate; 103 struct r600_shader *rshader = &shader->shader; 104 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 105 int pos_index = -1, face_index = -1; 106 107 rstate->nregs = 0; 108 109 for (i = 0; i < rshader->ninput; i++) { 110 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 111 pos_index = i; 112 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 113 face_index = i; 114 } 115 116 for (i = 0; i < rshader->noutput; i++) { 117 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 118 r600_pipe_state_add_reg(rstate, 119 R_02880C_DB_SHADER_CONTROL, 120 S_02880C_Z_EXPORT_ENABLE(1), 121 S_02880C_Z_EXPORT_ENABLE(1), NULL); 122 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 123 r600_pipe_state_add_reg(rstate, 124 R_02880C_DB_SHADER_CONTROL, 125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 126 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 127 } 128 129 exports_ps = 0; 130 num_cout = 0; 131 for (i = 0; i < rshader->noutput; i++) { 132 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 133 exports_ps |= 1; 134 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 135 num_cout++; 136 } 137 } 138 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 139 if (!exports_ps) { 140 /* always at least export 1 component per pixel */ 141 exports_ps = 2; 142 } 143 144 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 145 S_0286CC_PERSP_GRADIENT_ENA(1); 146 spi_input_z = 0; 147 if (pos_index != -1) { 148 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 149 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 150 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 151 S_0286CC_BARYC_SAMPLE_CNTL(1)); 152 spi_input_z |= 1; 153 } 154 155 spi_ps_in_control_1 = 0; 156 if (face_index != -1) { 157 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 158 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 159 } 160 161 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 162 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 163 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 164 r600_pipe_state_add_reg(rstate, 165 R_028840_SQ_PGM_START_PS, 166 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 167 r600_pipe_state_add_reg(rstate, 168 R_028850_SQ_PGM_RESOURCES_PS, 169 S_028868_NUM_GPRS(rshader->bc.ngpr) | 170 S_028868_STACK_SIZE(rshader->bc.nstack), 171 0xFFFFFFFF, NULL); 172 r600_pipe_state_add_reg(rstate, 173 R_028854_SQ_PGM_EXPORTS_PS, 174 exports_ps, 0xFFFFFFFF, NULL); 175 r600_pipe_state_add_reg(rstate, 176 R_0288CC_SQ_PGM_CF_OFFSET_PS, 177 0x00000000, 0xFFFFFFFF, NULL); 178 179 if (rshader->fs_write_all) { 180 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, 181 S_028808_MULTIWRITE_ENABLE(1), 182 S_028808_MULTIWRITE_ENABLE(1), 183 NULL); 184 } 185 186 if (rshader->uses_kill) { 187 /* only set some bits here, the other bits are set in the dsa state */ 188 r600_pipe_state_add_reg(rstate, 189 R_02880C_DB_SHADER_CONTROL, 190 S_02880C_KILL_ENABLE(1), 191 S_02880C_KILL_ENABLE(1), NULL); 192 } 193 r600_pipe_state_add_reg(rstate, 194 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 195 0xFFFFFFFF, NULL); 196} 197 198static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 199{ 200 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 201 struct r600_shader *rshader = &shader->shader; 202 void *ptr; 203 204 /* copy new shader */ 205 if (shader->bo == NULL) { 206 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 207 if (shader->bo == NULL) { 208 return -ENOMEM; 209 } 210 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 211 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 212 r600_bo_unmap(rctx->radeon, shader->bo); 213 } 214 /* build state */ 215 switch (rshader->processor_type) { 216 case TGSI_PROCESSOR_VERTEX: 217 if (rshader->family >= CHIP_CEDAR) { 218 evergreen_pipe_shader_vs(ctx, shader); 219 } else { 220 r600_pipe_shader_vs(ctx, shader); 221 } 222 break; 223 case TGSI_PROCESSOR_FRAGMENT: 224 if (rshader->family >= CHIP_CEDAR) { 225 evergreen_pipe_shader_ps(ctx, shader); 226 } else { 227 r600_pipe_shader_ps(ctx, shader); 228 } 229 break; 230 default: 231 return -EINVAL; 232 } 233 return 0; 234} 235 236static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 237 238int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 239{ 240 static int dump_shaders = -1; 241 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 242 int r; 243 244 /* Would like some magic "get_bool_option_once" routine. 245 */ 246 if (dump_shaders == -1) 247 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 248 249 if (dump_shaders) { 250 fprintf(stderr, "--------------------------------------------------------------\n"); 251 tgsi_dump(tokens, 0); 252 } 253 shader->shader.family = r600_get_family(rctx->radeon); 254 r = r600_shader_from_tgsi(tokens, &shader->shader); 255 if (r) { 256 R600_ERR("translation from TGSI failed !\n"); 257 return r; 258 } 259 r = r600_bc_build(&shader->shader.bc); 260 if (r) { 261 R600_ERR("building bytecode failed !\n"); 262 return r; 263 } 264 if (dump_shaders) { 265 r600_bc_dump(&shader->shader.bc); 266 fprintf(stderr, "______________________________________________________________\n"); 267 } 268 return r600_pipe_shader(ctx, shader); 269} 270 271void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 272{ 273 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 274 275 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 276 r600_bc_clear(&shader->shader.bc); 277} 278 279/* 280 * tgsi -> r600 shader 281 */ 282struct r600_shader_tgsi_instruction; 283 284struct r600_shader_src { 285 unsigned sel; 286 unsigned swizzle[4]; 287 unsigned neg; 288 unsigned abs; 289 unsigned rel; 290 uint32_t value[4]; 291}; 292 293struct r600_shader_ctx { 294 struct tgsi_shader_info info; 295 struct tgsi_parse_context parse; 296 const struct tgsi_token *tokens; 297 unsigned type; 298 unsigned file_offset[TGSI_FILE_COUNT]; 299 unsigned temp_reg; 300 unsigned ar_reg; 301 struct r600_shader_tgsi_instruction *inst_info; 302 struct r600_bc *bc; 303 struct r600_shader *shader; 304 struct r600_shader_src src[3]; 305 u32 *literals; 306 u32 nliterals; 307 u32 max_driver_temp_used; 308 /* needed for evergreen interpolation */ 309 boolean input_centroid; 310 boolean input_linear; 311 boolean input_perspective; 312 int num_interp_gpr; 313}; 314 315struct r600_shader_tgsi_instruction { 316 unsigned tgsi_opcode; 317 unsigned is_op3; 318 unsigned r600_opcode; 319 int (*process)(struct r600_shader_ctx *ctx); 320}; 321 322static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 323static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 324 325static int tgsi_is_supported(struct r600_shader_ctx *ctx) 326{ 327 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 328 int j; 329 330 if (i->Instruction.NumDstRegs > 1) { 331 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 332 return -EINVAL; 333 } 334 if (i->Instruction.Predicate) { 335 R600_ERR("predicate unsupported\n"); 336 return -EINVAL; 337 } 338#if 0 339 if (i->Instruction.Label) { 340 R600_ERR("label unsupported\n"); 341 return -EINVAL; 342 } 343#endif 344 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 345 if (i->Src[j].Register.Dimension) { 346 R600_ERR("unsupported src %d (dimension %d)\n", j, 347 i->Src[j].Register.Dimension); 348 return -EINVAL; 349 } 350 } 351 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 352 if (i->Dst[j].Register.Dimension) { 353 R600_ERR("unsupported dst (dimension)\n"); 354 return -EINVAL; 355 } 356 } 357 return 0; 358} 359 360static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 361{ 362 int i, r; 363 struct r600_bc_alu alu; 364 int gpr = 0, base_chan = 0; 365 int ij_index = 0; 366 367 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 368 ij_index = 0; 369 if (ctx->shader->input[input].centroid) 370 ij_index++; 371 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 372 ij_index = 0; 373 /* if we have perspective add one */ 374 if (ctx->input_perspective) { 375 ij_index++; 376 /* if we have perspective centroid */ 377 if (ctx->input_centroid) 378 ij_index++; 379 } 380 if (ctx->shader->input[input].centroid) 381 ij_index++; 382 } 383 384 /* work out gpr and base_chan from index */ 385 gpr = ij_index / 2; 386 base_chan = (2 * (ij_index % 2)) + 1; 387 388 for (i = 0; i < 8; i++) { 389 memset(&alu, 0, sizeof(struct r600_bc_alu)); 390 391 if (i < 4) 392 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 393 else 394 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 395 396 if ((i > 1) && (i < 6)) { 397 alu.dst.sel = ctx->shader->input[input].gpr; 398 alu.dst.write = 1; 399 } 400 401 alu.dst.chan = i % 4; 402 403 alu.src[0].sel = gpr; 404 alu.src[0].chan = (base_chan - (i % 2)); 405 406 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 407 408 alu.bank_swizzle_force = SQ_ALU_VEC_210; 409 if ((i % 4) == 3) 410 alu.last = 1; 411 r = r600_bc_add_alu(ctx->bc, &alu); 412 if (r) 413 return r; 414 } 415 return 0; 416} 417 418 419static int tgsi_declaration(struct r600_shader_ctx *ctx) 420{ 421 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 422 unsigned i; 423 424 switch (d->Declaration.File) { 425 case TGSI_FILE_INPUT: 426 i = ctx->shader->ninput++; 427 ctx->shader->input[i].name = d->Semantic.Name; 428 ctx->shader->input[i].sid = d->Semantic.Index; 429 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 430 ctx->shader->input[i].centroid = d->Declaration.Centroid; 431 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 432 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 433 /* turn input into interpolate on EG */ 434 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 435 if (ctx->shader->input[i].interpolate > 0) { 436 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 437 evergreen_interp_alu(ctx, i); 438 } 439 } 440 } 441 break; 442 case TGSI_FILE_OUTPUT: 443 i = ctx->shader->noutput++; 444 ctx->shader->output[i].name = d->Semantic.Name; 445 ctx->shader->output[i].sid = d->Semantic.Index; 446 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 447 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 448 break; 449 case TGSI_FILE_CONSTANT: 450 case TGSI_FILE_TEMPORARY: 451 case TGSI_FILE_SAMPLER: 452 case TGSI_FILE_ADDRESS: 453 break; 454 default: 455 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 456 return -EINVAL; 457 } 458 return 0; 459} 460 461static int r600_get_temp(struct r600_shader_ctx *ctx) 462{ 463 return ctx->temp_reg + ctx->max_driver_temp_used++; 464} 465 466/* 467 * for evergreen we need to scan the shader to find the number of GPRs we need to 468 * reserve for interpolation. 469 * 470 * we need to know if we are going to emit 471 * any centroid inputs 472 * if perspective and linear are required 473*/ 474static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 475{ 476 int i; 477 int num_baryc; 478 479 ctx->input_linear = FALSE; 480 ctx->input_perspective = FALSE; 481 ctx->input_centroid = FALSE; 482 ctx->num_interp_gpr = 1; 483 484 /* any centroid inputs */ 485 for (i = 0; i < ctx->info.num_inputs; i++) { 486 /* skip position/face */ 487 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 488 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 489 continue; 490 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 491 ctx->input_linear = TRUE; 492 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 493 ctx->input_perspective = TRUE; 494 if (ctx->info.input_centroid[i]) 495 ctx->input_centroid = TRUE; 496 } 497 498 num_baryc = 0; 499 /* ignoring sample for now */ 500 if (ctx->input_perspective) 501 num_baryc++; 502 if (ctx->input_linear) 503 num_baryc++; 504 if (ctx->input_centroid) 505 num_baryc *= 2; 506 507 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 508 509 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 510 return ctx->num_interp_gpr; 511} 512 513static void tgsi_src(struct r600_shader_ctx *ctx, 514 const struct tgsi_full_src_register *tgsi_src, 515 struct r600_shader_src *r600_src) 516{ 517 memset(r600_src, 0, sizeof(*r600_src)); 518 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 519 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 520 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 521 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 522 r600_src->neg = tgsi_src->Register.Negate; 523 r600_src->abs = tgsi_src->Register.Absolute; 524 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 525 int index; 526 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 527 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 528 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 529 530 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 531 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 532 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 533 return; 534 } 535 index = tgsi_src->Register.Index; 536 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 537 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 538 } else { 539 if (tgsi_src->Register.Indirect) 540 r600_src->rel = V_SQ_REL_RELATIVE; 541 r600_src->sel = tgsi_src->Register.Index; 542 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 543 } 544} 545 546static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 547{ 548 struct r600_bc_vtx vtx; 549 unsigned int ar_reg; 550 int r; 551 552 if (offset) { 553 struct r600_bc_alu alu; 554 555 memset(&alu, 0, sizeof(alu)); 556 557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 558 alu.src[0].sel = ctx->ar_reg; 559 560 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 561 alu.src[1].value = offset; 562 563 alu.dst.sel = dst_reg; 564 alu.dst.write = 1; 565 alu.last = 1; 566 567 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 568 return r; 569 570 ar_reg = dst_reg; 571 } else { 572 ar_reg = ctx->ar_reg; 573 } 574 575 memset(&vtx, 0, sizeof(vtx)); 576 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 577 vtx.src_gpr = ar_reg; 578 vtx.mega_fetch_count = 16; 579 vtx.dst_gpr = dst_reg; 580 vtx.dst_sel_x = 0; /* SEL_X */ 581 vtx.dst_sel_y = 1; /* SEL_Y */ 582 vtx.dst_sel_z = 2; /* SEL_Z */ 583 vtx.dst_sel_w = 3; /* SEL_W */ 584 vtx.data_format = FMT_32_32_32_32_FLOAT; 585 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 586 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 587 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 588 589 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 590 return r; 591 592 return 0; 593} 594 595static int tgsi_split_constant(struct r600_shader_ctx *ctx) 596{ 597 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 598 struct r600_bc_alu alu; 599 int i, j, k, nconst, r; 600 601 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 602 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 603 nconst++; 604 } 605 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 606 } 607 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 608 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 609 continue; 610 } 611 612 if (ctx->src[i].rel) { 613 int treg = r600_get_temp(ctx); 614 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 615 return r; 616 617 ctx->src[i].sel = treg; 618 ctx->src[i].rel = 0; 619 j--; 620 } else if (j > 0) { 621 int treg = r600_get_temp(ctx); 622 for (k = 0; k < 4; k++) { 623 memset(&alu, 0, sizeof(struct r600_bc_alu)); 624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 625 alu.src[0].sel = ctx->src[i].sel; 626 alu.src[0].chan = k; 627 alu.src[0].rel = ctx->src[i].rel; 628 alu.dst.sel = treg; 629 alu.dst.chan = k; 630 alu.dst.write = 1; 631 if (k == 3) 632 alu.last = 1; 633 r = r600_bc_add_alu(ctx->bc, &alu); 634 if (r) 635 return r; 636 } 637 ctx->src[i].sel = treg; 638 ctx->src[i].rel =0; 639 j--; 640 } 641 } 642 return 0; 643} 644 645/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 646static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 647{ 648 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 649 struct r600_bc_alu alu; 650 int i, j, k, nliteral, r; 651 652 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 653 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 654 nliteral++; 655 } 656 } 657 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 658 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 659 int treg = r600_get_temp(ctx); 660 for (k = 0; k < 4; k++) { 661 memset(&alu, 0, sizeof(struct r600_bc_alu)); 662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 663 alu.src[0].sel = ctx->src[i].sel; 664 alu.src[0].chan = k; 665 alu.src[0].value = ctx->src[i].value[k]; 666 alu.dst.sel = treg; 667 alu.dst.chan = k; 668 alu.dst.write = 1; 669 if (k == 3) 670 alu.last = 1; 671 r = r600_bc_add_alu(ctx->bc, &alu); 672 if (r) 673 return r; 674 } 675 ctx->src[i].sel = treg; 676 j--; 677 } 678 } 679 return 0; 680} 681 682static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 683{ 684 struct tgsi_full_immediate *immediate; 685 struct tgsi_full_property *property; 686 struct r600_shader_ctx ctx; 687 struct r600_bc_output output[32]; 688 unsigned output_done, noutput; 689 unsigned opcode; 690 int i, r = 0, pos0; 691 692 ctx.bc = &shader->bc; 693 ctx.shader = shader; 694 r = r600_bc_init(ctx.bc, shader->family); 695 if (r) 696 return r; 697 ctx.tokens = tokens; 698 tgsi_scan_shader(tokens, &ctx.info); 699 tgsi_parse_init(&ctx.parse, tokens); 700 ctx.type = ctx.parse.FullHeader.Processor.Processor; 701 shader->processor_type = ctx.type; 702 ctx.bc->type = shader->processor_type; 703 704 /* register allocations */ 705 /* Values [0,127] correspond to GPR[0..127]. 706 * Values [128,159] correspond to constant buffer bank 0 707 * Values [160,191] correspond to constant buffer bank 1 708 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 709 * Values [256,287] correspond to constant buffer bank 2 (EG) 710 * Values [288,319] correspond to constant buffer bank 3 (EG) 711 * Other special values are shown in the list below. 712 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 713 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 714 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 715 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 716 * 248 SQ_ALU_SRC_0: special constant 0.0. 717 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 718 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 719 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 720 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 721 * 253 SQ_ALU_SRC_LITERAL: literal constant. 722 * 254 SQ_ALU_SRC_PV: previous vector result. 723 * 255 SQ_ALU_SRC_PS: previous scalar result. 724 */ 725 for (i = 0; i < TGSI_FILE_COUNT; i++) { 726 ctx.file_offset[i] = 0; 727 } 728 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 729 ctx.file_offset[TGSI_FILE_INPUT] = 1; 730 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 731 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 732 } else { 733 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 734 } 735 } 736 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 737 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 738 } 739 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 740 ctx.info.file_count[TGSI_FILE_INPUT]; 741 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 742 ctx.info.file_count[TGSI_FILE_OUTPUT]; 743 744 /* Outside the GPR range. This will be translated to one of the 745 * kcache banks later. */ 746 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 747 748 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 749 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 750 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 751 ctx.temp_reg = ctx.ar_reg + 1; 752 753 ctx.nliterals = 0; 754 ctx.literals = NULL; 755 shader->fs_write_all = FALSE; 756 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 757 tgsi_parse_token(&ctx.parse); 758 switch (ctx.parse.FullToken.Token.Type) { 759 case TGSI_TOKEN_TYPE_IMMEDIATE: 760 immediate = &ctx.parse.FullToken.FullImmediate; 761 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 762 if(ctx.literals == NULL) { 763 r = -ENOMEM; 764 goto out_err; 765 } 766 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 767 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 768 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 769 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 770 ctx.nliterals++; 771 break; 772 case TGSI_TOKEN_TYPE_DECLARATION: 773 r = tgsi_declaration(&ctx); 774 if (r) 775 goto out_err; 776 break; 777 case TGSI_TOKEN_TYPE_INSTRUCTION: 778 r = tgsi_is_supported(&ctx); 779 if (r) 780 goto out_err; 781 ctx.max_driver_temp_used = 0; 782 /* reserve first tmp for everyone */ 783 r600_get_temp(&ctx); 784 785 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 786 if ((r = tgsi_split_constant(&ctx))) 787 goto out_err; 788 if ((r = tgsi_split_literal_constant(&ctx))) 789 goto out_err; 790 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 791 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 792 else 793 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 794 r = ctx.inst_info->process(&ctx); 795 if (r) 796 goto out_err; 797 break; 798 case TGSI_TOKEN_TYPE_PROPERTY: 799 property = &ctx.parse.FullToken.FullProperty; 800 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 801 if (property->u[0].Data == 1) 802 shader->fs_write_all = TRUE; 803 } 804 break; 805 default: 806 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 807 r = -EINVAL; 808 goto out_err; 809 } 810 } 811 /* export output */ 812 noutput = shader->noutput; 813 for (i = 0, pos0 = 0; i < noutput; i++) { 814 memset(&output[i], 0, sizeof(struct r600_bc_output)); 815 output[i].gpr = shader->output[i].gpr; 816 output[i].elem_size = 3; 817 output[i].swizzle_x = 0; 818 output[i].swizzle_y = 1; 819 output[i].swizzle_z = 2; 820 output[i].swizzle_w = 3; 821 output[i].burst_count = 1; 822 output[i].barrier = 1; 823 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 824 output[i].array_base = i - pos0; 825 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 826 switch (ctx.type) { 827 case TGSI_PROCESSOR_VERTEX: 828 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 829 output[i].array_base = 60; 830 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 831 /* position doesn't count in array_base */ 832 pos0++; 833 } 834 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 835 output[i].array_base = 61; 836 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 837 /* position doesn't count in array_base */ 838 pos0++; 839 } 840 break; 841 case TGSI_PROCESSOR_FRAGMENT: 842 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 843 output[i].array_base = shader->output[i].sid; 844 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 845 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 846 output[i].array_base = 61; 847 output[i].swizzle_x = 2; 848 output[i].swizzle_y = 7; 849 output[i].swizzle_z = output[i].swizzle_w = 7; 850 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 851 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 852 output[i].array_base = 61; 853 output[i].swizzle_x = 7; 854 output[i].swizzle_y = 1; 855 output[i].swizzle_z = output[i].swizzle_w = 7; 856 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 857 } else { 858 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 859 r = -EINVAL; 860 goto out_err; 861 } 862 break; 863 default: 864 R600_ERR("unsupported processor type %d\n", ctx.type); 865 r = -EINVAL; 866 goto out_err; 867 } 868 } 869 /* add fake param output for vertex shader if no param is exported */ 870 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 871 for (i = 0, pos0 = 0; i < noutput; i++) { 872 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 873 pos0 = 1; 874 break; 875 } 876 } 877 if (!pos0) { 878 memset(&output[i], 0, sizeof(struct r600_bc_output)); 879 output[i].gpr = 0; 880 output[i].elem_size = 3; 881 output[i].swizzle_x = 0; 882 output[i].swizzle_y = 1; 883 output[i].swizzle_z = 2; 884 output[i].swizzle_w = 3; 885 output[i].burst_count = 1; 886 output[i].barrier = 1; 887 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 888 output[i].array_base = 0; 889 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 890 noutput++; 891 } 892 } 893 /* add fake pixel export */ 894 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 895 memset(&output[0], 0, sizeof(struct r600_bc_output)); 896 output[0].gpr = 0; 897 output[0].elem_size = 3; 898 output[0].swizzle_x = 7; 899 output[0].swizzle_y = 7; 900 output[0].swizzle_z = 7; 901 output[0].swizzle_w = 7; 902 output[0].burst_count = 1; 903 output[0].barrier = 1; 904 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 905 output[0].array_base = 0; 906 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 907 noutput++; 908 } 909 /* set export done on last export of each type */ 910 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 911 if (i == (noutput - 1)) { 912 output[i].end_of_program = 1; 913 } 914 if (!(output_done & (1 << output[i].type))) { 915 output_done |= (1 << output[i].type); 916 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 917 } 918 } 919 /* add output to bytecode */ 920 for (i = 0; i < noutput; i++) { 921 r = r600_bc_add_output(ctx.bc, &output[i]); 922 if (r) 923 goto out_err; 924 } 925 free(ctx.literals); 926 tgsi_parse_free(&ctx.parse); 927 return 0; 928out_err: 929 free(ctx.literals); 930 tgsi_parse_free(&ctx.parse); 931 return r; 932} 933 934static int tgsi_unsupported(struct r600_shader_ctx *ctx) 935{ 936 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 937 return -EINVAL; 938} 939 940static int tgsi_end(struct r600_shader_ctx *ctx) 941{ 942 return 0; 943} 944 945static void r600_bc_src(struct r600_bc_alu_src *bc_src, 946 const struct r600_shader_src *shader_src, 947 unsigned chan) 948{ 949 bc_src->sel = shader_src->sel; 950 bc_src->chan = shader_src->swizzle[chan]; 951 bc_src->neg = shader_src->neg; 952 bc_src->abs = shader_src->abs; 953 bc_src->rel = shader_src->rel; 954 bc_src->value = shader_src->value[bc_src->chan]; 955} 956 957static void tgsi_dst(struct r600_shader_ctx *ctx, 958 const struct tgsi_full_dst_register *tgsi_dst, 959 unsigned swizzle, 960 struct r600_bc_alu_dst *r600_dst) 961{ 962 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 963 964 r600_dst->sel = tgsi_dst->Register.Index; 965 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 966 r600_dst->chan = swizzle; 967 r600_dst->write = 1; 968 if (tgsi_dst->Register.Indirect) 969 r600_dst->rel = V_SQ_REL_RELATIVE; 970 if (inst->Instruction.Saturate) { 971 r600_dst->clamp = 1; 972 } 973} 974 975static int tgsi_last_instruction(unsigned writemask) 976{ 977 int i, lasti = 0; 978 979 for (i = 0; i < 4; i++) { 980 if (writemask & (1 << i)) { 981 lasti = i; 982 } 983 } 984 return lasti; 985} 986 987static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 988{ 989 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 990 struct r600_bc_alu alu; 991 int i, j, r; 992 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 993 994 for (i = 0; i < lasti + 1; i++) { 995 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 996 continue; 997 998 memset(&alu, 0, sizeof(struct r600_bc_alu)); 999 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1000 1001 alu.inst = ctx->inst_info->r600_opcode; 1002 if (!swap) { 1003 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1004 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1005 } 1006 } else { 1007 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1008 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1009 } 1010 /* handle some special cases */ 1011 switch (ctx->inst_info->tgsi_opcode) { 1012 case TGSI_OPCODE_SUB: 1013 alu.src[1].neg = 1; 1014 break; 1015 case TGSI_OPCODE_ABS: 1016 alu.src[0].abs = 1; 1017 /* negation is performed after absolute value is taken */ 1018 alu.src[0].neg = 0; 1019 break; 1020 default: 1021 break; 1022 } 1023 if (i == lasti) { 1024 alu.last = 1; 1025 } 1026 r = r600_bc_add_alu(ctx->bc, &alu); 1027 if (r) 1028 return r; 1029 } 1030 return 0; 1031} 1032 1033static int tgsi_op2(struct r600_shader_ctx *ctx) 1034{ 1035 return tgsi_op2_s(ctx, 0); 1036} 1037 1038static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1039{ 1040 return tgsi_op2_s(ctx, 1); 1041} 1042 1043/* 1044 * r600 - trunc to -PI..PI range 1045 * r700 - normalize by dividing by 2PI 1046 * see fdo bug 27901 1047 */ 1048static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1049{ 1050 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1051 static float double_pi = 3.1415926535 * 2; 1052 static float neg_pi = -3.1415926535; 1053 1054 int r; 1055 struct r600_bc_alu alu; 1056 1057 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1059 alu.is_op3 = 1; 1060 1061 alu.dst.chan = 0; 1062 alu.dst.sel = ctx->temp_reg; 1063 alu.dst.write = 1; 1064 1065 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1066 1067 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1068 alu.src[1].chan = 0; 1069 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1070 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1071 alu.src[2].chan = 0; 1072 alu.last = 1; 1073 r = r600_bc_add_alu(ctx->bc, &alu); 1074 if (r) 1075 return r; 1076 1077 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1079 1080 alu.dst.chan = 0; 1081 alu.dst.sel = ctx->temp_reg; 1082 alu.dst.write = 1; 1083 1084 alu.src[0].sel = ctx->temp_reg; 1085 alu.src[0].chan = 0; 1086 alu.last = 1; 1087 r = r600_bc_add_alu(ctx->bc, &alu); 1088 if (r) 1089 return r; 1090 1091 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1092 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1093 alu.is_op3 = 1; 1094 1095 alu.dst.chan = 0; 1096 alu.dst.sel = ctx->temp_reg; 1097 alu.dst.write = 1; 1098 1099 alu.src[0].sel = ctx->temp_reg; 1100 alu.src[0].chan = 0; 1101 1102 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1103 alu.src[1].chan = 0; 1104 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1105 alu.src[2].chan = 0; 1106 1107 if (ctx->bc->chiprev == CHIPREV_R600) { 1108 alu.src[1].value = *(uint32_t *)&double_pi; 1109 alu.src[2].value = *(uint32_t *)&neg_pi; 1110 } else { 1111 alu.src[1].sel = V_SQ_ALU_SRC_1; 1112 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1113 alu.src[2].neg = 1; 1114 } 1115 1116 alu.last = 1; 1117 r = r600_bc_add_alu(ctx->bc, &alu); 1118 if (r) 1119 return r; 1120 return 0; 1121} 1122 1123static int tgsi_trig(struct r600_shader_ctx *ctx) 1124{ 1125 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1126 struct r600_bc_alu alu; 1127 int i, r; 1128 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1129 1130 r = tgsi_setup_trig(ctx); 1131 if (r) 1132 return r; 1133 1134 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1135 alu.inst = ctx->inst_info->r600_opcode; 1136 alu.dst.chan = 0; 1137 alu.dst.sel = ctx->temp_reg; 1138 alu.dst.write = 1; 1139 1140 alu.src[0].sel = ctx->temp_reg; 1141 alu.src[0].chan = 0; 1142 alu.last = 1; 1143 r = r600_bc_add_alu(ctx->bc, &alu); 1144 if (r) 1145 return r; 1146 1147 /* replicate result */ 1148 for (i = 0; i < lasti + 1; i++) { 1149 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1150 continue; 1151 1152 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1154 1155 alu.src[0].sel = ctx->temp_reg; 1156 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1157 if (i == lasti) 1158 alu.last = 1; 1159 r = r600_bc_add_alu(ctx->bc, &alu); 1160 if (r) 1161 return r; 1162 } 1163 return 0; 1164} 1165 1166static int tgsi_scs(struct r600_shader_ctx *ctx) 1167{ 1168 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1169 struct r600_bc_alu alu; 1170 int r; 1171 1172 /* We'll only need the trig stuff if we are going to write to the 1173 * X or Y components of the destination vector. 1174 */ 1175 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1176 r = tgsi_setup_trig(ctx); 1177 if (r) 1178 return r; 1179 } 1180 1181 /* dst.x = COS */ 1182 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1183 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1185 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1186 1187 alu.src[0].sel = ctx->temp_reg; 1188 alu.src[0].chan = 0; 1189 alu.last = 1; 1190 r = r600_bc_add_alu(ctx->bc, &alu); 1191 if (r) 1192 return r; 1193 } 1194 1195 /* dst.y = SIN */ 1196 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1197 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1199 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1200 1201 alu.src[0].sel = ctx->temp_reg; 1202 alu.src[0].chan = 0; 1203 alu.last = 1; 1204 r = r600_bc_add_alu(ctx->bc, &alu); 1205 if (r) 1206 return r; 1207 } 1208 1209 /* dst.z = 0.0; */ 1210 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1211 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1212 1213 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1214 1215 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1216 1217 alu.src[0].sel = V_SQ_ALU_SRC_0; 1218 alu.src[0].chan = 0; 1219 1220 alu.last = 1; 1221 1222 r = r600_bc_add_alu(ctx->bc, &alu); 1223 if (r) 1224 return r; 1225 } 1226 1227 /* dst.w = 1.0; */ 1228 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1229 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1230 1231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1232 1233 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1234 1235 alu.src[0].sel = V_SQ_ALU_SRC_1; 1236 alu.src[0].chan = 0; 1237 1238 alu.last = 1; 1239 1240 r = r600_bc_add_alu(ctx->bc, &alu); 1241 if (r) 1242 return r; 1243 } 1244 1245 return 0; 1246} 1247 1248static int tgsi_kill(struct r600_shader_ctx *ctx) 1249{ 1250 struct r600_bc_alu alu; 1251 int i, r; 1252 1253 for (i = 0; i < 4; i++) { 1254 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1255 alu.inst = ctx->inst_info->r600_opcode; 1256 1257 alu.dst.chan = i; 1258 1259 alu.src[0].sel = V_SQ_ALU_SRC_0; 1260 1261 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1262 alu.src[1].sel = V_SQ_ALU_SRC_1; 1263 alu.src[1].neg = 1; 1264 } else { 1265 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1266 } 1267 if (i == 3) { 1268 alu.last = 1; 1269 } 1270 r = r600_bc_add_alu(ctx->bc, &alu); 1271 if (r) 1272 return r; 1273 } 1274 1275 /* kill must be last in ALU */ 1276 ctx->bc->force_add_cf = 1; 1277 ctx->shader->uses_kill = TRUE; 1278 return 0; 1279} 1280 1281static int tgsi_lit(struct r600_shader_ctx *ctx) 1282{ 1283 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1284 struct r600_bc_alu alu; 1285 int r; 1286 1287 /* dst.x, <- 1.0 */ 1288 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1290 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1291 alu.src[0].chan = 0; 1292 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1293 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1294 r = r600_bc_add_alu(ctx->bc, &alu); 1295 if (r) 1296 return r; 1297 1298 /* dst.y = max(src.x, 0.0) */ 1299 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1301 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1302 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1303 alu.src[1].chan = 0; 1304 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1305 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1306 r = r600_bc_add_alu(ctx->bc, &alu); 1307 if (r) 1308 return r; 1309 1310 /* dst.w, <- 1.0 */ 1311 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1313 alu.src[0].sel = V_SQ_ALU_SRC_1; 1314 alu.src[0].chan = 0; 1315 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1316 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1317 alu.last = 1; 1318 r = r600_bc_add_alu(ctx->bc, &alu); 1319 if (r) 1320 return r; 1321 1322 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1323 { 1324 int chan; 1325 int sel; 1326 1327 /* dst.z = log(src.y) */ 1328 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1330 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1331 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1332 alu.last = 1; 1333 r = r600_bc_add_alu(ctx->bc, &alu); 1334 if (r) 1335 return r; 1336 1337 chan = alu.dst.chan; 1338 sel = alu.dst.sel; 1339 1340 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1341 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1342 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1343 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1344 alu.src[1].sel = sel; 1345 alu.src[1].chan = chan; 1346 1347 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1348 alu.dst.sel = ctx->temp_reg; 1349 alu.dst.chan = 0; 1350 alu.dst.write = 1; 1351 alu.is_op3 = 1; 1352 alu.last = 1; 1353 r = r600_bc_add_alu(ctx->bc, &alu); 1354 if (r) 1355 return r; 1356 1357 /* dst.z = exp(tmp.x) */ 1358 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1359 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1360 alu.src[0].sel = ctx->temp_reg; 1361 alu.src[0].chan = 0; 1362 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1363 alu.last = 1; 1364 r = r600_bc_add_alu(ctx->bc, &alu); 1365 if (r) 1366 return r; 1367 } 1368 return 0; 1369} 1370 1371static int tgsi_rsq(struct r600_shader_ctx *ctx) 1372{ 1373 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1374 struct r600_bc_alu alu; 1375 int i, r; 1376 1377 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1378 1379 /* FIXME: 1380 * For state trackers other than OpenGL, we'll want to use 1381 * _RECIPSQRT_IEEE instead. 1382 */ 1383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1384 1385 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1386 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1387 alu.src[i].abs = 1; 1388 } 1389 alu.dst.sel = ctx->temp_reg; 1390 alu.dst.write = 1; 1391 alu.last = 1; 1392 r = r600_bc_add_alu(ctx->bc, &alu); 1393 if (r) 1394 return r; 1395 /* replicate result */ 1396 return tgsi_helper_tempx_replicate(ctx); 1397} 1398 1399static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1400{ 1401 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1402 struct r600_bc_alu alu; 1403 int i, r; 1404 1405 for (i = 0; i < 4; i++) { 1406 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1407 alu.src[0].sel = ctx->temp_reg; 1408 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1409 alu.dst.chan = i; 1410 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1411 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1412 if (i == 3) 1413 alu.last = 1; 1414 r = r600_bc_add_alu(ctx->bc, &alu); 1415 if (r) 1416 return r; 1417 } 1418 return 0; 1419} 1420 1421static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1422{ 1423 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1424 struct r600_bc_alu alu; 1425 int i, r; 1426 1427 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1428 alu.inst = ctx->inst_info->r600_opcode; 1429 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1430 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1431 } 1432 alu.dst.sel = ctx->temp_reg; 1433 alu.dst.write = 1; 1434 alu.last = 1; 1435 r = r600_bc_add_alu(ctx->bc, &alu); 1436 if (r) 1437 return r; 1438 /* replicate result */ 1439 return tgsi_helper_tempx_replicate(ctx); 1440} 1441 1442static int tgsi_pow(struct r600_shader_ctx *ctx) 1443{ 1444 struct r600_bc_alu alu; 1445 int r; 1446 1447 /* LOG2(a) */ 1448 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1449 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1450 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1451 alu.dst.sel = ctx->temp_reg; 1452 alu.dst.write = 1; 1453 alu.last = 1; 1454 r = r600_bc_add_alu(ctx->bc, &alu); 1455 if (r) 1456 return r; 1457 /* b * LOG2(a) */ 1458 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1460 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1461 alu.src[1].sel = ctx->temp_reg; 1462 alu.dst.sel = ctx->temp_reg; 1463 alu.dst.write = 1; 1464 alu.last = 1; 1465 r = r600_bc_add_alu(ctx->bc, &alu); 1466 if (r) 1467 return r; 1468 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1469 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1471 alu.src[0].sel = ctx->temp_reg; 1472 alu.dst.sel = ctx->temp_reg; 1473 alu.dst.write = 1; 1474 alu.last = 1; 1475 r = r600_bc_add_alu(ctx->bc, &alu); 1476 if (r) 1477 return r; 1478 return tgsi_helper_tempx_replicate(ctx); 1479} 1480 1481static int tgsi_ssg(struct r600_shader_ctx *ctx) 1482{ 1483 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1484 struct r600_bc_alu alu; 1485 int i, r; 1486 1487 /* tmp = (src > 0 ? 1 : src) */ 1488 for (i = 0; i < 4; i++) { 1489 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1491 alu.is_op3 = 1; 1492 1493 alu.dst.sel = ctx->temp_reg; 1494 alu.dst.chan = i; 1495 1496 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1497 alu.src[1].sel = V_SQ_ALU_SRC_1; 1498 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1499 1500 if (i == 3) 1501 alu.last = 1; 1502 r = r600_bc_add_alu(ctx->bc, &alu); 1503 if (r) 1504 return r; 1505 } 1506 1507 /* dst = (-tmp > 0 ? -1 : tmp) */ 1508 for (i = 0; i < 4; i++) { 1509 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1510 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1511 alu.is_op3 = 1; 1512 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1513 1514 alu.src[0].sel = ctx->temp_reg; 1515 alu.src[0].chan = i; 1516 alu.src[0].neg = 1; 1517 1518 alu.src[1].sel = V_SQ_ALU_SRC_1; 1519 alu.src[1].neg = 1; 1520 1521 alu.src[2].sel = ctx->temp_reg; 1522 alu.src[2].chan = i; 1523 1524 if (i == 3) 1525 alu.last = 1; 1526 r = r600_bc_add_alu(ctx->bc, &alu); 1527 if (r) 1528 return r; 1529 } 1530 return 0; 1531} 1532 1533static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1534{ 1535 struct r600_bc_alu alu; 1536 int i, r; 1537 1538 for (i = 0; i < 4; i++) { 1539 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1540 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1541 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1542 alu.dst.chan = i; 1543 } else { 1544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1546 alu.src[0].sel = ctx->temp_reg; 1547 alu.src[0].chan = i; 1548 } 1549 if (i == 3) { 1550 alu.last = 1; 1551 } 1552 r = r600_bc_add_alu(ctx->bc, &alu); 1553 if (r) 1554 return r; 1555 } 1556 return 0; 1557} 1558 1559static int tgsi_op3(struct r600_shader_ctx *ctx) 1560{ 1561 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1562 struct r600_bc_alu alu; 1563 int i, j, r; 1564 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1565 1566 for (i = 0; i < lasti + 1; i++) { 1567 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1568 continue; 1569 1570 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1571 alu.inst = ctx->inst_info->r600_opcode; 1572 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1573 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1574 } 1575 1576 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1577 alu.dst.chan = i; 1578 alu.dst.write = 1; 1579 alu.is_op3 = 1; 1580 if (i == lasti) { 1581 alu.last = 1; 1582 } 1583 r = r600_bc_add_alu(ctx->bc, &alu); 1584 if (r) 1585 return r; 1586 } 1587 return 0; 1588} 1589 1590static int tgsi_dp(struct r600_shader_ctx *ctx) 1591{ 1592 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1593 struct r600_bc_alu alu; 1594 int i, j, r; 1595 1596 for (i = 0; i < 4; i++) { 1597 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1598 alu.inst = ctx->inst_info->r600_opcode; 1599 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1600 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1601 } 1602 1603 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1604 alu.dst.chan = i; 1605 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1606 /* handle some special cases */ 1607 switch (ctx->inst_info->tgsi_opcode) { 1608 case TGSI_OPCODE_DP2: 1609 if (i > 1) { 1610 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1611 alu.src[0].chan = alu.src[1].chan = 0; 1612 } 1613 break; 1614 case TGSI_OPCODE_DP3: 1615 if (i > 2) { 1616 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1617 alu.src[0].chan = alu.src[1].chan = 0; 1618 } 1619 break; 1620 case TGSI_OPCODE_DPH: 1621 if (i == 3) { 1622 alu.src[0].sel = V_SQ_ALU_SRC_1; 1623 alu.src[0].chan = 0; 1624 alu.src[0].neg = 0; 1625 } 1626 break; 1627 default: 1628 break; 1629 } 1630 if (i == 3) { 1631 alu.last = 1; 1632 } 1633 r = r600_bc_add_alu(ctx->bc, &alu); 1634 if (r) 1635 return r; 1636 } 1637 return 0; 1638} 1639 1640static int tgsi_tex(struct r600_shader_ctx *ctx) 1641{ 1642 static float one_point_five = 1.5f; 1643 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1644 struct r600_bc_tex tex; 1645 struct r600_bc_alu alu; 1646 unsigned src_gpr; 1647 int r, i; 1648 int opcode; 1649 boolean src_not_temp = 1650 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && 1651 inst->Src[0].Register.File != TGSI_FILE_INPUT; 1652 1653 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1654 1655 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1656 /* Add perspective divide */ 1657 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1658 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1659 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1660 1661 alu.dst.sel = ctx->temp_reg; 1662 alu.dst.chan = 3; 1663 alu.last = 1; 1664 alu.dst.write = 1; 1665 r = r600_bc_add_alu(ctx->bc, &alu); 1666 if (r) 1667 return r; 1668 1669 for (i = 0; i < 3; i++) { 1670 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1671 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1672 alu.src[0].sel = ctx->temp_reg; 1673 alu.src[0].chan = 3; 1674 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1675 alu.dst.sel = ctx->temp_reg; 1676 alu.dst.chan = i; 1677 alu.dst.write = 1; 1678 r = r600_bc_add_alu(ctx->bc, &alu); 1679 if (r) 1680 return r; 1681 } 1682 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1684 alu.src[0].sel = V_SQ_ALU_SRC_1; 1685 alu.src[0].chan = 0; 1686 alu.dst.sel = ctx->temp_reg; 1687 alu.dst.chan = 3; 1688 alu.last = 1; 1689 alu.dst.write = 1; 1690 r = r600_bc_add_alu(ctx->bc, &alu); 1691 if (r) 1692 return r; 1693 src_not_temp = FALSE; 1694 src_gpr = ctx->temp_reg; 1695 } 1696 1697 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1698 int src_chan, src2_chan; 1699 1700 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1701 for (i = 0; i < 4; i++) { 1702 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1704 switch (i) { 1705 case 0: 1706 src_chan = 2; 1707 src2_chan = 1; 1708 break; 1709 case 1: 1710 src_chan = 2; 1711 src2_chan = 0; 1712 break; 1713 case 2: 1714 src_chan = 0; 1715 src2_chan = 2; 1716 break; 1717 case 3: 1718 src_chan = 1; 1719 src2_chan = 2; 1720 break; 1721 default: 1722 assert(0); 1723 src_chan = 0; 1724 src2_chan = 0; 1725 break; 1726 } 1727 r600_bc_src(&alu.src[0], &ctx->src[0], src_chan); 1728 r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan); 1729 alu.dst.sel = ctx->temp_reg; 1730 alu.dst.chan = i; 1731 if (i == 3) 1732 alu.last = 1; 1733 alu.dst.write = 1; 1734 r = r600_bc_add_alu(ctx->bc, &alu); 1735 if (r) 1736 return r; 1737 } 1738 1739 /* tmp1.z = RCP_e(|tmp1.z|) */ 1740 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1741 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1742 alu.src[0].sel = ctx->temp_reg; 1743 alu.src[0].chan = 2; 1744 alu.src[0].abs = 1; 1745 alu.dst.sel = ctx->temp_reg; 1746 alu.dst.chan = 2; 1747 alu.dst.write = 1; 1748 alu.last = 1; 1749 r = r600_bc_add_alu(ctx->bc, &alu); 1750 if (r) 1751 return r; 1752 1753 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1754 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1755 * muladd has no writemask, have to use another temp 1756 */ 1757 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1758 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1759 alu.is_op3 = 1; 1760 1761 alu.src[0].sel = ctx->temp_reg; 1762 alu.src[0].chan = 0; 1763 alu.src[1].sel = ctx->temp_reg; 1764 alu.src[1].chan = 2; 1765 1766 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1767 alu.src[2].chan = 0; 1768 alu.src[2].value = *(uint32_t *)&one_point_five; 1769 1770 alu.dst.sel = ctx->temp_reg; 1771 alu.dst.chan = 0; 1772 alu.dst.write = 1; 1773 1774 r = r600_bc_add_alu(ctx->bc, &alu); 1775 if (r) 1776 return r; 1777 1778 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1780 alu.is_op3 = 1; 1781 1782 alu.src[0].sel = ctx->temp_reg; 1783 alu.src[0].chan = 1; 1784 alu.src[1].sel = ctx->temp_reg; 1785 alu.src[1].chan = 2; 1786 1787 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1788 alu.src[2].chan = 0; 1789 alu.src[2].value = *(uint32_t *)&one_point_five; 1790 1791 alu.dst.sel = ctx->temp_reg; 1792 alu.dst.chan = 1; 1793 alu.dst.write = 1; 1794 1795 alu.last = 1; 1796 r = r600_bc_add_alu(ctx->bc, &alu); 1797 if (r) 1798 return r; 1799 1800 src_not_temp = FALSE; 1801 src_gpr = ctx->temp_reg; 1802 } 1803 1804 if (src_not_temp) { 1805 for (i = 0; i < 4; i++) { 1806 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1808 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1809 alu.dst.sel = ctx->temp_reg; 1810 alu.dst.chan = i; 1811 if (i == 3) 1812 alu.last = 1; 1813 alu.dst.write = 1; 1814 r = r600_bc_add_alu(ctx->bc, &alu); 1815 if (r) 1816 return r; 1817 } 1818 src_gpr = ctx->temp_reg; 1819 } 1820 1821 opcode = ctx->inst_info->r600_opcode; 1822 if (opcode == SQ_TEX_INST_SAMPLE && 1823 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1824 opcode = SQ_TEX_INST_SAMPLE_C; 1825 1826 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1827 tex.inst = opcode; 1828 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1829 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1830 tex.src_gpr = src_gpr; 1831 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1832 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1833 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1834 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1835 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 1836 tex.src_sel_x = 0; 1837 tex.src_sel_y = 1; 1838 tex.src_sel_z = 2; 1839 tex.src_sel_w = 3; 1840 1841 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1842 tex.src_sel_x = 1; 1843 tex.src_sel_y = 0; 1844 tex.src_sel_z = 3; 1845 tex.src_sel_w = 1; 1846 } 1847 1848 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1849 tex.coord_type_x = 1; 1850 tex.coord_type_y = 1; 1851 tex.coord_type_z = 1; 1852 tex.coord_type_w = 1; 1853 } 1854 1855 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 1856 tex.coord_type_z = 0; 1857 tex.src_sel_z = 1; 1858 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 1859 tex.coord_type_z = 0; 1860 1861 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1862 tex.src_sel_w = 2; 1863 1864 r = r600_bc_add_tex(ctx->bc, &tex); 1865 if (r) 1866 return r; 1867 1868 /* add shadow ambient support - gallium doesn't do it yet */ 1869 return 0; 1870} 1871 1872static int tgsi_lrp(struct r600_shader_ctx *ctx) 1873{ 1874 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1875 struct r600_bc_alu alu; 1876 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1877 unsigned i; 1878 int r; 1879 1880 /* optimize if it's just an equal balance */ 1881 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 1882 for (i = 0; i < lasti + 1; i++) { 1883 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1884 continue; 1885 1886 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1887 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1888 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1889 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1890 alu.omod = 3; 1891 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1892 alu.dst.chan = i; 1893 if (i == lasti) { 1894 alu.last = 1; 1895 } 1896 r = r600_bc_add_alu(ctx->bc, &alu); 1897 if (r) 1898 return r; 1899 } 1900 return 0; 1901 } 1902 1903 /* 1 - src0 */ 1904 for (i = 0; i < lasti + 1; i++) { 1905 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1906 continue; 1907 1908 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1909 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1910 alu.src[0].sel = V_SQ_ALU_SRC_1; 1911 alu.src[0].chan = 0; 1912 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1913 alu.src[1].neg = 1; 1914 alu.dst.sel = ctx->temp_reg; 1915 alu.dst.chan = i; 1916 if (i == lasti) { 1917 alu.last = 1; 1918 } 1919 alu.dst.write = 1; 1920 r = r600_bc_add_alu(ctx->bc, &alu); 1921 if (r) 1922 return r; 1923 } 1924 1925 /* (1 - src0) * src2 */ 1926 for (i = 0; i < lasti + 1; i++) { 1927 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1928 continue; 1929 1930 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1931 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1932 alu.src[0].sel = ctx->temp_reg; 1933 alu.src[0].chan = i; 1934 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1935 alu.dst.sel = ctx->temp_reg; 1936 alu.dst.chan = i; 1937 if (i == lasti) { 1938 alu.last = 1; 1939 } 1940 alu.dst.write = 1; 1941 r = r600_bc_add_alu(ctx->bc, &alu); 1942 if (r) 1943 return r; 1944 } 1945 1946 /* src0 * src1 + (1 - src0) * src2 */ 1947 for (i = 0; i < lasti + 1; i++) { 1948 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1949 continue; 1950 1951 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1952 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1953 alu.is_op3 = 1; 1954 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1955 r600_bc_src(&alu.src[1], &ctx->src[1], i); 1956 alu.src[2].sel = ctx->temp_reg; 1957 alu.src[2].chan = i; 1958 1959 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1960 alu.dst.chan = i; 1961 if (i == lasti) { 1962 alu.last = 1; 1963 } 1964 r = r600_bc_add_alu(ctx->bc, &alu); 1965 if (r) 1966 return r; 1967 } 1968 return 0; 1969} 1970 1971static int tgsi_cmp(struct r600_shader_ctx *ctx) 1972{ 1973 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1974 struct r600_bc_alu alu; 1975 int i, r; 1976 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1977 1978 for (i = 0; i < lasti + 1; i++) { 1979 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1980 continue; 1981 1982 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1983 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1984 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1985 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1986 r600_bc_src(&alu.src[2], &ctx->src[1], i); 1987 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1988 alu.dst.chan = i; 1989 alu.dst.write = 1; 1990 alu.is_op3 = 1; 1991 if (i == lasti) 1992 alu.last = 1; 1993 r = r600_bc_add_alu(ctx->bc, &alu); 1994 if (r) 1995 return r; 1996 } 1997 return 0; 1998} 1999 2000static int tgsi_xpd(struct r600_shader_ctx *ctx) 2001{ 2002 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2003 struct r600_bc_alu alu; 2004 uint32_t use_temp = 0; 2005 int i, r; 2006 2007 if (inst->Dst[0].Register.WriteMask != 0xf) 2008 use_temp = 1; 2009 2010 for (i = 0; i < 4; i++) { 2011 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2012 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2013 2014 switch (i) { 2015 case 0: 2016 r600_bc_src(&alu.src[0], &ctx->src[0], 2); 2017 break; 2018 case 1: 2019 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2020 break; 2021 case 2: 2022 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 2023 break; 2024 case 3: 2025 alu.src[0].sel = V_SQ_ALU_SRC_0; 2026 alu.src[0].chan = i; 2027 } 2028 2029 switch (i) { 2030 case 0: 2031 r600_bc_src(&alu.src[1], &ctx->src[1], 1); 2032 break; 2033 case 1: 2034 r600_bc_src(&alu.src[1], &ctx->src[1], 2); 2035 break; 2036 case 2: 2037 r600_bc_src(&alu.src[1], &ctx->src[1], 0); 2038 break; 2039 case 3: 2040 alu.src[1].sel = V_SQ_ALU_SRC_0; 2041 alu.src[1].chan = i; 2042 } 2043 2044 alu.dst.sel = ctx->temp_reg; 2045 alu.dst.chan = i; 2046 alu.dst.write = 1; 2047 2048 if (i == 3) 2049 alu.last = 1; 2050 r = r600_bc_add_alu(ctx->bc, &alu); 2051 if (r) 2052 return r; 2053 } 2054 2055 for (i = 0; i < 4; i++) { 2056 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2057 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2058 2059 switch (i) { 2060 case 0: 2061 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 2062 break; 2063 case 1: 2064 r600_bc_src(&alu.src[0], &ctx->src[0], 2); 2065 break; 2066 case 2: 2067 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2068 break; 2069 case 3: 2070 alu.src[0].sel = V_SQ_ALU_SRC_0; 2071 alu.src[0].chan = i; 2072 } 2073 2074 switch (i) { 2075 case 0: 2076 r600_bc_src(&alu.src[1], &ctx->src[1], 2); 2077 break; 2078 case 1: 2079 r600_bc_src(&alu.src[1], &ctx->src[1], 0); 2080 break; 2081 case 2: 2082 r600_bc_src(&alu.src[1], &ctx->src[1], 1); 2083 break; 2084 case 3: 2085 alu.src[1].sel = V_SQ_ALU_SRC_0; 2086 alu.src[1].chan = i; 2087 } 2088 2089 alu.src[2].sel = ctx->temp_reg; 2090 alu.src[2].neg = 1; 2091 alu.src[2].chan = i; 2092 2093 if (use_temp) 2094 alu.dst.sel = ctx->temp_reg; 2095 else 2096 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2097 alu.dst.chan = i; 2098 alu.dst.write = 1; 2099 alu.is_op3 = 1; 2100 if (i == 3) 2101 alu.last = 1; 2102 r = r600_bc_add_alu(ctx->bc, &alu); 2103 if (r) 2104 return r; 2105 } 2106 if (use_temp) 2107 return tgsi_helper_copy(ctx, inst); 2108 return 0; 2109} 2110 2111static int tgsi_exp(struct r600_shader_ctx *ctx) 2112{ 2113 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2114 struct r600_bc_alu alu; 2115 int r; 2116 2117 /* result.x = 2^floor(src); */ 2118 if (inst->Dst[0].Register.WriteMask & 1) { 2119 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2120 2121 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2122 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2123 2124 alu.dst.sel = ctx->temp_reg; 2125 alu.dst.chan = 0; 2126 alu.dst.write = 1; 2127 alu.last = 1; 2128 r = r600_bc_add_alu(ctx->bc, &alu); 2129 if (r) 2130 return r; 2131 2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2133 alu.src[0].sel = ctx->temp_reg; 2134 alu.src[0].chan = 0; 2135 2136 alu.dst.sel = ctx->temp_reg; 2137 alu.dst.chan = 0; 2138 alu.dst.write = 1; 2139 alu.last = 1; 2140 r = r600_bc_add_alu(ctx->bc, &alu); 2141 if (r) 2142 return r; 2143 } 2144 2145 /* result.y = tmp - floor(tmp); */ 2146 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2147 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2148 2149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2150 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2151 2152 alu.dst.sel = ctx->temp_reg; 2153// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2154// if (r) 2155// return r; 2156 alu.dst.write = 1; 2157 alu.dst.chan = 1; 2158 2159 alu.last = 1; 2160 2161 r = r600_bc_add_alu(ctx->bc, &alu); 2162 if (r) 2163 return r; 2164 } 2165 2166 /* result.z = RoughApprox2ToX(tmp);*/ 2167 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2168 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2170 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2171 2172 alu.dst.sel = ctx->temp_reg; 2173 alu.dst.write = 1; 2174 alu.dst.chan = 2; 2175 2176 alu.last = 1; 2177 2178 r = r600_bc_add_alu(ctx->bc, &alu); 2179 if (r) 2180 return r; 2181 } 2182 2183 /* result.w = 1.0;*/ 2184 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2185 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2186 2187 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2188 alu.src[0].sel = V_SQ_ALU_SRC_1; 2189 alu.src[0].chan = 0; 2190 2191 alu.dst.sel = ctx->temp_reg; 2192 alu.dst.chan = 3; 2193 alu.dst.write = 1; 2194 alu.last = 1; 2195 r = r600_bc_add_alu(ctx->bc, &alu); 2196 if (r) 2197 return r; 2198 } 2199 return tgsi_helper_copy(ctx, inst); 2200} 2201 2202static int tgsi_log(struct r600_shader_ctx *ctx) 2203{ 2204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2205 struct r600_bc_alu alu; 2206 int r; 2207 2208 /* result.x = floor(log2(src)); */ 2209 if (inst->Dst[0].Register.WriteMask & 1) { 2210 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2211 2212 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2213 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2214 2215 alu.dst.sel = ctx->temp_reg; 2216 alu.dst.chan = 0; 2217 alu.dst.write = 1; 2218 alu.last = 1; 2219 r = r600_bc_add_alu(ctx->bc, &alu); 2220 if (r) 2221 return r; 2222 2223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2224 alu.src[0].sel = ctx->temp_reg; 2225 alu.src[0].chan = 0; 2226 2227 alu.dst.sel = ctx->temp_reg; 2228 alu.dst.chan = 0; 2229 alu.dst.write = 1; 2230 alu.last = 1; 2231 2232 r = r600_bc_add_alu(ctx->bc, &alu); 2233 if (r) 2234 return r; 2235 } 2236 2237 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2238 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2239 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2240 2241 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2242 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2243 2244 alu.dst.sel = ctx->temp_reg; 2245 alu.dst.chan = 1; 2246 alu.dst.write = 1; 2247 alu.last = 1; 2248 2249 r = r600_bc_add_alu(ctx->bc, &alu); 2250 if (r) 2251 return r; 2252 2253 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2254 2255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2256 alu.src[0].sel = ctx->temp_reg; 2257 alu.src[0].chan = 1; 2258 2259 alu.dst.sel = ctx->temp_reg; 2260 alu.dst.chan = 1; 2261 alu.dst.write = 1; 2262 alu.last = 1; 2263 2264 r = r600_bc_add_alu(ctx->bc, &alu); 2265 if (r) 2266 return r; 2267 2268 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2269 2270 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2271 alu.src[0].sel = ctx->temp_reg; 2272 alu.src[0].chan = 1; 2273 2274 alu.dst.sel = ctx->temp_reg; 2275 alu.dst.chan = 1; 2276 alu.dst.write = 1; 2277 alu.last = 1; 2278 2279 r = r600_bc_add_alu(ctx->bc, &alu); 2280 if (r) 2281 return r; 2282 2283 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2284 2285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2286 alu.src[0].sel = ctx->temp_reg; 2287 alu.src[0].chan = 1; 2288 2289 alu.dst.sel = ctx->temp_reg; 2290 alu.dst.chan = 1; 2291 alu.dst.write = 1; 2292 alu.last = 1; 2293 2294 r = r600_bc_add_alu(ctx->bc, &alu); 2295 if (r) 2296 return r; 2297 2298 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2299 2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2301 2302 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2303 2304 alu.src[1].sel = ctx->temp_reg; 2305 alu.src[1].chan = 1; 2306 2307 alu.dst.sel = ctx->temp_reg; 2308 alu.dst.chan = 1; 2309 alu.dst.write = 1; 2310 alu.last = 1; 2311 2312 r = r600_bc_add_alu(ctx->bc, &alu); 2313 if (r) 2314 return r; 2315 } 2316 2317 /* result.z = log2(src);*/ 2318 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2319 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2320 2321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2322 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2323 2324 alu.dst.sel = ctx->temp_reg; 2325 alu.dst.write = 1; 2326 alu.dst.chan = 2; 2327 alu.last = 1; 2328 2329 r = r600_bc_add_alu(ctx->bc, &alu); 2330 if (r) 2331 return r; 2332 } 2333 2334 /* result.w = 1.0; */ 2335 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2336 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2337 2338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2339 alu.src[0].sel = V_SQ_ALU_SRC_1; 2340 alu.src[0].chan = 0; 2341 2342 alu.dst.sel = ctx->temp_reg; 2343 alu.dst.chan = 3; 2344 alu.dst.write = 1; 2345 alu.last = 1; 2346 2347 r = r600_bc_add_alu(ctx->bc, &alu); 2348 if (r) 2349 return r; 2350 } 2351 2352 return tgsi_helper_copy(ctx, inst); 2353} 2354 2355static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2356{ 2357 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2358 struct r600_bc_alu alu; 2359 int r; 2360 2361 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2362 2363 switch (inst->Instruction.Opcode) { 2364 case TGSI_OPCODE_ARL: 2365 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2366 break; 2367 case TGSI_OPCODE_ARR: 2368 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2369 break; 2370 default: 2371 assert(0); 2372 return -1; 2373 } 2374 2375 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2376 alu.last = 1; 2377 alu.dst.sel = ctx->ar_reg; 2378 alu.dst.write = 1; 2379 r = r600_bc_add_alu(ctx->bc, &alu); 2380 if (r) 2381 return r; 2382 2383 /* TODO: Note that the MOVA can be avoided if we never use AR for 2384 * indexing non-CB registers in the current ALU clause. Similarly, we 2385 * need to load AR from ar_reg again if we started a new clause 2386 * between ARL and AR usage. The easy way to do that is to remove 2387 * the MOVA here, and load it for the first AR access after ar_reg 2388 * has been modified in each clause. */ 2389 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2390 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2391 alu.src[0].sel = ctx->ar_reg; 2392 alu.src[0].chan = 0; 2393 alu.last = 1; 2394 r = r600_bc_add_alu(ctx->bc, &alu); 2395 if (r) 2396 return r; 2397 return 0; 2398} 2399static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2400{ 2401 /* TODO from r600c, ar values don't persist between clauses */ 2402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2403 struct r600_bc_alu alu; 2404 int r; 2405 2406 switch (inst->Instruction.Opcode) { 2407 case TGSI_OPCODE_ARL: 2408 memset(&alu, 0, sizeof(alu)); 2409 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2410 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2411 alu.dst.sel = ctx->ar_reg; 2412 alu.dst.write = 1; 2413 alu.last = 1; 2414 2415 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2416 return r; 2417 2418 memset(&alu, 0, sizeof(alu)); 2419 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2420 alu.src[0].sel = ctx->ar_reg; 2421 alu.dst.sel = ctx->ar_reg; 2422 alu.dst.write = 1; 2423 alu.last = 1; 2424 2425 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2426 return r; 2427 break; 2428 case TGSI_OPCODE_ARR: 2429 memset(&alu, 0, sizeof(alu)); 2430 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2431 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2432 alu.dst.sel = ctx->ar_reg; 2433 alu.dst.write = 1; 2434 alu.last = 1; 2435 2436 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2437 return r; 2438 break; 2439 default: 2440 assert(0); 2441 return -1; 2442 } 2443 2444 memset(&alu, 0, sizeof(alu)); 2445 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2446 alu.src[0].sel = ctx->ar_reg; 2447 alu.last = 1; 2448 2449 r = r600_bc_add_alu(ctx->bc, &alu); 2450 if (r) 2451 return r; 2452 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2453 return 0; 2454} 2455 2456static int tgsi_opdst(struct r600_shader_ctx *ctx) 2457{ 2458 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2459 struct r600_bc_alu alu; 2460 int i, r = 0; 2461 2462 for (i = 0; i < 4; i++) { 2463 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2464 2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2466 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2467 2468 if (i == 0 || i == 3) { 2469 alu.src[0].sel = V_SQ_ALU_SRC_1; 2470 } else { 2471 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2472 } 2473 2474 if (i == 0 || i == 2) { 2475 alu.src[1].sel = V_SQ_ALU_SRC_1; 2476 } else { 2477 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2478 } 2479 if (i == 3) 2480 alu.last = 1; 2481 r = r600_bc_add_alu(ctx->bc, &alu); 2482 if (r) 2483 return r; 2484 } 2485 return 0; 2486} 2487 2488static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2489{ 2490 struct r600_bc_alu alu; 2491 int r; 2492 2493 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2494 alu.inst = opcode; 2495 alu.predicate = 1; 2496 2497 alu.dst.sel = ctx->temp_reg; 2498 alu.dst.write = 1; 2499 alu.dst.chan = 0; 2500 2501 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2502 alu.src[1].sel = V_SQ_ALU_SRC_0; 2503 alu.src[1].chan = 0; 2504 2505 alu.last = 1; 2506 2507 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2508 if (r) 2509 return r; 2510 return 0; 2511} 2512 2513static int pops(struct r600_shader_ctx *ctx, int pops) 2514{ 2515 int alu_pop = 3; 2516 if (ctx->bc->cf_last) { 2517 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2518 alu_pop = 0; 2519 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2520 alu_pop = 1; 2521 } 2522 alu_pop += pops; 2523 if (alu_pop == 1) { 2524 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2525 ctx->bc->force_add_cf = 1; 2526 } else if (alu_pop == 2) { 2527 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2528 ctx->bc->force_add_cf = 1; 2529 } else { 2530 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2531 ctx->bc->cf_last->pop_count = pops; 2532 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2533 } 2534 return 0; 2535} 2536 2537static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2538{ 2539 switch(reason) { 2540 case FC_PUSH_VPM: 2541 ctx->bc->callstack[ctx->bc->call_sp].current--; 2542 break; 2543 case FC_PUSH_WQM: 2544 case FC_LOOP: 2545 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2546 break; 2547 case FC_REP: 2548 /* TOODO : for 16 vp asic should -= 2; */ 2549 ctx->bc->callstack[ctx->bc->call_sp].current --; 2550 break; 2551 } 2552} 2553 2554static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2555{ 2556 if (check_max_only) { 2557 int diff; 2558 switch (reason) { 2559 case FC_PUSH_VPM: 2560 diff = 1; 2561 break; 2562 case FC_PUSH_WQM: 2563 diff = 4; 2564 break; 2565 default: 2566 assert(0); 2567 diff = 0; 2568 } 2569 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2570 ctx->bc->callstack[ctx->bc->call_sp].max) { 2571 ctx->bc->callstack[ctx->bc->call_sp].max = 2572 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2573 } 2574 return; 2575 } 2576 switch (reason) { 2577 case FC_PUSH_VPM: 2578 ctx->bc->callstack[ctx->bc->call_sp].current++; 2579 break; 2580 case FC_PUSH_WQM: 2581 case FC_LOOP: 2582 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2583 break; 2584 case FC_REP: 2585 ctx->bc->callstack[ctx->bc->call_sp].current++; 2586 break; 2587 } 2588 2589 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2590 ctx->bc->callstack[ctx->bc->call_sp].max) { 2591 ctx->bc->callstack[ctx->bc->call_sp].max = 2592 ctx->bc->callstack[ctx->bc->call_sp].current; 2593 } 2594} 2595 2596static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2597{ 2598 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2599 2600 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2601 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2602 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2603 sp->num_mid++; 2604} 2605 2606static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2607{ 2608 ctx->bc->fc_sp++; 2609 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2610 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2611} 2612 2613static void fc_poplevel(struct r600_shader_ctx *ctx) 2614{ 2615 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2616 if (sp->mid) { 2617 free(sp->mid); 2618 sp->mid = NULL; 2619 } 2620 sp->num_mid = 0; 2621 sp->start = NULL; 2622 sp->type = 0; 2623 ctx->bc->fc_sp--; 2624} 2625 2626#if 0 2627static int emit_return(struct r600_shader_ctx *ctx) 2628{ 2629 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2630 return 0; 2631} 2632 2633static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2634{ 2635 2636 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2637 ctx->bc->cf_last->pop_count = pops; 2638 /* TODO work out offset */ 2639 return 0; 2640} 2641 2642static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2643{ 2644 return 0; 2645} 2646 2647static void emit_testflag(struct r600_shader_ctx *ctx) 2648{ 2649 2650} 2651 2652static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2653{ 2654 emit_testflag(ctx); 2655 emit_jump_to_offset(ctx, 1, 4); 2656 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2657 pops(ctx, ifidx + 1); 2658 emit_return(ctx); 2659} 2660 2661static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2662{ 2663 emit_testflag(ctx); 2664 2665 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2666 ctx->bc->cf_last->pop_count = 1; 2667 2668 fc_set_mid(ctx, fc_sp); 2669 2670 pops(ctx, 1); 2671} 2672#endif 2673 2674static int tgsi_if(struct r600_shader_ctx *ctx) 2675{ 2676 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2677 2678 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2679 2680 fc_pushlevel(ctx, FC_IF); 2681 2682 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2683 return 0; 2684} 2685 2686static int tgsi_else(struct r600_shader_ctx *ctx) 2687{ 2688 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2689 ctx->bc->cf_last->pop_count = 1; 2690 2691 fc_set_mid(ctx, ctx->bc->fc_sp); 2692 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2693 return 0; 2694} 2695 2696static int tgsi_endif(struct r600_shader_ctx *ctx) 2697{ 2698 pops(ctx, 1); 2699 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2700 R600_ERR("if/endif unbalanced in shader\n"); 2701 return -1; 2702 } 2703 2704 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2705 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2706 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2707 } else { 2708 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2709 } 2710 fc_poplevel(ctx); 2711 2712 callstack_decrease_current(ctx, FC_PUSH_VPM); 2713 return 0; 2714} 2715 2716static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2717{ 2718 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2719 2720 fc_pushlevel(ctx, FC_LOOP); 2721 2722 /* check stack depth */ 2723 callstack_check_depth(ctx, FC_LOOP, 0); 2724 return 0; 2725} 2726 2727static int tgsi_endloop(struct r600_shader_ctx *ctx) 2728{ 2729 int i; 2730 2731 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2732 2733 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2734 R600_ERR("loop/endloop in shader code are not paired.\n"); 2735 return -EINVAL; 2736 } 2737 2738 /* fixup loop pointers - from r600isa 2739 LOOP END points to CF after LOOP START, 2740 LOOP START point to CF after LOOP END 2741 BRK/CONT point to LOOP END CF 2742 */ 2743 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2744 2745 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2746 2747 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2748 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2749 } 2750 /* TODO add LOOPRET support */ 2751 fc_poplevel(ctx); 2752 callstack_decrease_current(ctx, FC_LOOP); 2753 return 0; 2754} 2755 2756static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2757{ 2758 unsigned int fscp; 2759 2760 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2761 { 2762 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2763 break; 2764 } 2765 2766 if (fscp == 0) { 2767 R600_ERR("Break not inside loop/endloop pair\n"); 2768 return -EINVAL; 2769 } 2770 2771 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2772 ctx->bc->cf_last->pop_count = 1; 2773 2774 fc_set_mid(ctx, fscp); 2775 2776 pops(ctx, 1); 2777 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2778 return 0; 2779} 2780 2781static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2782 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2783 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2784 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2785 2786 /* FIXME: 2787 * For state trackers other than OpenGL, we'll want to use 2788 * _RECIP_IEEE instead. 2789 */ 2790 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2791 2792 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2793 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2794 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2795 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2796 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2797 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2798 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2799 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2800 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2801 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2802 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2803 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2804 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2805 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2806 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2807 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2808 /* gap */ 2809 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2810 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2811 /* gap */ 2812 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2813 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2814 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2815 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2816 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2817 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2818 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2819 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2820 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2821 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2822 /* gap */ 2823 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2824 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2825 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2826 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2827 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2828 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2829 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2830 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2831 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2832 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2833 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2834 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2835 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2836 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2837 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2838 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2839 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2840 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2841 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2842 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2843 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2844 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2845 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2846 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2847 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2848 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2849 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2850 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2851 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2852 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2853 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2854 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2855 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2856 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2857 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2858 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2859 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2860 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2861 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2862 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2863 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2864 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2865 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2866 /* gap */ 2867 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2868 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2869 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2870 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2871 /* gap */ 2872 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2873 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2874 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2875 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2876 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2877 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2878 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2879 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 2880 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2881 /* gap */ 2882 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2883 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2884 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2885 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2887 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2889 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2890 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2891 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2892 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2894 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2895 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2896 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2897 /* gap */ 2898 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2899 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2900 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2901 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2902 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2903 /* gap */ 2904 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2905 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2907 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2908 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2909 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2910 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2911 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2912 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2913 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2914 /* gap */ 2915 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2916 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2917 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2918 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2919 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2920 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2921 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2922 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2923 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2924 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2925 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2926 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2930 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2932 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2933 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2935 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2938 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2940 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2941 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2942 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2943}; 2944 2945static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2946 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 2947 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2948 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2949 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2950 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2951 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2952 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2954 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2955 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2956 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2957 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2958 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2959 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2960 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2961 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2962 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2963 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2964 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2965 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2966 /* gap */ 2967 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2968 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2969 /* gap */ 2970 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2971 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2972 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2973 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2974 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2975 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2976 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2977 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2978 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2979 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2980 /* gap */ 2981 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2982 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2983 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2984 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2985 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2986 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2987 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2988 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2989 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2990 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2991 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2992 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2993 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2994 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2995 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2996 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2997 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2998 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2999 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3000 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3001 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3002 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3003 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3004 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3005 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3007 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3008 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3009 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3010 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3011 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3012 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3013 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3014 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3015 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3016 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3017 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3018 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3019 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3020 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3021 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3022 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3023 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3024 /* gap */ 3025 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3026 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3027 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3028 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3029 /* gap */ 3030 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3031 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3032 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3033 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3034 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3035 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3036 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3037 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3038 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3039 /* gap */ 3040 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3041 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3042 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3043 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3044 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3045 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3046 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3047 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3048 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3049 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3050 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3051 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3052 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3053 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3054 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3055 /* gap */ 3056 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3057 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3058 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3059 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3060 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3061 /* gap */ 3062 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3063 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3065 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3066 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3067 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3068 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3069 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3070 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3071 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3072 /* gap */ 3073 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3074 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3075 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3076 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3077 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3078 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3079 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3080 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3081 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3082 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3083 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3084 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3086 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3089 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3091 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3092 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3093 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3096 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3097 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3099 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3101}; 3102