r600_shader.c revision 0a6f09a76a416b8672e149c520aa5bef33174223
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_formats.h" 32#include "r600_opcodes.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36 37static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 38{ 39 struct r600_pipe_state *rstate = &shader->rstate; 40 struct r600_shader *rshader = &shader->shader; 41 unsigned spi_vs_out_id[10]; 42 unsigned i, tmp; 43 44 /* clear previous register */ 45 rstate->nregs = 0; 46 47 /* so far never got proper semantic id from tgsi */ 48 /* FIXME better to move this in config things so they get emited 49 * only one time per cs 50 */ 51 for (i = 0; i < 10; i++) { 52 spi_vs_out_id[i] = 0; 53 } 54 for (i = 0; i < 32; i++) { 55 tmp = i << ((i & 3) * 8); 56 spi_vs_out_id[i / 4] |= tmp; 57 } 58 for (i = 0; i < 10; i++) { 59 r600_pipe_state_add_reg(rstate, 60 R_028614_SPI_VS_OUT_ID_0 + i * 4, 61 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 62 } 63 64 r600_pipe_state_add_reg(rstate, 65 R_0286C4_SPI_VS_OUT_CONFIG, 66 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 67 0xFFFFFFFF, NULL); 68 r600_pipe_state_add_reg(rstate, 69 R_028868_SQ_PGM_RESOURCES_VS, 70 S_028868_NUM_GPRS(rshader->bc.ngpr) | 71 S_028868_STACK_SIZE(rshader->bc.nstack), 72 0xFFFFFFFF, NULL); 73 r600_pipe_state_add_reg(rstate, 74 R_0288D0_SQ_PGM_CF_OFFSET_VS, 75 0x00000000, 0xFFFFFFFF, NULL); 76 r600_pipe_state_add_reg(rstate, 77 R_028858_SQ_PGM_START_VS, 78 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 79 80 r600_pipe_state_add_reg(rstate, 81 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 82 0xFFFFFFFF, NULL); 83 84} 85 86int r600_find_vs_semantic_index(struct r600_shader *vs, 87 struct r600_shader *ps, int id) 88{ 89 struct r600_shader_io *input = &ps->input[id]; 90 91 for (int i = 0; i < vs->noutput; i++) { 92 if 
(input->name == vs->output[i].name && 93 input->sid == vs->output[i].sid) { 94 return i - 1; 95 } 96 } 97 return 0; 98} 99 100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 101{ 102 struct r600_pipe_state *rstate = &shader->rstate; 103 struct r600_shader *rshader = &shader->shader; 104 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; 105 int pos_index = -1, face_index = -1; 106 107 rstate->nregs = 0; 108 109 for (i = 0; i < rshader->ninput; i++) { 110 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 111 pos_index = i; 112 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 113 face_index = i; 114 } 115 116 db_shader_control = 0; 117 for (i = 0; i < rshader->noutput; i++) { 118 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 119 db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); 120 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 121 db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1); 122 } 123 if (rshader->uses_kill) 124 db_shader_control |= S_02880C_KILL_ENABLE(1); 125 126 exports_ps = 0; 127 num_cout = 0; 128 for (i = 0; i < rshader->noutput; i++) { 129 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 130 exports_ps |= 1; 131 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 132 num_cout++; 133 } 134 } 135 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 136 if (!exports_ps) { 137 /* always at least export 1 component per pixel */ 138 exports_ps = 2; 139 } 140 141 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 142 S_0286CC_PERSP_GRADIENT_ENA(1); 143 spi_input_z = 0; 144 if (pos_index != -1) { 145 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 146 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 147 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 148 S_0286CC_BARYC_SAMPLE_CNTL(1)); 149 spi_input_z |= 1; 150 } 151 152 
spi_ps_in_control_1 = 0; 153 if (face_index != -1) { 154 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 155 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 156 } 157 158 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 159 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 160 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 161 r600_pipe_state_add_reg(rstate, 162 R_028840_SQ_PGM_START_PS, 163 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 164 r600_pipe_state_add_reg(rstate, 165 R_028850_SQ_PGM_RESOURCES_PS, 166 S_028868_NUM_GPRS(rshader->bc.ngpr) | 167 S_028868_STACK_SIZE(rshader->bc.nstack), 168 0xFFFFFFFF, NULL); 169 r600_pipe_state_add_reg(rstate, 170 R_028854_SQ_PGM_EXPORTS_PS, 171 exports_ps, 0xFFFFFFFF, NULL); 172 r600_pipe_state_add_reg(rstate, 173 R_0288CC_SQ_PGM_CF_OFFSET_PS, 174 0x00000000, 0xFFFFFFFF, NULL); 175 176 if (rshader->fs_write_all) { 177 r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, 178 S_028808_MULTIWRITE_ENABLE(1), 179 S_028808_MULTIWRITE_ENABLE(1), 180 NULL); 181 } 182 /* only set some bits here, the other bits are set in the dsa state */ 183 r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, 184 db_shader_control, 185 S_02880C_Z_EXPORT_ENABLE(1) | 186 S_02880C_STENCIL_REF_EXPORT_ENABLE(1) | 187 S_02880C_KILL_ENABLE(1), 188 NULL); 189 190 r600_pipe_state_add_reg(rstate, 191 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 192 0xFFFFFFFF, NULL); 193} 194 195static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 196{ 197 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 198 struct r600_shader *rshader = &shader->shader; 199 void *ptr; 200 201 /* copy new shader */ 202 if (shader->bo == NULL) { 203 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 204 if (shader->bo == NULL) { 205 return -ENOMEM; 206 } 
207 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 208 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 209 r600_bo_unmap(rctx->radeon, shader->bo); 210 } 211 /* build state */ 212 switch (rshader->processor_type) { 213 case TGSI_PROCESSOR_VERTEX: 214 if (rshader->family >= CHIP_CEDAR) { 215 evergreen_pipe_shader_vs(ctx, shader); 216 } else { 217 r600_pipe_shader_vs(ctx, shader); 218 } 219 break; 220 case TGSI_PROCESSOR_FRAGMENT: 221 if (rshader->family >= CHIP_CEDAR) { 222 evergreen_pipe_shader_ps(ctx, shader); 223 } else { 224 r600_pipe_shader_ps(ctx, shader); 225 } 226 break; 227 default: 228 return -EINVAL; 229 } 230 return 0; 231} 232 233static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 234 235int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 236{ 237 static int dump_shaders = -1; 238 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 239 int r; 240 241 /* Would like some magic "get_bool_option_once" routine. 
242 */ 243 if (dump_shaders == -1) 244 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 245 246 if (dump_shaders) { 247 fprintf(stderr, "--------------------------------------------------------------\n"); 248 tgsi_dump(tokens, 0); 249 } 250 shader->shader.family = r600_get_family(rctx->radeon); 251 r = r600_shader_from_tgsi(tokens, &shader->shader); 252 if (r) { 253 R600_ERR("translation from TGSI failed !\n"); 254 return r; 255 } 256 r = r600_bc_build(&shader->shader.bc); 257 if (r) { 258 R600_ERR("building bytecode failed !\n"); 259 return r; 260 } 261 if (dump_shaders) { 262 r600_bc_dump(&shader->shader.bc); 263 fprintf(stderr, "______________________________________________________________\n"); 264 } 265 return r600_pipe_shader(ctx, shader); 266} 267 268void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 269{ 270 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 271 272 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 273 r600_bc_clear(&shader->shader.bc); 274} 275 276/* 277 * tgsi -> r600 shader 278 */ 279struct r600_shader_tgsi_instruction; 280 281struct r600_shader_src { 282 unsigned sel; 283 unsigned swizzle[4]; 284 unsigned neg; 285 unsigned abs; 286 unsigned rel; 287 uint32_t value[4]; 288}; 289 290struct r600_shader_ctx { 291 struct tgsi_shader_info info; 292 struct tgsi_parse_context parse; 293 const struct tgsi_token *tokens; 294 unsigned type; 295 unsigned file_offset[TGSI_FILE_COUNT]; 296 unsigned temp_reg; 297 unsigned ar_reg; 298 struct r600_shader_tgsi_instruction *inst_info; 299 struct r600_bc *bc; 300 struct r600_shader *shader; 301 struct r600_shader_src src[3]; 302 u32 *literals; 303 u32 nliterals; 304 u32 max_driver_temp_used; 305 /* needed for evergreen interpolation */ 306 boolean input_centroid; 307 boolean input_linear; 308 boolean input_perspective; 309 int num_interp_gpr; 310}; 311 312struct r600_shader_tgsi_instruction { 313 unsigned tgsi_opcode; 314 unsigned 
is_op3; 315 unsigned r600_opcode; 316 int (*process)(struct r600_shader_ctx *ctx); 317}; 318 319static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 320static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 321 322static int tgsi_is_supported(struct r600_shader_ctx *ctx) 323{ 324 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 325 int j; 326 327 if (i->Instruction.NumDstRegs > 1) { 328 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 329 return -EINVAL; 330 } 331 if (i->Instruction.Predicate) { 332 R600_ERR("predicate unsupported\n"); 333 return -EINVAL; 334 } 335#if 0 336 if (i->Instruction.Label) { 337 R600_ERR("label unsupported\n"); 338 return -EINVAL; 339 } 340#endif 341 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 342 if (i->Src[j].Register.Dimension) { 343 R600_ERR("unsupported src %d (dimension %d)\n", j, 344 i->Src[j].Register.Dimension); 345 return -EINVAL; 346 } 347 } 348 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 349 if (i->Dst[j].Register.Dimension) { 350 R600_ERR("unsupported dst (dimension)\n"); 351 return -EINVAL; 352 } 353 } 354 return 0; 355} 356 357static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 358{ 359 int i, r; 360 struct r600_bc_alu alu; 361 int gpr = 0, base_chan = 0; 362 int ij_index = 0; 363 364 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 365 ij_index = 0; 366 if (ctx->shader->input[input].centroid) 367 ij_index++; 368 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 369 ij_index = 0; 370 /* if we have perspective add one */ 371 if (ctx->input_perspective) { 372 ij_index++; 373 /* if we have perspective centroid */ 374 if (ctx->input_centroid) 375 ij_index++; 376 } 377 if (ctx->shader->input[input].centroid) 378 ij_index++; 379 } 380 381 /* work out gpr and base_chan from index */ 382 gpr = ij_index / 2; 383 base_chan = (2 * (ij_index % 2)) + 
1; 384 385 for (i = 0; i < 8; i++) { 386 memset(&alu, 0, sizeof(struct r600_bc_alu)); 387 388 if (i < 4) 389 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 390 else 391 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 392 393 if ((i > 1) && (i < 6)) { 394 alu.dst.sel = ctx->shader->input[input].gpr; 395 alu.dst.write = 1; 396 } 397 398 alu.dst.chan = i % 4; 399 400 alu.src[0].sel = gpr; 401 alu.src[0].chan = (base_chan - (i % 2)); 402 403 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 404 405 alu.bank_swizzle_force = SQ_ALU_VEC_210; 406 if ((i % 4) == 3) 407 alu.last = 1; 408 r = r600_bc_add_alu(ctx->bc, &alu); 409 if (r) 410 return r; 411 } 412 return 0; 413} 414 415 416static int tgsi_declaration(struct r600_shader_ctx *ctx) 417{ 418 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 419 unsigned i; 420 int r; 421 422 switch (d->Declaration.File) { 423 case TGSI_FILE_INPUT: 424 i = ctx->shader->ninput++; 425 ctx->shader->input[i].name = d->Semantic.Name; 426 ctx->shader->input[i].sid = d->Semantic.Index; 427 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 428 ctx->shader->input[i].centroid = d->Declaration.Centroid; 429 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 430 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 431 /* turn input into interpolate on EG */ 432 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 433 if (ctx->shader->input[i].interpolate > 0) { 434 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 435 evergreen_interp_alu(ctx, i); 436 } 437 } 438 } 439 break; 440 case TGSI_FILE_OUTPUT: 441 i = ctx->shader->noutput++; 442 ctx->shader->output[i].name = d->Semantic.Name; 443 ctx->shader->output[i].sid = d->Semantic.Index; 444 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 445 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 446 break; 447 case TGSI_FILE_CONSTANT: 448 
case TGSI_FILE_TEMPORARY: 449 case TGSI_FILE_SAMPLER: 450 case TGSI_FILE_ADDRESS: 451 break; 452 453 case TGSI_FILE_SYSTEM_VALUE: 454 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 455 struct r600_bc_alu alu; 456 memset(&alu, 0, sizeof(struct r600_bc_alu)); 457 458 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 459 alu.src[0].sel = 0; 460 alu.src[0].chan = 3; 461 462 alu.dst.sel = 0; 463 alu.dst.chan = 3; 464 alu.dst.write = 1; 465 alu.last = 1; 466 467 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 468 return r; 469 break; 470 } 471 472 default: 473 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 474 return -EINVAL; 475 } 476 return 0; 477} 478 479static int r600_get_temp(struct r600_shader_ctx *ctx) 480{ 481 return ctx->temp_reg + ctx->max_driver_temp_used++; 482} 483 484/* 485 * for evergreen we need to scan the shader to find the number of GPRs we need to 486 * reserve for interpolation. 487 * 488 * we need to know if we are going to emit 489 * any centroid inputs 490 * if perspective and linear are required 491*/ 492static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 493{ 494 int i; 495 int num_baryc; 496 497 ctx->input_linear = FALSE; 498 ctx->input_perspective = FALSE; 499 ctx->input_centroid = FALSE; 500 ctx->num_interp_gpr = 1; 501 502 /* any centroid inputs */ 503 for (i = 0; i < ctx->info.num_inputs; i++) { 504 /* skip position/face */ 505 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 506 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 507 continue; 508 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 509 ctx->input_linear = TRUE; 510 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 511 ctx->input_perspective = TRUE; 512 if (ctx->info.input_centroid[i]) 513 ctx->input_centroid = TRUE; 514 } 515 516 num_baryc = 0; 517 /* ignoring sample for now */ 518 if (ctx->input_perspective) 519 num_baryc++; 520 if (ctx->input_linear) 521 num_baryc++; 522 if 
(ctx->input_centroid) 523 num_baryc *= 2; 524 525 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 526 527 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 528 return ctx->num_interp_gpr; 529} 530 531static void tgsi_src(struct r600_shader_ctx *ctx, 532 const struct tgsi_full_src_register *tgsi_src, 533 struct r600_shader_src *r600_src) 534{ 535 memset(r600_src, 0, sizeof(*r600_src)); 536 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 537 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 538 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 539 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 540 r600_src->neg = tgsi_src->Register.Negate; 541 r600_src->abs = tgsi_src->Register.Absolute; 542 543 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 544 int index; 545 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 546 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 547 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 548 549 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 550 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 551 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 552 return; 553 } 554 index = tgsi_src->Register.Index; 555 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 556 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 557 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 558 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 559 r600_src->swizzle[0] = 3; 560 r600_src->swizzle[1] = 3; 561 r600_src->swizzle[2] = 3; 562 r600_src->swizzle[3] = 3; 563 r600_src->sel = 0; 564 } else { 565 if (tgsi_src->Register.Indirect) 566 r600_src->rel = V_SQ_REL_RELATIVE; 567 r600_src->sel = tgsi_src->Register.Index; 568 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 569 } 570} 571 572static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 573{ 574 struct r600_bc_vtx 
vtx; 575 unsigned int ar_reg; 576 int r; 577 578 if (offset) { 579 struct r600_bc_alu alu; 580 581 memset(&alu, 0, sizeof(alu)); 582 583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 584 alu.src[0].sel = ctx->ar_reg; 585 586 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 587 alu.src[1].value = offset; 588 589 alu.dst.sel = dst_reg; 590 alu.dst.write = 1; 591 alu.last = 1; 592 593 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 594 return r; 595 596 ar_reg = dst_reg; 597 } else { 598 ar_reg = ctx->ar_reg; 599 } 600 601 memset(&vtx, 0, sizeof(vtx)); 602 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 603 vtx.src_gpr = ar_reg; 604 vtx.mega_fetch_count = 16; 605 vtx.dst_gpr = dst_reg; 606 vtx.dst_sel_x = 0; /* SEL_X */ 607 vtx.dst_sel_y = 1; /* SEL_Y */ 608 vtx.dst_sel_z = 2; /* SEL_Z */ 609 vtx.dst_sel_w = 3; /* SEL_W */ 610 vtx.data_format = FMT_32_32_32_32_FLOAT; 611 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 612 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 613 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 614 615 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 616 return r; 617 618 return 0; 619} 620 621static int tgsi_split_constant(struct r600_shader_ctx *ctx) 622{ 623 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 624 struct r600_bc_alu alu; 625 int i, j, k, nconst, r; 626 627 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 628 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 629 nconst++; 630 } 631 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 632 } 633 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 634 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 635 continue; 636 } 637 638 if (ctx->src[i].rel) { 639 int treg = r600_get_temp(ctx); 640 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 641 return r; 642 643 ctx->src[i].sel = treg; 644 ctx->src[i].rel = 0; 645 j--; 646 } else if (j > 0) { 647 int treg = r600_get_temp(ctx); 648 for (k = 0; k < 4; k++) { 649 
memset(&alu, 0, sizeof(struct r600_bc_alu)); 650 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 651 alu.src[0].sel = ctx->src[i].sel; 652 alu.src[0].chan = k; 653 alu.src[0].rel = ctx->src[i].rel; 654 alu.dst.sel = treg; 655 alu.dst.chan = k; 656 alu.dst.write = 1; 657 if (k == 3) 658 alu.last = 1; 659 r = r600_bc_add_alu(ctx->bc, &alu); 660 if (r) 661 return r; 662 } 663 ctx->src[i].sel = treg; 664 ctx->src[i].rel =0; 665 j--; 666 } 667 } 668 return 0; 669} 670 671/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 672static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 673{ 674 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 675 struct r600_bc_alu alu; 676 int i, j, k, nliteral, r; 677 678 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 679 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 680 nliteral++; 681 } 682 } 683 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 684 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 685 int treg = r600_get_temp(ctx); 686 for (k = 0; k < 4; k++) { 687 memset(&alu, 0, sizeof(struct r600_bc_alu)); 688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 689 alu.src[0].sel = ctx->src[i].sel; 690 alu.src[0].chan = k; 691 alu.src[0].value = ctx->src[i].value[k]; 692 alu.dst.sel = treg; 693 alu.dst.chan = k; 694 alu.dst.write = 1; 695 if (k == 3) 696 alu.last = 1; 697 r = r600_bc_add_alu(ctx->bc, &alu); 698 if (r) 699 return r; 700 } 701 ctx->src[i].sel = treg; 702 j--; 703 } 704 } 705 return 0; 706} 707 708static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 709{ 710 struct tgsi_full_immediate *immediate; 711 struct tgsi_full_property *property; 712 struct r600_shader_ctx ctx; 713 struct r600_bc_output output[32]; 714 unsigned output_done, noutput; 715 unsigned opcode; 716 int i, r = 0, pos0; 717 718 ctx.bc = &shader->bc; 719 ctx.shader = shader; 
720 r = r600_bc_init(ctx.bc, shader->family); 721 if (r) 722 return r; 723 ctx.tokens = tokens; 724 tgsi_scan_shader(tokens, &ctx.info); 725 tgsi_parse_init(&ctx.parse, tokens); 726 ctx.type = ctx.parse.FullHeader.Processor.Processor; 727 shader->processor_type = ctx.type; 728 ctx.bc->type = shader->processor_type; 729 730 /* register allocations */ 731 /* Values [0,127] correspond to GPR[0..127]. 732 * Values [128,159] correspond to constant buffer bank 0 733 * Values [160,191] correspond to constant buffer bank 1 734 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 735 * Values [256,287] correspond to constant buffer bank 2 (EG) 736 * Values [288,319] correspond to constant buffer bank 3 (EG) 737 * Other special values are shown in the list below. 738 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 739 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 740 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 741 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 742 * 248 SQ_ALU_SRC_0: special constant 0.0. 743 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 744 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 745 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 746 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 747 * 253 SQ_ALU_SRC_LITERAL: literal constant. 748 * 254 SQ_ALU_SRC_PV: previous vector result. 749 * 255 SQ_ALU_SRC_PS: previous scalar result. 
750 */ 751 for (i = 0; i < TGSI_FILE_COUNT; i++) { 752 ctx.file_offset[i] = 0; 753 } 754 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 755 ctx.file_offset[TGSI_FILE_INPUT] = 1; 756 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 757 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 758 } else { 759 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 760 } 761 } 762 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 763 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 764 } 765 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 766 ctx.info.file_count[TGSI_FILE_INPUT]; 767 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 768 ctx.info.file_count[TGSI_FILE_OUTPUT]; 769 770 /* Outside the GPR range. This will be translated to one of the 771 * kcache banks later. */ 772 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 773 774 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 775 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 776 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 777 ctx.temp_reg = ctx.ar_reg + 1; 778 779 ctx.nliterals = 0; 780 ctx.literals = NULL; 781 shader->fs_write_all = FALSE; 782 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 783 tgsi_parse_token(&ctx.parse); 784 switch (ctx.parse.FullToken.Token.Type) { 785 case TGSI_TOKEN_TYPE_IMMEDIATE: 786 immediate = &ctx.parse.FullToken.FullImmediate; 787 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 788 if(ctx.literals == NULL) { 789 r = -ENOMEM; 790 goto out_err; 791 } 792 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 793 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 794 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 795 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 796 ctx.nliterals++; 797 break; 798 case TGSI_TOKEN_TYPE_DECLARATION: 799 r = tgsi_declaration(&ctx); 800 if (r) 801 goto out_err; 802 break; 803 case 
TGSI_TOKEN_TYPE_INSTRUCTION: 804 r = tgsi_is_supported(&ctx); 805 if (r) 806 goto out_err; 807 ctx.max_driver_temp_used = 0; 808 /* reserve first tmp for everyone */ 809 r600_get_temp(&ctx); 810 811 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 812 if ((r = tgsi_split_constant(&ctx))) 813 goto out_err; 814 if ((r = tgsi_split_literal_constant(&ctx))) 815 goto out_err; 816 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 817 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 818 else 819 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 820 r = ctx.inst_info->process(&ctx); 821 if (r) 822 goto out_err; 823 break; 824 case TGSI_TOKEN_TYPE_PROPERTY: 825 property = &ctx.parse.FullToken.FullProperty; 826 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 827 if (property->u[0].Data == 1) 828 shader->fs_write_all = TRUE; 829 } 830 break; 831 default: 832 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 833 r = -EINVAL; 834 goto out_err; 835 } 836 } 837 /* export output */ 838 noutput = shader->noutput; 839 for (i = 0, pos0 = 0; i < noutput; i++) { 840 memset(&output[i], 0, sizeof(struct r600_bc_output)); 841 output[i].gpr = shader->output[i].gpr; 842 output[i].elem_size = 3; 843 output[i].swizzle_x = 0; 844 output[i].swizzle_y = 1; 845 output[i].swizzle_z = 2; 846 output[i].swizzle_w = 3; 847 output[i].burst_count = 1; 848 output[i].barrier = 1; 849 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 850 output[i].array_base = i - pos0; 851 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 852 switch (ctx.type) { 853 case TGSI_PROCESSOR_VERTEX: 854 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 855 output[i].array_base = 60; 856 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 857 /* position doesn't count in array_base */ 858 pos0++; 859 } 860 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 861 output[i].array_base = 61; 862 
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 863 /* position doesn't count in array_base */ 864 pos0++; 865 } 866 break; 867 case TGSI_PROCESSOR_FRAGMENT: 868 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 869 output[i].array_base = shader->output[i].sid; 870 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 871 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 872 output[i].array_base = 61; 873 output[i].swizzle_x = 2; 874 output[i].swizzle_y = 7; 875 output[i].swizzle_z = output[i].swizzle_w = 7; 876 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 877 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 878 output[i].array_base = 61; 879 output[i].swizzle_x = 7; 880 output[i].swizzle_y = 1; 881 output[i].swizzle_z = output[i].swizzle_w = 7; 882 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 883 } else { 884 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 885 r = -EINVAL; 886 goto out_err; 887 } 888 break; 889 default: 890 R600_ERR("unsupported processor type %d\n", ctx.type); 891 r = -EINVAL; 892 goto out_err; 893 } 894 } 895 /* add fake param output for vertex shader if no param is exported */ 896 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 897 for (i = 0, pos0 = 0; i < noutput; i++) { 898 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 899 pos0 = 1; 900 break; 901 } 902 } 903 if (!pos0) { 904 memset(&output[i], 0, sizeof(struct r600_bc_output)); 905 output[i].gpr = 0; 906 output[i].elem_size = 3; 907 output[i].swizzle_x = 0; 908 output[i].swizzle_y = 1; 909 output[i].swizzle_z = 2; 910 output[i].swizzle_w = 3; 911 output[i].burst_count = 1; 912 output[i].barrier = 1; 913 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 914 output[i].array_base = 0; 915 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 916 noutput++; 917 } 918 } 919 /* add fake pixel export */ 920 if (ctx.type == 
TGSI_PROCESSOR_FRAGMENT && !noutput) { 921 memset(&output[0], 0, sizeof(struct r600_bc_output)); 922 output[0].gpr = 0; 923 output[0].elem_size = 3; 924 output[0].swizzle_x = 7; 925 output[0].swizzle_y = 7; 926 output[0].swizzle_z = 7; 927 output[0].swizzle_w = 7; 928 output[0].burst_count = 1; 929 output[0].barrier = 1; 930 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 931 output[0].array_base = 0; 932 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 933 noutput++; 934 } 935 /* set export done on last export of each type */ 936 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 937 if (i == (noutput - 1)) { 938 output[i].end_of_program = 1; 939 } 940 if (!(output_done & (1 << output[i].type))) { 941 output_done |= (1 << output[i].type); 942 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 943 } 944 } 945 /* add output to bytecode */ 946 for (i = 0; i < noutput; i++) { 947 r = r600_bc_add_output(ctx.bc, &output[i]); 948 if (r) 949 goto out_err; 950 } 951 free(ctx.literals); 952 tgsi_parse_free(&ctx.parse); 953 return 0; 954out_err: 955 free(ctx.literals); 956 tgsi_parse_free(&ctx.parse); 957 return r; 958} 959 960static int tgsi_unsupported(struct r600_shader_ctx *ctx) 961{ 962 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 963 return -EINVAL; 964} 965 966static int tgsi_end(struct r600_shader_ctx *ctx) 967{ 968 return 0; 969} 970 971static void r600_bc_src(struct r600_bc_alu_src *bc_src, 972 const struct r600_shader_src *shader_src, 973 unsigned chan) 974{ 975 bc_src->sel = shader_src->sel; 976 bc_src->chan = shader_src->swizzle[chan]; 977 bc_src->neg = shader_src->neg; 978 bc_src->abs = shader_src->abs; 979 bc_src->rel = shader_src->rel; 980 bc_src->value = shader_src->value[bc_src->chan]; 981} 982 983static void tgsi_dst(struct r600_shader_ctx *ctx, 984 const struct tgsi_full_dst_register *tgsi_dst, 985 unsigned swizzle, 986 struct r600_bc_alu_dst 
*r600_dst) 987{ 988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 989 990 r600_dst->sel = tgsi_dst->Register.Index; 991 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 992 r600_dst->chan = swizzle; 993 r600_dst->write = 1; 994 if (tgsi_dst->Register.Indirect) 995 r600_dst->rel = V_SQ_REL_RELATIVE; 996 if (inst->Instruction.Saturate) { 997 r600_dst->clamp = 1; 998 } 999} 1000 1001static int tgsi_last_instruction(unsigned writemask) 1002{ 1003 int i, lasti = 0; 1004 1005 for (i = 0; i < 4; i++) { 1006 if (writemask & (1 << i)) { 1007 lasti = i; 1008 } 1009 } 1010 return lasti; 1011} 1012 1013static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 1014{ 1015 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1016 struct r600_bc_alu alu; 1017 int i, j, r; 1018 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1019 1020 for (i = 0; i < lasti + 1; i++) { 1021 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1022 continue; 1023 1024 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1025 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1026 1027 alu.inst = ctx->inst_info->r600_opcode; 1028 if (!swap) { 1029 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1030 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1031 } 1032 } else { 1033 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1034 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1035 } 1036 /* handle some special cases */ 1037 switch (ctx->inst_info->tgsi_opcode) { 1038 case TGSI_OPCODE_SUB: 1039 alu.src[1].neg = 1; 1040 break; 1041 case TGSI_OPCODE_ABS: 1042 alu.src[0].abs = 1; 1043 break; 1044 default: 1045 break; 1046 } 1047 if (i == lasti) { 1048 alu.last = 1; 1049 } 1050 r = r600_bc_add_alu(ctx->bc, &alu); 1051 if (r) 1052 return r; 1053 } 1054 return 0; 1055} 1056 1057static int tgsi_op2(struct r600_shader_ctx *ctx) 1058{ 1059 return tgsi_op2_s(ctx, 0); 1060} 1061 1062static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1063{ 1064 return 
tgsi_op2_s(ctx, 1); 1065} 1066 1067/* 1068 * r600 - trunc to -PI..PI range 1069 * r700 - normalize by dividing by 2PI 1070 * see fdo bug 27901 1071 */ 1072static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1073{ 1074 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1075 static float double_pi = 3.1415926535 * 2; 1076 static float neg_pi = -3.1415926535; 1077 1078 int r; 1079 struct r600_bc_alu alu; 1080 1081 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1082 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1083 alu.is_op3 = 1; 1084 1085 alu.dst.chan = 0; 1086 alu.dst.sel = ctx->temp_reg; 1087 alu.dst.write = 1; 1088 1089 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1090 1091 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1092 alu.src[1].chan = 0; 1093 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1094 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1095 alu.src[2].chan = 0; 1096 alu.last = 1; 1097 r = r600_bc_add_alu(ctx->bc, &alu); 1098 if (r) 1099 return r; 1100 1101 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1103 1104 alu.dst.chan = 0; 1105 alu.dst.sel = ctx->temp_reg; 1106 alu.dst.write = 1; 1107 1108 alu.src[0].sel = ctx->temp_reg; 1109 alu.src[0].chan = 0; 1110 alu.last = 1; 1111 r = r600_bc_add_alu(ctx->bc, &alu); 1112 if (r) 1113 return r; 1114 1115 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1116 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1117 alu.is_op3 = 1; 1118 1119 alu.dst.chan = 0; 1120 alu.dst.sel = ctx->temp_reg; 1121 alu.dst.write = 1; 1122 1123 alu.src[0].sel = ctx->temp_reg; 1124 alu.src[0].chan = 0; 1125 1126 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1127 alu.src[1].chan = 0; 1128 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1129 alu.src[2].chan = 0; 1130 1131 if (ctx->bc->chiprev == CHIPREV_R600) { 1132 alu.src[1].value = *(uint32_t *)&double_pi; 1133 alu.src[2].value = *(uint32_t *)&neg_pi; 1134 } else { 1135 alu.src[1].sel = V_SQ_ALU_SRC_1; 1136 alu.src[2].sel = 
V_SQ_ALU_SRC_0_5; 1137 alu.src[2].neg = 1; 1138 } 1139 1140 alu.last = 1; 1141 r = r600_bc_add_alu(ctx->bc, &alu); 1142 if (r) 1143 return r; 1144 return 0; 1145} 1146 1147static int tgsi_trig(struct r600_shader_ctx *ctx) 1148{ 1149 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1150 struct r600_bc_alu alu; 1151 int i, r; 1152 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1153 1154 r = tgsi_setup_trig(ctx); 1155 if (r) 1156 return r; 1157 1158 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1159 alu.inst = ctx->inst_info->r600_opcode; 1160 alu.dst.chan = 0; 1161 alu.dst.sel = ctx->temp_reg; 1162 alu.dst.write = 1; 1163 1164 alu.src[0].sel = ctx->temp_reg; 1165 alu.src[0].chan = 0; 1166 alu.last = 1; 1167 r = r600_bc_add_alu(ctx->bc, &alu); 1168 if (r) 1169 return r; 1170 1171 /* replicate result */ 1172 for (i = 0; i < lasti + 1; i++) { 1173 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1174 continue; 1175 1176 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1177 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1178 1179 alu.src[0].sel = ctx->temp_reg; 1180 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1181 if (i == lasti) 1182 alu.last = 1; 1183 r = r600_bc_add_alu(ctx->bc, &alu); 1184 if (r) 1185 return r; 1186 } 1187 return 0; 1188} 1189 1190static int tgsi_scs(struct r600_shader_ctx *ctx) 1191{ 1192 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1193 struct r600_bc_alu alu; 1194 int r; 1195 1196 /* We'll only need the trig stuff if we are going to write to the 1197 * X or Y components of the destination vector. 
1198 */ 1199 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1200 r = tgsi_setup_trig(ctx); 1201 if (r) 1202 return r; 1203 } 1204 1205 /* dst.x = COS */ 1206 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1207 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1208 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1209 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1210 1211 alu.src[0].sel = ctx->temp_reg; 1212 alu.src[0].chan = 0; 1213 alu.last = 1; 1214 r = r600_bc_add_alu(ctx->bc, &alu); 1215 if (r) 1216 return r; 1217 } 1218 1219 /* dst.y = SIN */ 1220 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1221 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1223 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1224 1225 alu.src[0].sel = ctx->temp_reg; 1226 alu.src[0].chan = 0; 1227 alu.last = 1; 1228 r = r600_bc_add_alu(ctx->bc, &alu); 1229 if (r) 1230 return r; 1231 } 1232 1233 /* dst.z = 0.0; */ 1234 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1235 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1236 1237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1238 1239 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1240 1241 alu.src[0].sel = V_SQ_ALU_SRC_0; 1242 alu.src[0].chan = 0; 1243 1244 alu.last = 1; 1245 1246 r = r600_bc_add_alu(ctx->bc, &alu); 1247 if (r) 1248 return r; 1249 } 1250 1251 /* dst.w = 1.0; */ 1252 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1253 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1254 1255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1256 1257 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1258 1259 alu.src[0].sel = V_SQ_ALU_SRC_1; 1260 alu.src[0].chan = 0; 1261 1262 alu.last = 1; 1263 1264 r = r600_bc_add_alu(ctx->bc, &alu); 1265 if (r) 1266 return r; 1267 } 1268 1269 return 0; 1270} 1271 1272static int tgsi_kill(struct r600_shader_ctx *ctx) 1273{ 1274 struct r600_bc_alu alu; 1275 int i, r; 1276 1277 for (i = 0; i 
< 4; i++) { 1278 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1279 alu.inst = ctx->inst_info->r600_opcode; 1280 1281 alu.dst.chan = i; 1282 1283 alu.src[0].sel = V_SQ_ALU_SRC_0; 1284 1285 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1286 alu.src[1].sel = V_SQ_ALU_SRC_1; 1287 alu.src[1].neg = 1; 1288 } else { 1289 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1290 } 1291 if (i == 3) { 1292 alu.last = 1; 1293 } 1294 r = r600_bc_add_alu(ctx->bc, &alu); 1295 if (r) 1296 return r; 1297 } 1298 1299 /* kill must be last in ALU */ 1300 ctx->bc->force_add_cf = 1; 1301 ctx->shader->uses_kill = TRUE; 1302 return 0; 1303} 1304 1305static int tgsi_lit(struct r600_shader_ctx *ctx) 1306{ 1307 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1308 struct r600_bc_alu alu; 1309 int r; 1310 1311 /* dst.x, <- 1.0 */ 1312 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1313 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1314 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1315 alu.src[0].chan = 0; 1316 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1317 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1318 r = r600_bc_add_alu(ctx->bc, &alu); 1319 if (r) 1320 return r; 1321 1322 /* dst.y = max(src.x, 0.0) */ 1323 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1325 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1326 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1327 alu.src[1].chan = 0; 1328 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1329 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1330 r = r600_bc_add_alu(ctx->bc, &alu); 1331 if (r) 1332 return r; 1333 1334 /* dst.w, <- 1.0 */ 1335 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1337 alu.src[0].sel = V_SQ_ALU_SRC_1; 1338 alu.src[0].chan = 0; 1339 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1340 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1341 alu.last = 1; 
1342 r = r600_bc_add_alu(ctx->bc, &alu); 1343 if (r) 1344 return r; 1345 1346 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1347 { 1348 int chan; 1349 int sel; 1350 1351 /* dst.z = log(src.y) */ 1352 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1354 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1355 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1356 alu.last = 1; 1357 r = r600_bc_add_alu(ctx->bc, &alu); 1358 if (r) 1359 return r; 1360 1361 chan = alu.dst.chan; 1362 sel = alu.dst.sel; 1363 1364 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1365 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1366 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1367 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1368 alu.src[1].sel = sel; 1369 alu.src[1].chan = chan; 1370 1371 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1372 alu.dst.sel = ctx->temp_reg; 1373 alu.dst.chan = 0; 1374 alu.dst.write = 1; 1375 alu.is_op3 = 1; 1376 alu.last = 1; 1377 r = r600_bc_add_alu(ctx->bc, &alu); 1378 if (r) 1379 return r; 1380 1381 /* dst.z = exp(tmp.x) */ 1382 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1384 alu.src[0].sel = ctx->temp_reg; 1385 alu.src[0].chan = 0; 1386 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1387 alu.last = 1; 1388 r = r600_bc_add_alu(ctx->bc, &alu); 1389 if (r) 1390 return r; 1391 } 1392 return 0; 1393} 1394 1395static int tgsi_rsq(struct r600_shader_ctx *ctx) 1396{ 1397 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1398 struct r600_bc_alu alu; 1399 int i, r; 1400 1401 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1402 1403 /* FIXME: 1404 * For state trackers other than OpenGL, we'll want to use 1405 * _RECIPSQRT_IEEE instead. 
1406 */ 1407 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1408 1409 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1410 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1411 alu.src[i].abs = 1; 1412 } 1413 alu.dst.sel = ctx->temp_reg; 1414 alu.dst.write = 1; 1415 alu.last = 1; 1416 r = r600_bc_add_alu(ctx->bc, &alu); 1417 if (r) 1418 return r; 1419 /* replicate result */ 1420 return tgsi_helper_tempx_replicate(ctx); 1421} 1422

/*
 * Broadcast temp_reg.x to the destination register.
 *
 * Four MOVs are always emitted, one per channel, so they form a single
 * instruction group; channels missing from the write mask simply have
 * their write bit cleared.
 *
 * Returns 0 on success or the error from r600_bc_add_alu().
 */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	struct r600_bc_alu mov;
	int chan, err;

	for (chan = 0; chan < 4; chan++) {
		memset(&mov, 0, sizeof(mov));
		mov.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
		mov.src[0].sel = ctx->temp_reg;

		/* tgsi_dst() selects channel 'chan'; only the write bit is overridden. */
		tgsi_dst(ctx, &inst->Dst[0], chan, &mov.dst);
		mov.dst.write = (mask >> chan) & 1;

		if (chan == 3)
			mov.last = 1;

		err = r600_bc_add_alu(ctx->bc, &mov);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Translate a scalar opcode: apply the opcode from the instruction table to
 * the x component of every source, store the result in temp_reg.x and
 * replicate it to the destination.
 *
 * Returns 0 on success or the error from r600_bc_add_alu().
 */
static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_alu op;
	int s, err;

	memset(&op, 0, sizeof(op));
	op.inst = ctx->inst_info->r600_opcode;
	op.dst.sel = ctx->temp_reg;
	op.dst.write = 1;
	op.last = 1;
	for (s = 0; s < inst->Instruction.NumSrcRegs; s++)
		r600_bc_src(&op.src[s], &ctx->src[s], 0);

	err = r600_bc_add_alu(ctx->bc, &op);
	if (err)
		return err;

	/* replicate result */
	return tgsi_helper_tempx_replicate(ctx);
}

1466static int tgsi_pow(struct r600_shader_ctx *ctx) 1467{ 1468 struct r600_bc_alu alu; 1469 int r; 1470 1471 /* LOG2(a) */ 1472 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1473 alu.inst =
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1474 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1475 alu.dst.sel = ctx->temp_reg; 1476 alu.dst.write = 1; 1477 alu.last = 1; 1478 r = r600_bc_add_alu(ctx->bc, &alu); 1479 if (r) 1480 return r; 1481 /* b * LOG2(a) */ 1482 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1484 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1485 alu.src[1].sel = ctx->temp_reg; 1486 alu.dst.sel = ctx->temp_reg; 1487 alu.dst.write = 1; 1488 alu.last = 1; 1489 r = r600_bc_add_alu(ctx->bc, &alu); 1490 if (r) 1491 return r; 1492 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1493 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1494 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1495 alu.src[0].sel = ctx->temp_reg; 1496 alu.dst.sel = ctx->temp_reg; 1497 alu.dst.write = 1; 1498 alu.last = 1; 1499 r = r600_bc_add_alu(ctx->bc, &alu); 1500 if (r) 1501 return r; 1502 return tgsi_helper_tempx_replicate(ctx); 1503} 1504 1505static int tgsi_ssg(struct r600_shader_ctx *ctx) 1506{ 1507 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1508 struct r600_bc_alu alu; 1509 int i, r; 1510 1511 /* tmp = (src > 0 ? 1 : src) */ 1512 for (i = 0; i < 4; i++) { 1513 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1514 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1515 alu.is_op3 = 1; 1516 1517 alu.dst.sel = ctx->temp_reg; 1518 alu.dst.chan = i; 1519 1520 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1521 alu.src[1].sel = V_SQ_ALU_SRC_1; 1522 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1523 1524 if (i == 3) 1525 alu.last = 1; 1526 r = r600_bc_add_alu(ctx->bc, &alu); 1527 if (r) 1528 return r; 1529 } 1530 1531 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 1532 for (i = 0; i < 4; i++) { 1533 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1534 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1535 alu.is_op3 = 1; 1536 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1537 1538 alu.src[0].sel = ctx->temp_reg; 1539 alu.src[0].chan = i; 1540 alu.src[0].neg = 1; 1541 1542 alu.src[1].sel = V_SQ_ALU_SRC_1; 1543 alu.src[1].neg = 1; 1544 1545 alu.src[2].sel = ctx->temp_reg; 1546 alu.src[2].chan = i; 1547 1548 if (i == 3) 1549 alu.last = 1; 1550 r = r600_bc_add_alu(ctx->bc, &alu); 1551 if (r) 1552 return r; 1553 } 1554 return 0; 1555} 1556 1557static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1558{ 1559 struct r600_bc_alu alu; 1560 int i, r; 1561 1562 for (i = 0; i < 4; i++) { 1563 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1564 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1565 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1566 alu.dst.chan = i; 1567 } else { 1568 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1569 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1570 alu.src[0].sel = ctx->temp_reg; 1571 alu.src[0].chan = i; 1572 } 1573 if (i == 3) { 1574 alu.last = 1; 1575 } 1576 r = r600_bc_add_alu(ctx->bc, &alu); 1577 if (r) 1578 return r; 1579 } 1580 return 0; 1581} 1582 1583static int tgsi_op3(struct r600_shader_ctx *ctx) 1584{ 1585 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1586 struct r600_bc_alu alu; 1587 int i, j, r; 1588 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1589 1590 for (i = 0; i < lasti + 1; i++) { 1591 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1592 continue; 1593 1594 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1595 alu.inst = ctx->inst_info->r600_opcode; 1596 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1597 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1598 } 1599 1600 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1601 alu.dst.chan = i; 1602 alu.dst.write = 
1; 1603 alu.is_op3 = 1; 1604 if (i == lasti) { 1605 alu.last = 1; 1606 } 1607 r = r600_bc_add_alu(ctx->bc, &alu); 1608 if (r) 1609 return r; 1610 } 1611 return 0; 1612} 1613 1614static int tgsi_dp(struct r600_shader_ctx *ctx) 1615{ 1616 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1617 struct r600_bc_alu alu; 1618 int i, j, r; 1619 1620 for (i = 0; i < 4; i++) { 1621 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1622 alu.inst = ctx->inst_info->r600_opcode; 1623 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1624 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1625 } 1626 1627 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1628 alu.dst.chan = i; 1629 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1630 /* handle some special cases */ 1631 switch (ctx->inst_info->tgsi_opcode) { 1632 case TGSI_OPCODE_DP2: 1633 if (i > 1) { 1634 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1635 alu.src[0].chan = alu.src[1].chan = 0; 1636 } 1637 break; 1638 case TGSI_OPCODE_DP3: 1639 if (i > 2) { 1640 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1641 alu.src[0].chan = alu.src[1].chan = 0; 1642 } 1643 break; 1644 case TGSI_OPCODE_DPH: 1645 if (i == 3) { 1646 alu.src[0].sel = V_SQ_ALU_SRC_1; 1647 alu.src[0].chan = 0; 1648 alu.src[0].neg = 0; 1649 } 1650 break; 1651 default: 1652 break; 1653 } 1654 if (i == 3) { 1655 alu.last = 1; 1656 } 1657 r = r600_bc_add_alu(ctx->bc, &alu); 1658 if (r) 1659 return r; 1660 } 1661 return 0; 1662} 1663 1664static int tgsi_tex(struct r600_shader_ctx *ctx) 1665{ 1666 static float one_point_five = 1.5f; 1667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1668 struct r600_bc_tex tex; 1669 struct r600_bc_alu alu; 1670 unsigned src_gpr; 1671 int r, i; 1672 int opcode; 1673 boolean src_not_temp = 1674 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && 1675 inst->Src[0].Register.File != TGSI_FILE_INPUT; 1676 1677 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + 
inst->Src[0].Register.Index; 1678 1679 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1680 /* Add perspective divide */ 1681 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1683 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1684 1685 alu.dst.sel = ctx->temp_reg; 1686 alu.dst.chan = 3; 1687 alu.last = 1; 1688 alu.dst.write = 1; 1689 r = r600_bc_add_alu(ctx->bc, &alu); 1690 if (r) 1691 return r; 1692 1693 for (i = 0; i < 3; i++) { 1694 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1696 alu.src[0].sel = ctx->temp_reg; 1697 alu.src[0].chan = 3; 1698 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1699 alu.dst.sel = ctx->temp_reg; 1700 alu.dst.chan = i; 1701 alu.dst.write = 1; 1702 r = r600_bc_add_alu(ctx->bc, &alu); 1703 if (r) 1704 return r; 1705 } 1706 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1707 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1708 alu.src[0].sel = V_SQ_ALU_SRC_1; 1709 alu.src[0].chan = 0; 1710 alu.dst.sel = ctx->temp_reg; 1711 alu.dst.chan = 3; 1712 alu.last = 1; 1713 alu.dst.write = 1; 1714 r = r600_bc_add_alu(ctx->bc, &alu); 1715 if (r) 1716 return r; 1717 src_not_temp = FALSE; 1718 src_gpr = ctx->temp_reg; 1719 } 1720 1721 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1722 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1723 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1724 1725 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1726 for (i = 0; i < 4; i++) { 1727 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1728 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1729 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1730 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 1731 alu.dst.sel = ctx->temp_reg; 1732 alu.dst.chan = i; 1733 if (i == 3) 1734 alu.last = 1; 1735 alu.dst.write = 1; 1736 r = r600_bc_add_alu(ctx->bc, &alu); 1737 if (r) 1738 return r; 1739 } 1740 1741 /* tmp1.z = 
RCP_e(|tmp1.z|) */ 1742 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1743 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1744 alu.src[0].sel = ctx->temp_reg; 1745 alu.src[0].chan = 2; 1746 alu.src[0].abs = 1; 1747 alu.dst.sel = ctx->temp_reg; 1748 alu.dst.chan = 2; 1749 alu.dst.write = 1; 1750 alu.last = 1; 1751 r = r600_bc_add_alu(ctx->bc, &alu); 1752 if (r) 1753 return r; 1754 1755 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1756 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1757 * muladd has no writemask, have to use another temp 1758 */ 1759 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1760 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1761 alu.is_op3 = 1; 1762 1763 alu.src[0].sel = ctx->temp_reg; 1764 alu.src[0].chan = 0; 1765 alu.src[1].sel = ctx->temp_reg; 1766 alu.src[1].chan = 2; 1767 1768 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1769 alu.src[2].chan = 0; 1770 alu.src[2].value = *(uint32_t *)&one_point_five; 1771 1772 alu.dst.sel = ctx->temp_reg; 1773 alu.dst.chan = 0; 1774 alu.dst.write = 1; 1775 1776 r = r600_bc_add_alu(ctx->bc, &alu); 1777 if (r) 1778 return r; 1779 1780 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1781 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1782 alu.is_op3 = 1; 1783 1784 alu.src[0].sel = ctx->temp_reg; 1785 alu.src[0].chan = 1; 1786 alu.src[1].sel = ctx->temp_reg; 1787 alu.src[1].chan = 2; 1788 1789 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1790 alu.src[2].chan = 0; 1791 alu.src[2].value = *(uint32_t *)&one_point_five; 1792 1793 alu.dst.sel = ctx->temp_reg; 1794 alu.dst.chan = 1; 1795 alu.dst.write = 1; 1796 1797 alu.last = 1; 1798 r = r600_bc_add_alu(ctx->bc, &alu); 1799 if (r) 1800 return r; 1801 1802 src_not_temp = FALSE; 1803 src_gpr = ctx->temp_reg; 1804 } 1805 1806 if (src_not_temp) { 1807 for (i = 0; i < 4; i++) { 1808 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1809 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1810 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1811 
alu.dst.sel = ctx->temp_reg; 1812 alu.dst.chan = i; 1813 if (i == 3) 1814 alu.last = 1; 1815 alu.dst.write = 1; 1816 r = r600_bc_add_alu(ctx->bc, &alu); 1817 if (r) 1818 return r; 1819 } 1820 src_gpr = ctx->temp_reg; 1821 } 1822 1823 opcode = ctx->inst_info->r600_opcode; 1824 if (opcode == SQ_TEX_INST_SAMPLE && 1825 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1826 opcode = SQ_TEX_INST_SAMPLE_C; 1827 1828 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1829 tex.inst = opcode; 1830 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1831 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1832 tex.src_gpr = src_gpr; 1833 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1834 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1835 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1836 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1837 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 1838 tex.src_sel_x = 0; 1839 tex.src_sel_y = 1; 1840 tex.src_sel_z = 2; 1841 tex.src_sel_w = 3; 1842 1843 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1844 tex.src_sel_x = 1; 1845 tex.src_sel_y = 0; 1846 tex.src_sel_z = 3; 1847 tex.src_sel_w = 1; 1848 } 1849 1850 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1851 tex.coord_type_x = 1; 1852 tex.coord_type_y = 1; 1853 tex.coord_type_z = 1; 1854 tex.coord_type_w = 1; 1855 } 1856 1857 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 1858 tex.coord_type_z = 0; 1859 tex.src_sel_z = 1; 1860 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 1861 tex.coord_type_z = 0; 1862 1863 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1864 tex.src_sel_w = 2; 1865 1866 r = r600_bc_add_tex(ctx->bc, &tex); 1867 if (r) 1868 return r; 1869 1870 /* add shadow ambient support - gallium doesn't do it yet */ 1871 return 0; 1872} 1873 1874static int tgsi_lrp(struct r600_shader_ctx *ctx) 1875{ 1876 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1877 struct r600_bc_alu alu; 1878 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1879 unsigned i; 1880 int r; 1881 1882 /* optimize if it's just an equal balance */ 1883 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 1884 for (i = 0; i < lasti + 1; i++) { 1885 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1886 continue; 1887 1888 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1889 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1890 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1891 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1892 alu.omod = 3; 1893 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1894 alu.dst.chan = i; 1895 if (i == lasti) { 1896 alu.last = 1; 1897 } 1898 r = r600_bc_add_alu(ctx->bc, &alu); 1899 if (r) 1900 return r; 1901 } 1902 return 0; 1903 } 1904 1905 /* 1 - src0 */ 1906 for (i = 0; i < lasti + 1; i++) { 1907 if (!(inst->Dst[0].Register.WriteMask & (1 
<< i))) 1908 continue; 1909 1910 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1911 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1912 alu.src[0].sel = V_SQ_ALU_SRC_1; 1913 alu.src[0].chan = 0; 1914 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1915 alu.src[1].neg = 1; 1916 alu.dst.sel = ctx->temp_reg; 1917 alu.dst.chan = i; 1918 if (i == lasti) { 1919 alu.last = 1; 1920 } 1921 alu.dst.write = 1; 1922 r = r600_bc_add_alu(ctx->bc, &alu); 1923 if (r) 1924 return r; 1925 } 1926 1927 /* (1 - src0) * src2 */ 1928 for (i = 0; i < lasti + 1; i++) { 1929 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1930 continue; 1931 1932 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1933 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1934 alu.src[0].sel = ctx->temp_reg; 1935 alu.src[0].chan = i; 1936 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1937 alu.dst.sel = ctx->temp_reg; 1938 alu.dst.chan = i; 1939 if (i == lasti) { 1940 alu.last = 1; 1941 } 1942 alu.dst.write = 1; 1943 r = r600_bc_add_alu(ctx->bc, &alu); 1944 if (r) 1945 return r; 1946 } 1947 1948 /* src0 * src1 + (1 - src0) * src2 */ 1949 for (i = 0; i < lasti + 1; i++) { 1950 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1951 continue; 1952 1953 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1954 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1955 alu.is_op3 = 1; 1956 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1957 r600_bc_src(&alu.src[1], &ctx->src[1], i); 1958 alu.src[2].sel = ctx->temp_reg; 1959 alu.src[2].chan = i; 1960 1961 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1962 alu.dst.chan = i; 1963 if (i == lasti) { 1964 alu.last = 1; 1965 } 1966 r = r600_bc_add_alu(ctx->bc, &alu); 1967 if (r) 1968 return r; 1969 } 1970 return 0; 1971} 1972 1973static int tgsi_cmp(struct r600_shader_ctx *ctx) 1974{ 1975 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1976 struct r600_bc_alu alu; 1977 int i, r; 1978 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 
1979 1980 for (i = 0; i < lasti + 1; i++) { 1981 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1982 continue; 1983 1984 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1985 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1986 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1987 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1988 r600_bc_src(&alu.src[2], &ctx->src[1], i); 1989 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1990 alu.dst.chan = i; 1991 alu.dst.write = 1; 1992 alu.is_op3 = 1; 1993 if (i == lasti) 1994 alu.last = 1; 1995 r = r600_bc_add_alu(ctx->bc, &alu); 1996 if (r) 1997 return r; 1998 } 1999 return 0; 2000} 2001 2002static int tgsi_xpd(struct r600_shader_ctx *ctx) 2003{ 2004 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2005 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2006 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2007 struct r600_bc_alu alu; 2008 uint32_t use_temp = 0; 2009 int i, r; 2010 2011 if (inst->Dst[0].Register.WriteMask != 0xf) 2012 use_temp = 1; 2013 2014 for (i = 0; i < 4; i++) { 2015 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2016 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2017 if (i < 3) { 2018 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2019 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2020 } else { 2021 alu.src[0].sel = V_SQ_ALU_SRC_0; 2022 alu.src[0].chan = i; 2023 alu.src[1].sel = V_SQ_ALU_SRC_0; 2024 alu.src[1].chan = i; 2025 } 2026 2027 alu.dst.sel = ctx->temp_reg; 2028 alu.dst.chan = i; 2029 alu.dst.write = 1; 2030 2031 if (i == 3) 2032 alu.last = 1; 2033 r = r600_bc_add_alu(ctx->bc, &alu); 2034 if (r) 2035 return r; 2036 } 2037 2038 for (i = 0; i < 4; i++) { 2039 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2040 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2041 2042 if (i < 3) { 2043 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2044 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2045 } else { 2046 
alu.src[0].sel = V_SQ_ALU_SRC_0; 2047 alu.src[0].chan = i; 2048 alu.src[1].sel = V_SQ_ALU_SRC_0; 2049 alu.src[1].chan = i; 2050 } 2051 2052 alu.src[2].sel = ctx->temp_reg; 2053 alu.src[2].neg = 1; 2054 alu.src[2].chan = i; 2055 2056 if (use_temp) 2057 alu.dst.sel = ctx->temp_reg; 2058 else 2059 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2060 alu.dst.chan = i; 2061 alu.dst.write = 1; 2062 alu.is_op3 = 1; 2063 if (i == 3) 2064 alu.last = 1; 2065 r = r600_bc_add_alu(ctx->bc, &alu); 2066 if (r) 2067 return r; 2068 } 2069 if (use_temp) 2070 return tgsi_helper_copy(ctx, inst); 2071 return 0; 2072} 2073

/*
 * Emit "temp_reg.<chan> = op(src0.x)" as a one-instruction ALU group.
 * The source keeps the modifiers decoded from the TGSI operand.
 */
static int tgsi_temp_from_src0x(struct r600_shader_ctx *ctx, unsigned op, unsigned chan)
{
	struct r600_bc_alu alu;

	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = op;
	r600_bc_src(&alu.src[0], &ctx->src[0], 0);

	alu.dst.sel = ctx->temp_reg;
	alu.dst.chan = chan;
	alu.dst.write = 1;
	alu.last = 1;
	return r600_bc_add_alu(ctx->bc, &alu);
}

/*
 * Emit "temp_reg.<chan> = op(temp_reg.<chan>)" as a one-instruction ALU
 * group.  The instruction is built from a zeroed structure so that no
 * neg/abs/rel modifier or literal value from an earlier operand is applied
 * to the temporary.
 */
static int tgsi_temp_unary(struct r600_shader_ctx *ctx, unsigned op, unsigned chan)
{
	struct r600_bc_alu alu;

	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = op;
	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = chan;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.chan = chan;
	alu.dst.write = 1;
	alu.last = 1;
	return r600_bc_add_alu(ctx->bc, &alu);
}

/* Emit "temp_reg.w = 1.0" as a one-instruction ALU group. */
static int tgsi_temp_w_one(struct r600_shader_ctx *ctx)
{
	struct r600_bc_alu alu;

	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
	alu.src[0].sel = V_SQ_ALU_SRC_1;
	alu.src[0].chan = 0;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.chan = 3;
	alu.dst.write = 1;
	alu.last = 1;
	return r600_bc_add_alu(ctx->bc, &alu);
}

/*
 * Translate TGSI EXP.  Each enabled channel is computed into temp_reg and
 * the result is then copied to the destination under the write mask:
 *   x = 2^floor(src0.x)
 *   y = fract(src0.x)
 *   z = 2^src0.x
 *   w = 1.0
 *
 * Returns 0 on success or the error from r600_bc_add_alu().
 */
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	int r;

	/* result.x = 2^floor(src); */
	if (mask & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR), 0);
		if (r)
			return r;

		/* The exponent reads the plain temporary: source modifiers were
		 * already consumed by the FLOOR above. */
		r = tgsi_temp_unary(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE), 0);
		if (r)
			return r;
	}

	/* result.y = src - floor(src); */
	if ((mask >> 1) & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT), 1);
		if (r)
			return r;
	}

	/* result.z = 2^src; */
	if ((mask >> 2) & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE), 2);
		if (r)
			return r;
	}

	/* result.w = 1.0; */
	if ((mask >> 3) & 1) {
		r = tgsi_temp_w_one(ctx);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}

/*
 * Translate TGSI LOG.  Each enabled channel is computed into temp_reg and
 * the result is then copied to the destination under the write mask:
 *   x = floor(log2(src0.x))
 *   y = src0.x / 2^floor(log2(src0.x))
 *   z = log2(src0.x)
 *   w = 1.0
 *
 * Returns 0 on success or the error from r600_bc_add_alu().
 */
static int tgsi_log(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	struct r600_bc_alu alu;
	int r;

	/* result.x = floor(log2(src)); */
	if (mask & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE), 0);
		if (r)
			return r;

		/* FLOOR reads the plain temporary: source modifiers were
		 * already consumed by the LOG above. */
		r = tgsi_temp_unary(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR), 0);
		if (r)
			return r;
	}

	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
	if ((mask >> 1) & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE), 1);
		if (r)
			return r;

		r = tgsi_temp_unary(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR), 1);
		if (r)
			return r;

		r = tgsi_temp_unary(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE), 1);
		if (r)
			return r;

		r = tgsi_temp_unary(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE), 1);
		if (r)
			return r;

		/* tmp.y = src.x * tmp.y */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = log2(src); */
	if ((mask >> 2) & 1) {
		r = tgsi_temp_from_src0x(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE), 2);
		if (r)
			return r;
	}

	/* result.w = 1.0; */
	if ((mask >> 3) & 1) {
		r = tgsi_temp_w_one(ctx);
		if (r)
			return r;
	}

	return tgsi_helper_copy(ctx, inst);
}

2318static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2319{ 2320 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2321 struct r600_bc_alu alu; 2322 int r; 2323 2324 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2325 2326 switch (inst->Instruction.Opcode) { 2327 case TGSI_OPCODE_ARL: 2328 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2329 break; 2330 case TGSI_OPCODE_ARR: 2331 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2332 break; 2333 default: 2334 assert(0); 2335 return -1; 2336 } 2337 2338 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2339 alu.last = 1; 2340 alu.dst.sel = ctx->ar_reg; 2341 alu.dst.write = 1; 2342 r = r600_bc_add_alu(ctx->bc, &alu); 2343 if (r) 2344 return r; 2345 2346 /* TODO: Note that the MOVA can be avoided if we never use AR for 2347 * indexing non-CB registers in the current ALU clause. Similarly, we 2348 * need to load AR from ar_reg again if we started a new clause 2349 * between ARL and AR usage.
The easy way to do that is to remove 2350 * the MOVA here, and load it for the first AR access after ar_reg 2351 * has been modified in each clause. */ 2352 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2353 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2354 alu.src[0].sel = ctx->ar_reg; 2355 alu.src[0].chan = 0; 2356 alu.last = 1; 2357 r = r600_bc_add_alu(ctx->bc, &alu); 2358 if (r) 2359 return r; 2360 return 0; 2361} 2362static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2363{ 2364 /* TODO from r600c, ar values don't persist between clauses */ 2365 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2366 struct r600_bc_alu alu; 2367 int r; 2368 2369 switch (inst->Instruction.Opcode) { 2370 case TGSI_OPCODE_ARL: 2371 memset(&alu, 0, sizeof(alu)); 2372 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2373 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2374 alu.dst.sel = ctx->ar_reg; 2375 alu.dst.write = 1; 2376 alu.last = 1; 2377 2378 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2379 return r; 2380 2381 memset(&alu, 0, sizeof(alu)); 2382 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2383 alu.src[0].sel = ctx->ar_reg; 2384 alu.dst.sel = ctx->ar_reg; 2385 alu.dst.write = 1; 2386 alu.last = 1; 2387 2388 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2389 return r; 2390 break; 2391 case TGSI_OPCODE_ARR: 2392 memset(&alu, 0, sizeof(alu)); 2393 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2394 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2395 alu.dst.sel = ctx->ar_reg; 2396 alu.dst.write = 1; 2397 alu.last = 1; 2398 2399 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2400 return r; 2401 break; 2402 default: 2403 assert(0); 2404 return -1; 2405 } 2406 2407 memset(&alu, 0, sizeof(alu)); 2408 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2409 alu.src[0].sel = ctx->ar_reg; 2410 alu.last = 1; 2411 2412 r = r600_bc_add_alu(ctx->bc, &alu); 2413 if (r) 2414 return r; 2415 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2416 return 0; 2417} 2418 
2419static int tgsi_opdst(struct r600_shader_ctx *ctx) 2420{ 2421 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2422 struct r600_bc_alu alu; 2423 int i, r = 0; 2424 2425 for (i = 0; i < 4; i++) { 2426 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2427 2428 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2429 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2430 2431 if (i == 0 || i == 3) { 2432 alu.src[0].sel = V_SQ_ALU_SRC_1; 2433 } else { 2434 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2435 } 2436 2437 if (i == 0 || i == 2) { 2438 alu.src[1].sel = V_SQ_ALU_SRC_1; 2439 } else { 2440 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2441 } 2442 if (i == 3) 2443 alu.last = 1; 2444 r = r600_bc_add_alu(ctx->bc, &alu); 2445 if (r) 2446 return r; 2447 } 2448 return 0; 2449} 2450 2451static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2452{ 2453 struct r600_bc_alu alu; 2454 int r; 2455 2456 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2457 alu.inst = opcode; 2458 alu.predicate = 1; 2459 2460 alu.dst.sel = ctx->temp_reg; 2461 alu.dst.write = 1; 2462 alu.dst.chan = 0; 2463 2464 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2465 alu.src[1].sel = V_SQ_ALU_SRC_0; 2466 alu.src[1].chan = 0; 2467 2468 alu.last = 1; 2469 2470 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2471 if (r) 2472 return r; 2473 return 0; 2474} 2475 2476static int pops(struct r600_shader_ctx *ctx, int pops) 2477{ 2478 int alu_pop = 3; 2479 if (ctx->bc->cf_last) { 2480 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2481 alu_pop = 0; 2482 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2483 alu_pop = 1; 2484 } 2485 alu_pop += pops; 2486 if (alu_pop == 1) { 2487 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2488 ctx->bc->force_add_cf = 1; 2489 } else if (alu_pop == 2) { 2490 ctx->bc->cf_last->inst = 
CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2491 ctx->bc->force_add_cf = 1; 2492 } else { 2493 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2494 ctx->bc->cf_last->pop_count = pops; 2495 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2496 } 2497 return 0; 2498} 2499 2500static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2501{ 2502 switch(reason) { 2503 case FC_PUSH_VPM: 2504 ctx->bc->callstack[ctx->bc->call_sp].current--; 2505 break; 2506 case FC_PUSH_WQM: 2507 case FC_LOOP: 2508 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2509 break; 2510 case FC_REP: 2511 /* TOODO : for 16 vp asic should -= 2; */ 2512 ctx->bc->callstack[ctx->bc->call_sp].current --; 2513 break; 2514 } 2515} 2516 2517static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2518{ 2519 if (check_max_only) { 2520 int diff; 2521 switch (reason) { 2522 case FC_PUSH_VPM: 2523 diff = 1; 2524 break; 2525 case FC_PUSH_WQM: 2526 diff = 4; 2527 break; 2528 default: 2529 assert(0); 2530 diff = 0; 2531 } 2532 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2533 ctx->bc->callstack[ctx->bc->call_sp].max) { 2534 ctx->bc->callstack[ctx->bc->call_sp].max = 2535 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2536 } 2537 return; 2538 } 2539 switch (reason) { 2540 case FC_PUSH_VPM: 2541 ctx->bc->callstack[ctx->bc->call_sp].current++; 2542 break; 2543 case FC_PUSH_WQM: 2544 case FC_LOOP: 2545 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2546 break; 2547 case FC_REP: 2548 ctx->bc->callstack[ctx->bc->call_sp].current++; 2549 break; 2550 } 2551 2552 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2553 ctx->bc->callstack[ctx->bc->call_sp].max) { 2554 ctx->bc->callstack[ctx->bc->call_sp].max = 2555 ctx->bc->callstack[ctx->bc->call_sp].current; 2556 } 2557} 2558 2559static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2560{ 2561 struct 
r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2562 2563 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2564 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2565 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2566 sp->num_mid++; 2567} 2568 2569static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2570{ 2571 ctx->bc->fc_sp++; 2572 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2573 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2574} 2575 2576static void fc_poplevel(struct r600_shader_ctx *ctx) 2577{ 2578 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2579 if (sp->mid) { 2580 free(sp->mid); 2581 sp->mid = NULL; 2582 } 2583 sp->num_mid = 0; 2584 sp->start = NULL; 2585 sp->type = 0; 2586 ctx->bc->fc_sp--; 2587} 2588 2589#if 0 2590static int emit_return(struct r600_shader_ctx *ctx) 2591{ 2592 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2593 return 0; 2594} 2595 2596static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2597{ 2598 2599 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2600 ctx->bc->cf_last->pop_count = pops; 2601 /* TODO work out offset */ 2602 return 0; 2603} 2604 2605static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2606{ 2607 return 0; 2608} 2609 2610static void emit_testflag(struct r600_shader_ctx *ctx) 2611{ 2612 2613} 2614 2615static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2616{ 2617 emit_testflag(ctx); 2618 emit_jump_to_offset(ctx, 1, 4); 2619 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2620 pops(ctx, ifidx + 1); 2621 emit_return(ctx); 2622} 2623 2624static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2625{ 2626 emit_testflag(ctx); 2627 2628 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2629 ctx->bc->cf_last->pop_count = 1; 2630 2631 fc_set_mid(ctx, fc_sp); 2632 2633 pops(ctx, 1); 2634} 2635#endif 2636 2637static int tgsi_if(struct 
r600_shader_ctx *ctx) 2638{ 2639 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2640 2641 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2642 2643 fc_pushlevel(ctx, FC_IF); 2644 2645 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2646 return 0; 2647} 2648 2649static int tgsi_else(struct r600_shader_ctx *ctx) 2650{ 2651 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2652 ctx->bc->cf_last->pop_count = 1; 2653 2654 fc_set_mid(ctx, ctx->bc->fc_sp); 2655 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2656 return 0; 2657} 2658 2659static int tgsi_endif(struct r600_shader_ctx *ctx) 2660{ 2661 pops(ctx, 1); 2662 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2663 R600_ERR("if/endif unbalanced in shader\n"); 2664 return -1; 2665 } 2666 2667 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2668 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2669 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2670 } else { 2671 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2672 } 2673 fc_poplevel(ctx); 2674 2675 callstack_decrease_current(ctx, FC_PUSH_VPM); 2676 return 0; 2677} 2678 2679static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2680{ 2681 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2682 2683 fc_pushlevel(ctx, FC_LOOP); 2684 2685 /* check stack depth */ 2686 callstack_check_depth(ctx, FC_LOOP, 0); 2687 return 0; 2688} 2689 2690static int tgsi_endloop(struct r600_shader_ctx *ctx) 2691{ 2692 int i; 2693 2694 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2695 2696 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2697 R600_ERR("loop/endloop in shader code are not paired.\n"); 2698 return -EINVAL; 2699 } 2700 2701 /* fixup loop pointers - from r600isa 2702 LOOP END points to CF after LOOP START, 2703 LOOP START point to CF after LOOP END 
2704 BRK/CONT point to LOOP END CF 2705 */ 2706 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2707 2708 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2709 2710 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2711 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2712 } 2713 /* TODO add LOOPRET support */ 2714 fc_poplevel(ctx); 2715 callstack_decrease_current(ctx, FC_LOOP); 2716 return 0; 2717} 2718 2719static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2720{ 2721 unsigned int fscp; 2722 2723 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2724 { 2725 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2726 break; 2727 } 2728 2729 if (fscp == 0) { 2730 R600_ERR("Break not inside loop/endloop pair\n"); 2731 return -EINVAL; 2732 } 2733 2734 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2735 ctx->bc->cf_last->pop_count = 1; 2736 2737 fc_set_mid(ctx, fscp); 2738 2739 pops(ctx, 1); 2740 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2741 return 0; 2742} 2743 2744static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2745 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2746 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2747 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2748 2749 /* FIXME: 2750 * For state trackers other than OpenGL, we'll want to use 2751 * _RECIP_IEEE instead. 
2752 */ 2753 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2754 2755 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2756 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2757 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2758 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2759 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2760 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2761 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2762 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2763 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2764 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2765 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2766 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2767 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2768 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2769 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2770 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2771 /* gap */ 2772 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2773 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2774 /* gap */ 2775 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2776 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2777 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2778 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2779 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2780 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2781 {TGSI_OPCODE_EX2, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2782 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2783 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2784 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2785 /* gap */ 2786 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2787 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2788 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2789 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2790 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2791 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2792 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2793 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2794 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2795 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2796 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2797 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2798 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2799 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2800 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2801 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2802 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2803 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2804 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2805 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2806 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2807 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 2808 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2809 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2810 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2811 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2812 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2813 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2814 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2815 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2816 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2817 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2818 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2819 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2820 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2821 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2822 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2823 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2824 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2825 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2826 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2827 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2828 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2829 /* gap */ 2830 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2831 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2832 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2833 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2834 /* gap */ 2835 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 2836 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2837 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2838 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2839 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2840 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2841 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2842 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 2843 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2844 /* gap */ 2845 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2846 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2847 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2848 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2849 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2850 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2851 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2852 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2853 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2854 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2855 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2856 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2857 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2858 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2859 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2860 /* gap */ 2861 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2862 {104, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2863 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2864 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2865 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2866 /* gap */ 2867 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2868 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2869 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2870 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2871 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2872 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2873 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2874 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2875 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2876 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2877 /* gap */ 2878 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2879 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2880 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2881 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2882 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2883 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2884 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2885 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2887 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2889 
{TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2890 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2892 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2894 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2895 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2896 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2897 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2898 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2899 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2900 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2901 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2902 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2903 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2904 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2905 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906}; 2907 2908static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2909 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 2910 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2911 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2912 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2913 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2914 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_exp}, 2915 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2916 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2917 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2918 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2919 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2920 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2921 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2922 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2923 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2924 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2925 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2926 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2927 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2928 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 /* gap */ 2930 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2932 /* gap */ 2933 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2935 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2936 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2938 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2940 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2941 {TGSI_OPCODE_POW, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2942 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2943 /* gap */ 2944 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2945 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2946 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2947 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2948 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2949 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2950 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2951 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2952 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2954 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2955 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2956 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2957 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2958 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2959 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2960 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2961 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2962 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2963 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2964 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2965 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2966 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2967 {TGSI_OPCODE_UP2H, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2968 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2969 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2970 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2971 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2972 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2973 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 2974 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2975 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2976 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2977 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2978 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2979 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2980 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2981 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2982 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2983 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2984 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2985 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2986 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2987 /* gap */ 2988 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2989 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2990 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2991 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2992 /* gap */ 2993 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2994 {80, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2995 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2996 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2997 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2998 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2999 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3000 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3001 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3002 /* gap */ 3003 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3004 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3005 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3007 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3008 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3009 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3010 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3011 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3012 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3013 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3014 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3015 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3016 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3017 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3018 /* gap */ 3019 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3020 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3021 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3022 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3023 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3024 /* gap */ 3025 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3026 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3027 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3028 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3029 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3030 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3031 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3032 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3033 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3034 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3035 /* gap */ 3036 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3037 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3038 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3039 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3040 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3041 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3042 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3043 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3044 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3045 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3046 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3047 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3048 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3049 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3050 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3051 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3052 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3053 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3054 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3055 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3057 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3058 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3059 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3060 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3061 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3062 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3063 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064}; 3065