r600_shader.c revision d84ab821c5f5bfe9f6a57e434af9ca06d54f45b3
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 61{ 62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 63 struct r600_shader *rshader = &shader->shader; 64 uint32_t *ptr; 65 int i; 66 67 /* copy new shader */ 68 if (shader->bo == NULL) { 69 shader->bo = (struct r600_resource*) 70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 71 if (shader->bo == NULL) { 72 return -ENOMEM; 73 } 74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 75 if (R600_BIG_ENDIAN) { 76 for (i = 0; i < rshader->bc.ndw; ++i) { 77 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 78 } 79 } else { 80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 81 } 82 rctx->ws->buffer_unmap(shader->bo->buf); 83 } 84 /* build state */ 85 switch (rshader->processor_type) { 86 case TGSI_PROCESSOR_VERTEX: 87 if (rctx->chip_class >= EVERGREEN) { 88 evergreen_pipe_shader_vs(ctx, shader); 89 } else { 90 r600_pipe_shader_vs(ctx, shader); 91 } 92 break; 93 case TGSI_PROCESSOR_FRAGMENT: 94 if (rctx->chip_class >= EVERGREEN) { 95 evergreen_pipe_shader_ps(ctx, shader); 96 } else { 97 r600_pipe_shader_ps(ctx, shader); 98 } 99 break; 100 default: 101 return -EINVAL; 102 } 103 return 0; 104} 105 106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 107 108int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) 109{ 110 static int dump_shaders = -1; 111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 112 int r; 113 114 /* Would like some magic "get_bool_option_once" routine. 115 */ 116 if (dump_shaders == -1) 117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 118 119 if (dump_shaders) { 120 fprintf(stderr, "--------------------------------------------------------------\n"); 121 tgsi_dump(shader->tokens, 0); 122 123 if (shader->so.num_outputs) { 124 unsigned i; 125 fprintf(stderr, "STREAMOUT\n"); 126 for (i = 0; i < shader->so.num_outputs; i++) { 127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << 128 shader->so.output[i].start_component; 129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, 130 shader->so.output[i].output_buffer, shader->so.output[i].register_index, 131 mask & 1 ? "x" : "_", 132 (mask >> 1) & 1 ? "y" : "_", 133 (mask >> 2) & 1 ? "z" : "_", 134 (mask >> 3) & 1 ? "w" : "_"); 135 } 136 } 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 struct r600_shader_tgsi_instruction *inst_info; 185 struct r600_bytecode *bc; 186 struct r600_shader *shader; 187 struct r600_shader_src src[4]; 188 u32 *literals; 189 u32 nliterals; 190 u32 max_driver_temp_used; 191 /* needed for evergreen interpolation */ 192 boolean input_centroid; 193 boolean input_linear; 194 boolean input_perspective; 195 int num_interp_gpr; 196}; 197 198struct r600_shader_tgsi_instruction { 199 unsigned tgsi_opcode; 200 unsigned is_op3; 201 unsigned r600_opcode; 202 int (*process)(struct r600_shader_ctx *ctx); 203}; 204 205static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 206static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 207 208static int tgsi_is_supported(struct r600_shader_ctx *ctx) 209{ 210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 211 int j; 212 213 if (i->Instruction.NumDstRegs > 1) { 214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 215 return -EINVAL; 216 } 217 if (i->Instruction.Predicate) { 218 R600_ERR("predicate unsupported\n"); 219 return -EINVAL; 220 } 221#if 0 222 if (i->Instruction.Label) { 223 R600_ERR("label unsupported\n"); 224 return -EINVAL; 225 } 226#endif 227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 228 if (i->Src[j].Register.Dimension) { 229 R600_ERR("unsupported src %d (dimension %d)\n", j, 230 i->Src[j].Register.Dimension); 231 return -EINVAL; 232 } 233 } 234 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 235 if (i->Dst[j].Register.Dimension) { 236 R600_ERR("unsupported dst (dimension)\n"); 237 return -EINVAL; 238 } 239 } 240 return 0; 241} 242 243static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 244{ 245 int i, r; 246 struct r600_bytecode_alu alu; 247 int gpr = 0, base_chan = 0; 248 int ij_index = 0; 249 250 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 251 ij_index = 0; 252 if (ctx->shader->input[input].centroid) 253 ij_index++; 254 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 255 ij_index = 0; 256 /* if we have perspective add one */ 257 if (ctx->input_perspective) { 258 ij_index++; 259 /* if we have perspective centroid */ 260 if (ctx->input_centroid) 261 ij_index++; 262 } 263 if (ctx->shader->input[input].centroid) 264 ij_index++; 265 } 266 267 /* work out gpr and base_chan from index */ 268 gpr = ij_index / 2; 269 base_chan = (2 * (ij_index % 2)) + 1; 270 271 for (i = 0; i < 8; i++) { 272 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 273 274 if (i < 4) 275 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 276 else 277 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 278 279 if ((i > 1) && (i < 6)) { 280 alu.dst.sel = ctx->shader->input[input].gpr; 281 alu.dst.write = 1; 282 } 283 284 alu.dst.chan = i % 4; 285 286 alu.src[0].sel = gpr; 287 alu.src[0].chan = (base_chan - (i % 2)); 288 289 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 290 291 alu.bank_swizzle_force = SQ_ALU_VEC_210; 292 if ((i % 4) == 3) 293 alu.last = 1; 294 r = r600_bytecode_add_alu(ctx->bc, &alu); 295 if (r) 296 return r; 297 } 298 return 0; 299} 300 301static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 302{ 303 int i, r; 304 struct r600_bytecode_alu alu; 305 306 for (i = 0; i < 4; i++) { 307 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 308 309 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0; 310 311 alu.dst.sel = ctx->shader->input[input].gpr; 312 alu.dst.write = 1; 313 314 alu.dst.chan = i; 315 316 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 317 alu.src[0].chan = i; 318 319 if (i == 3) 320 alu.last = 1; 321 r = r600_bytecode_add_alu(ctx->bc, &alu); 322 if (r) 323 return r; 324 } 325 return 0; 326} 327 328/* 329 * Special export handling in shaders 330 * 331 * shader export ARRAY_BASE for EXPORT_POS: 332 * 60 is position 333 * 61 is misc vector 334 * 62, 63 are clip distance vectors 335 * 336 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 337 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 338 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 339 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 340 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 341 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 342 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 343 * exclusive from render target index) 344 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 345 * 346 * 347 * shader export ARRAY_BASE for EXPORT_PIXEL: 348 * 0-7 CB targets 349 * 61 computed Z vector 350 * 351 * The use of the values exported in the computed Z vector are controlled 352 * by DB_SHADER_CONTROL: 353 * Z_EXPORT_ENABLE - Z as a float in RED 354 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 355 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 356 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 357 * DB_SOURCE_FORMAT - export control restrictions 358 * 359 */ 360 361 362/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 363static int r600_spi_sid(struct r600_shader_io * io) 364{ 365 int index, name = io->name; 366 367 /* These params are handled differently, they don't need 368 * semantic indices, so we'll use 0 for them. 369 */ 370 if (name == TGSI_SEMANTIC_POSITION || 371 name == TGSI_SEMANTIC_PSIZE || 372 name == TGSI_SEMANTIC_FACE) 373 index = 0; 374 else { 375 if (name == TGSI_SEMANTIC_GENERIC) { 376 /* For generic params simply use sid from tgsi */ 377 index = io->sid; 378 } else { 379 380 /* FIXME: two-side rendering is broken in r600g, this will 381 * keep old functionality */ 382 if (name == TGSI_SEMANTIC_BCOLOR) 383 name = TGSI_SEMANTIC_COLOR; 384 385 /* For non-generic params - pack name and sid into 8 bits */ 386 index = 0x80 | (name<<3) | (io->sid); 387 } 388 389 /* Make sure that all really used indices have nonzero value, so 390 * we can just compare it to 0 later instead of comparing the name 391 * with different values to detect special cases. */ 392 index++; 393 } 394 395 return index; 396}; 397 398static int tgsi_declaration(struct r600_shader_ctx *ctx) 399{ 400 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 401 unsigned i; 402 int r; 403 404 switch (d->Declaration.File) { 405 case TGSI_FILE_INPUT: 406 i = ctx->shader->ninput++; 407 ctx->shader->input[i].name = d->Semantic.Name; 408 ctx->shader->input[i].sid = d->Semantic.Index; 409 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 410 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 411 ctx->shader->input[i].centroid = d->Declaration.Centroid; 412 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 413 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { 414 /* turn input into interpolate on EG */ 415 if (ctx->shader->input[i].spi_sid) { 416 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 417 if (ctx->shader->input[i].interpolate > 0) { 418 evergreen_interp_alu(ctx, i); 419 } else { 420 evergreen_interp_flat(ctx, i); 421 } 422 } 423 } 424 break; 425 case TGSI_FILE_OUTPUT: 426 i = ctx->shader->noutput++; 427 ctx->shader->output[i].name = d->Semantic.Name; 428 ctx->shader->output[i].sid = d->Semantic.Index; 429 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 430 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 431 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 432 break; 433 case TGSI_FILE_CONSTANT: 434 case TGSI_FILE_TEMPORARY: 435 case TGSI_FILE_SAMPLER: 436 case TGSI_FILE_ADDRESS: 437 break; 438 439 case TGSI_FILE_SYSTEM_VALUE: 440 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 441 struct r600_bytecode_alu alu; 442 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 443 444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 445 alu.src[0].sel = 0; 446 alu.src[0].chan = 3; 447 448 alu.dst.sel = 0; 449 alu.dst.chan = 3; 450 alu.dst.write = 1; 451 alu.last = 1; 452 453 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 454 return r; 455 break; 456 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 457 break; 458 default: 459 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 460 return -EINVAL; 461 } 462 return 0; 463} 464 465static int r600_get_temp(struct r600_shader_ctx *ctx) 466{ 467 return ctx->temp_reg + ctx->max_driver_temp_used++; 468} 469 470/* 471 * for evergreen we need to scan the shader to find the number of GPRs we need to 472 * reserve for interpolation. 473 * 474 * we need to know if we are going to emit 475 * any centroid inputs 476 * if perspective and linear are required 477*/ 478static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 479{ 480 int i; 481 int num_baryc; 482 483 ctx->input_linear = FALSE; 484 ctx->input_perspective = FALSE; 485 ctx->input_centroid = FALSE; 486 ctx->num_interp_gpr = 1; 487 488 /* any centroid inputs */ 489 for (i = 0; i < ctx->info.num_inputs; i++) { 490 /* skip position/face */ 491 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 492 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 493 continue; 494 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 495 ctx->input_linear = TRUE; 496 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 497 ctx->input_perspective = TRUE; 498 if (ctx->info.input_centroid[i]) 499 ctx->input_centroid = TRUE; 500 } 501 502 num_baryc = 0; 503 /* ignoring sample for now */ 504 if (ctx->input_perspective) 505 num_baryc++; 506 if (ctx->input_linear) 507 num_baryc++; 508 if (ctx->input_centroid) 509 num_baryc *= 2; 510 511 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 512 513 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 514 return ctx->num_interp_gpr; 515} 516 517static void tgsi_src(struct r600_shader_ctx *ctx, 518 const struct tgsi_full_src_register *tgsi_src, 519 struct r600_shader_src *r600_src) 520{ 521 memset(r600_src, 0, sizeof(*r600_src)); 522 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 523 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 524 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 525 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 526 r600_src->neg = tgsi_src->Register.Negate; 527 r600_src->abs = tgsi_src->Register.Absolute; 528 529 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 530 int index; 531 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 532 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 533 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 534 535 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 536 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 537 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 538 return; 539 } 540 index = tgsi_src->Register.Index; 541 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 542 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 543 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 544 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 545 r600_src->swizzle[0] = 3; 546 r600_src->swizzle[1] = 3; 547 r600_src->swizzle[2] = 3; 548 r600_src->swizzle[3] = 3; 549 r600_src->sel = 0; 550 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 551 r600_src->swizzle[0] = 0; 552 r600_src->swizzle[1] = 0; 553 r600_src->swizzle[2] = 0; 554 r600_src->swizzle[3] = 0; 555 r600_src->sel = 0; 556 } 557 } else { 558 if (tgsi_src->Register.Indirect) 559 r600_src->rel = V_SQ_REL_RELATIVE; 560 r600_src->sel = tgsi_src->Register.Index; 561 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 562 } 563} 564 565static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 566{ 567 struct r600_bytecode_vtx vtx; 568 unsigned int ar_reg; 569 int r; 570 571 if (offset) { 572 struct r600_bytecode_alu alu; 573 574 memset(&alu, 0, sizeof(alu)); 575 576 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 577 alu.src[0].sel = ctx->bc->ar_reg; 578 579 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 580 alu.src[1].value = offset; 581 582 alu.dst.sel = dst_reg; 583 alu.dst.write = 1; 584 alu.last = 1; 585 586 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 587 return r; 588 589 ar_reg = dst_reg; 590 } else { 591 ar_reg = ctx->bc->ar_reg; 592 } 593 594 memset(&vtx, 0, sizeof(vtx)); 595 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 596 vtx.src_gpr = ar_reg; 597 vtx.mega_fetch_count = 16; 598 vtx.dst_gpr = dst_reg; 599 vtx.dst_sel_x = 0; /* SEL_X */ 600 vtx.dst_sel_y = 1; /* SEL_Y */ 601 vtx.dst_sel_z = 2; /* SEL_Z */ 602 vtx.dst_sel_w = 3; /* SEL_W */ 603 vtx.data_format = FMT_32_32_32_32_FLOAT; 604 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 605 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 606 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 607 vtx.endian = r600_endian_swap(32); 608 609 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 610 return r; 611 612 return 0; 613} 614 615static int tgsi_split_constant(struct r600_shader_ctx *ctx) 616{ 617 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 618 struct r600_bytecode_alu alu; 619 int i, j, k, nconst, r; 620 621 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 622 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 623 nconst++; 624 } 625 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 626 } 627 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 628 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 629 continue; 630 } 631 632 if (ctx->src[i].rel) { 633 int treg = r600_get_temp(ctx); 634 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 635 return r; 636 637 ctx->src[i].sel = treg; 638 ctx->src[i].rel = 0; 639 j--; 640 } else if (j > 0) { 641 int treg = r600_get_temp(ctx); 642 for (k = 0; k < 4; k++) { 643 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 645 alu.src[0].sel = ctx->src[i].sel; 646 alu.src[0].chan = k; 647 alu.src[0].rel = ctx->src[i].rel; 648 alu.dst.sel = treg; 649 alu.dst.chan = k; 650 alu.dst.write = 1; 651 if (k == 3) 652 alu.last = 1; 653 r = r600_bytecode_add_alu(ctx->bc, &alu); 654 if (r) 655 return r; 656 } 657 ctx->src[i].sel = treg; 658 ctx->src[i].rel =0; 659 j--; 660 } 661 } 662 return 0; 663} 664 665/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 666static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 667{ 668 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 669 struct r600_bytecode_alu alu; 670 int i, j, k, nliteral, r; 671 672 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 673 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 674 nliteral++; 675 } 676 } 677 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 678 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 679 int treg = r600_get_temp(ctx); 680 for (k = 0; k < 4; k++) { 681 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 683 alu.src[0].sel = ctx->src[i].sel; 684 alu.src[0].chan = k; 685 alu.src[0].value = ctx->src[i].value[k]; 686 alu.dst.sel = treg; 687 alu.dst.chan = k; 688 alu.dst.write = 1; 689 if (k == 3) 690 alu.last = 1; 691 r = r600_bytecode_add_alu(ctx->bc, &alu); 692 if (r) 693 return r; 694 } 695 ctx->src[i].sel = treg; 696 j--; 697 } 698 } 699 return 0; 700} 701 702static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 703{ 704 struct r600_shader *shader = &pipeshader->shader; 705 struct tgsi_token *tokens = pipeshader->tokens; 706 struct pipe_stream_output_info so = pipeshader->so; 707 struct tgsi_full_immediate *immediate; 708 struct tgsi_full_property *property; 709 struct r600_shader_ctx ctx; 710 struct r600_bytecode_output output[32]; 711 unsigned output_done, noutput; 712 unsigned opcode; 713 int i, j, r = 0, pos0; 714 715 ctx.bc = &shader->bc; 716 ctx.shader = shader; 717 r600_bytecode_init(ctx.bc, rctx->chip_class); 718 ctx.tokens = tokens; 719 tgsi_scan_shader(tokens, &ctx.info); 720 tgsi_parse_init(&ctx.parse, tokens); 721 ctx.type = ctx.parse.FullHeader.Processor.Processor; 722 shader->processor_type = ctx.type; 723 ctx.bc->type = shader->processor_type; 724 725 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 726 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 727 728 shader->nr_cbufs = rctx->nr_cbufs; 729 730 /* register allocations */ 731 /* Values [0,127] correspond to GPR[0..127]. 732 * Values [128,159] correspond to constant buffer bank 0 733 * Values [160,191] correspond to constant buffer bank 1 734 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 735 * Values [256,287] correspond to constant buffer bank 2 (EG) 736 * Values [288,319] correspond to constant buffer bank 3 (EG) 737 * Other special values are shown in the list below. 738 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 739 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 740 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 741 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 742 * 248 SQ_ALU_SRC_0: special constant 0.0. 743 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 744 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 745 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 746 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 747 * 253 SQ_ALU_SRC_LITERAL: literal constant. 748 * 254 SQ_ALU_SRC_PV: previous vector result. 749 * 255 SQ_ALU_SRC_PS: previous scalar result. 750 */ 751 for (i = 0; i < TGSI_FILE_COUNT; i++) { 752 ctx.file_offset[i] = 0; 753 } 754 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 755 ctx.file_offset[TGSI_FILE_INPUT] = 1; 756 if (ctx.bc->chip_class >= EVERGREEN) { 757 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 758 } else { 759 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 760 } 761 } 762 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 763 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 764 } 765 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 766 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 767 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 768 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 769 770 /* Outside the GPR range. This will be translated to one of the 771 * kcache banks later. */ 772 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 773 774 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 775 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 776 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 777 ctx.temp_reg = ctx.bc->ar_reg + 1; 778 779 ctx.nliterals = 0; 780 ctx.literals = NULL; 781 shader->fs_write_all = FALSE; 782 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 783 tgsi_parse_token(&ctx.parse); 784 switch (ctx.parse.FullToken.Token.Type) { 785 case TGSI_TOKEN_TYPE_IMMEDIATE: 786 immediate = &ctx.parse.FullToken.FullImmediate; 787 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 788 if(ctx.literals == NULL) { 789 r = -ENOMEM; 790 goto out_err; 791 } 792 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 793 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 794 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 795 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 796 ctx.nliterals++; 797 break; 798 case TGSI_TOKEN_TYPE_DECLARATION: 799 r = tgsi_declaration(&ctx); 800 if (r) 801 goto out_err; 802 break; 803 case TGSI_TOKEN_TYPE_INSTRUCTION: 804 r = tgsi_is_supported(&ctx); 805 if (r) 806 goto out_err; 807 ctx.max_driver_temp_used = 0; 808 /* reserve first tmp for everyone */ 809 r600_get_temp(&ctx); 810 811 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 812 if ((r = tgsi_split_constant(&ctx))) 813 goto out_err; 814 if ((r = tgsi_split_literal_constant(&ctx))) 815 goto out_err; 816 if (ctx.bc->chip_class == CAYMAN) 817 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 818 else if (ctx.bc->chip_class >= EVERGREEN) 819 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 820 else 821 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 822 r = ctx.inst_info->process(&ctx); 823 if (r) 824 goto out_err; 825 break; 826 case TGSI_TOKEN_TYPE_PROPERTY: 827 property = &ctx.parse.FullToken.FullProperty; 828 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 829 if (property->u[0].Data == 1) 830 shader->fs_write_all = TRUE; 831 } 832 break; 833 default: 834 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 835 r = -EINVAL; 836 goto out_err; 837 } 838 } 839 840 noutput = shader->noutput; 841 842 /* clamp color outputs */ 843 if (shader->clamp_color) { 844 for (i = 0; i < noutput; i++) { 845 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 846 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 847 848 int j; 849 for (j = 0; j < 4; j++) { 850 struct r600_bytecode_alu alu; 851 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 852 853 /* MOV_SAT R, R */ 854 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 855 alu.dst.sel = shader->output[i].gpr; 856 alu.dst.chan = j; 857 alu.dst.write = 1; 858 alu.dst.clamp = 1; 859 alu.src[0].sel = alu.dst.sel; 860 alu.src[0].chan = j; 861 862 if (j == 3) { 863 alu.last = 1; 864 } 865 r = r600_bytecode_add_alu(ctx.bc, &alu); 866 if (r) 867 return r; 868 } 869 } 870 } 871 } 872 873 /* Add stream outputs. */ 874 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 875 for (i = 0; i < so.num_outputs; i++) { 876 struct r600_bytecode_output output; 877 878 if (so.output[i].output_buffer >= 4) { 879 R600_ERR("exceeded the max number of stream output buffers, got: %d\n", 880 so.output[i].output_buffer); 881 r = -EINVAL; 882 goto out_err; 883 } 884 if (so.output[i].start_component) { 885 R600_ERR("stream_output - start_component cannot be non-zero\n"); 886 r = -EINVAL; 887 goto out_err; 888 } 889 890 memset(&output, 0, sizeof(struct r600_bytecode_output)); 891 output.gpr = shader->output[so.output[i].register_index].gpr; 892 output.elem_size = 0; 893 output.array_base = so.output[i].dst_offset; 894 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 895 output.burst_count = 1; 896 output.barrier = 1; 897 output.array_size = 0; 898 output.comp_mask = (1 << so.output[i].num_components) - 1; 899 if (ctx.bc->chip_class >= EVERGREEN) { 900 switch (so.output[i].output_buffer) { 901 case 0: 902 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 903 break; 904 case 1: 905 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 906 break; 907 case 2: 908 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 909 break; 910 case 3: 911 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 912 break; 913 } 914 } else { 915 switch (so.output[i].output_buffer) { 916 case 0: 917 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 918 break; 919 case 1: 920 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 921 break; 922 case 2: 923 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 924 break; 925 case 3: 926 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 927 break; 928 } 929 } 930 r = r600_bytecode_add_output(ctx.bc, &output); 931 if (r) 932 goto out_err; 933 } 934 } 935 936 /* export output */ 937 j = 0; 938 for (i = 0, pos0 = 0; i < noutput; i++) { 939 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 940 output[i + j].gpr = shader->output[i].gpr; 941 output[i + j].elem_size = 3; 942 output[i + j].swizzle_x = 0; 943 output[i + j].swizzle_y = 1; 944 output[i + j].swizzle_z = 2; 945 output[i + j].swizzle_w = 3; 946 output[i + j].burst_count = 1; 947 output[i + j].barrier = 1; 948 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 949 output[i + j].array_base = i - pos0; 950 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 951 switch (ctx.type) { 952 case TGSI_PROCESSOR_VERTEX: 953 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 954 output[i + j].array_base = 60; 955 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 956 /* position doesn't count in array_base */ 957 pos0++; 958 } 959 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 960 output[i + j].array_base = 61; 961 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 962 /* position doesn't count in array_base */ 963 pos0++; 964 } 965 break; 966 case TGSI_PROCESSOR_FRAGMENT: 967 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 968 output[i + j].array_base = shader->output[i].sid; 969 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 970 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 971 for (j = 1; j < shader->nr_cbufs; j++) { 972 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); 973 output[i + j].gpr = shader->output[i].gpr; 974 output[i + j].elem_size = 3; 975 output[i + j].swizzle_x = 0; 976 output[i + j].swizzle_y = 1; 977 output[i + j].swizzle_z = 2; 978 output[i + j].swizzle_w = 3; 979 output[i + j].burst_count = 1; 980 output[i + j].barrier = 1; 981 output[i + j].array_base = shader->output[i].sid + j; 982 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 983 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 984 } 985 j = shader->nr_cbufs-1; 986 } 987 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 988 output[i + j].array_base = 61; 989 output[i + j].swizzle_x = 2; 990 output[i + j].swizzle_y = 7; 991 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 992 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 993 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 994 output[i + j].array_base = 61; 995 output[i + j].swizzle_x = 7; 996 output[i + j].swizzle_y = 1; 997 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 998 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 999 } else { 1000 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 1001 r = -EINVAL; 1002 goto out_err; 1003 } 1004 break; 1005 default: 1006 R600_ERR("unsupported processor type %d\n", ctx.type); 1007 r = -EINVAL; 1008 goto out_err; 1009 } 1010 } 1011 noutput += j; 1012 /* add fake param output for vertex shader if no param is exported */ 1013 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1014 for (i = 0, pos0 = 0; i < noutput; i++) { 1015 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 1016 pos0 = 1; 1017 break; 1018 } 1019 } 1020 if (!pos0) { 1021 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 1022 output[i].gpr = 0; 1023 output[i].elem_size = 3; 1024 output[i].swizzle_x = 7; 1025 output[i].swizzle_y = 7; 1026 output[i].swizzle_z = 7; 1027 output[i].swizzle_w = 7; 1028 output[i].burst_count = 1; 1029 output[i].barrier = 1; 1030 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1031 output[i].array_base = 0; 1032 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1033 noutput++; 1034 } 1035 } 1036 /* add fake pixel export */ 1037 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 1038 memset(&output[0], 0, sizeof(struct r600_bytecode_output)); 1039 output[0].gpr = 0; 1040 output[0].elem_size = 3; 1041 output[0].swizzle_x = 7; 1042 output[0].swizzle_y = 7; 1043 output[0].swizzle_z = 7; 1044 output[0].swizzle_w = 7; 1045 output[0].burst_count = 1; 1046 output[0].barrier = 1; 1047 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1048 output[0].array_base = 0; 1049 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1050 noutput++; 1051 } 1052 /* set export done on last export of each type */ 1053 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1054 if (ctx.bc->chip_class < CAYMAN) { 1055 if (i == (noutput - 1)) { 1056 output[i].end_of_program = 1; 1057 } 1058 } 1059 if (!(output_done & (1 << output[i].type))) { 1060 output_done |= (1 << output[i].type); 1061 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1062 } 1063 } 1064 /* add output to bytecode */ 1065 for (i = 0; i < noutput; i++) { 1066 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1067 if (r) 1068 goto out_err; 1069 } 1070 /* add program end */ 1071 if (ctx.bc->chip_class == CAYMAN) 1072 cm_bytecode_add_cf_end(ctx.bc); 1073 1074 free(ctx.literals); 1075 tgsi_parse_free(&ctx.parse); 1076 return 0; 1077out_err: 1078 free(ctx.literals); 1079 tgsi_parse_free(&ctx.parse); 1080 return r; 1081} 1082 1083static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1084{ 1085 R600_ERR("%s tgsi opcode unsupported\n", 1086 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1087 return -EINVAL; 1088} 1089 1090static int tgsi_end(struct r600_shader_ctx *ctx) 1091{ 1092 return 0; 1093} 1094 1095static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1096 const struct r600_shader_src *shader_src, 1097 unsigned chan) 1098{ 1099 bc_src->sel = shader_src->sel; 1100 bc_src->chan = shader_src->swizzle[chan]; 1101 bc_src->neg = shader_src->neg; 1102 bc_src->abs = shader_src->abs; 1103 bc_src->rel = shader_src->rel; 1104 bc_src->value = shader_src->value[bc_src->chan]; 1105} 1106 1107static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1108{ 1109 bc_src->abs = 1; 1110 bc_src->neg = 0; 1111} 1112 1113static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1114{ 1115 bc_src->neg = !bc_src->neg; 1116} 1117 1118static void tgsi_dst(struct r600_shader_ctx *ctx, 1119 const struct tgsi_full_dst_register *tgsi_dst, 1120 unsigned swizzle, 1121 struct r600_bytecode_alu_dst *r600_dst) 1122{ 1123 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1124 1125 r600_dst->sel = tgsi_dst->Register.Index; 1126 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1127 r600_dst->chan = swizzle; 1128 r600_dst->write = 1; 1129 if (tgsi_dst->Register.Indirect) 1130 r600_dst->rel = V_SQ_REL_RELATIVE; 1131 if (inst->Instruction.Saturate) { 1132 r600_dst->clamp = 1; 1133 } 1134} 1135 1136static int tgsi_last_instruction(unsigned writemask) 1137{ 1138 int i, lasti = 0; 1139 1140 for (i = 0; i < 4; i++) { 1141 if (writemask & (1 << i)) { 1142 lasti = i; 1143 } 1144 } 1145 return lasti; 1146} 1147 1148static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1149{ 1150 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1151 struct r600_bytecode_alu alu; 1152 int i, j, r; 1153 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1154 1155 for (i = 0; i < lasti + 1; i++) { 1156 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1157 continue; 1158 1159 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1160 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1161 1162 alu.inst = ctx->inst_info->r600_opcode; 1163 if (!swap) { 1164 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1165 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1166 } 1167 } else { 1168 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1169 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1170 } 1171 /* handle some special cases */ 1172 switch (ctx->inst_info->tgsi_opcode) { 1173 case TGSI_OPCODE_SUB: 1174 r600_bytecode_src_toggle_neg(&alu.src[1]); 1175 break; 1176 case TGSI_OPCODE_ABS: 1177 r600_bytecode_src_set_abs(&alu.src[0]); 1178 break; 1179 default: 1180 break; 1181 } 1182 if (i == lasti || trans_only) { 1183 alu.last = 1; 1184 } 1185 r = r600_bytecode_add_alu(ctx->bc, &alu); 1186 if (r) 1187 return r; 1188 } 1189 return 0; 1190} 1191 1192static int tgsi_op2(struct r600_shader_ctx *ctx) 1193{ 1194 return tgsi_op2_s(ctx, 0, 0); 1195} 1196 1197static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1198{ 1199 return tgsi_op2_s(ctx, 1, 0); 1200} 1201 1202static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 1203{ 1204 return tgsi_op2_s(ctx, 0, 1); 1205} 1206 1207static int tgsi_ineg(struct r600_shader_ctx *ctx) 1208{ 1209 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1210 struct r600_bytecode_alu alu; 1211 int i, r; 1212 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1213 1214 for (i = 0; i < lasti + 1; i++) { 1215 1216 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1217 continue; 1218 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1219 alu.inst = ctx->inst_info->r600_opcode; 1220 1221 alu.src[0].sel = V_SQ_ALU_SRC_0; 1222 1223 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1224 1225 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1226 1227 if (i == lasti) { 1228 alu.last = 1; 1229 } 1230 r = r600_bytecode_add_alu(ctx->bc, &alu); 1231 if (r) 1232 return r; 1233 } 1234 return 0; 1235 1236} 1237 1238static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1239{ 1240 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1241 int i, j, r; 1242 struct r600_bytecode_alu alu; 1243 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1244 1245 for (i = 0 ; i < last_slot; i++) { 1246 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1247 alu.inst = ctx->inst_info->r600_opcode; 1248 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1249 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1250 } 1251 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1252 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1253 1254 if (i == last_slot - 1) 1255 alu.last = 1; 1256 r = r600_bytecode_add_alu(ctx->bc, &alu); 1257 if (r) 1258 return r; 1259 } 1260 return 0; 1261} 1262 1263/* 1264 * r600 - trunc to -PI..PI range 1265 * r700 - normalize by dividing by 2PI 1266 * see fdo bug 27901 1267 */ 1268static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1269{ 1270 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1271 static float double_pi = 3.1415926535 * 2; 1272 static float neg_pi = -3.1415926535; 1273 1274 int r; 1275 struct r600_bytecode_alu alu; 1276 1277 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1278 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1279 alu.is_op3 = 1; 1280 1281 alu.dst.chan = 0; 1282 alu.dst.sel = ctx->temp_reg; 1283 alu.dst.write = 1; 1284 1285 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1286 1287 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1288 alu.src[1].chan = 0; 1289 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1290 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1291 alu.src[2].chan = 0; 1292 alu.last = 1; 1293 r = r600_bytecode_add_alu(ctx->bc, &alu); 1294 if (r) 1295 return r; 1296 1297 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1299 1300 alu.dst.chan = 0; 1301 alu.dst.sel = ctx->temp_reg; 1302 alu.dst.write = 1; 1303 1304 alu.src[0].sel = ctx->temp_reg; 1305 alu.src[0].chan = 0; 1306 alu.last = 1; 1307 r = r600_bytecode_add_alu(ctx->bc, &alu); 1308 if (r) 1309 return r; 1310 1311 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1313 alu.is_op3 = 1; 1314 1315 alu.dst.chan = 0; 1316 alu.dst.sel = ctx->temp_reg; 1317 alu.dst.write = 1; 1318 1319 alu.src[0].sel = ctx->temp_reg; 1320 alu.src[0].chan = 0; 1321 1322 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1323 alu.src[1].chan = 0; 1324 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1325 alu.src[2].chan = 0; 1326 1327 if (ctx->bc->chip_class == R600) { 1328 alu.src[1].value = *(uint32_t *)&double_pi; 1329 alu.src[2].value = *(uint32_t *)&neg_pi; 1330 } else { 1331 alu.src[1].sel = V_SQ_ALU_SRC_1; 1332 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1333 alu.src[2].neg = 1; 1334 } 1335 1336 alu.last = 1; 1337 r = r600_bytecode_add_alu(ctx->bc, &alu); 1338 if (r) 1339 return r; 1340 return 0; 1341} 1342 1343static int cayman_trig(struct r600_shader_ctx *ctx) 1344{ 1345 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1346 struct r600_bytecode_alu alu; 1347 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1348 int i, r; 1349 1350 r = tgsi_setup_trig(ctx); 1351 if (r) 1352 return r; 1353 1354 1355 for (i = 0; i < last_slot; i++) { 1356 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1357 alu.inst = ctx->inst_info->r600_opcode; 1358 alu.dst.chan = i; 1359 1360 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1361 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1362 1363 alu.src[0].sel = ctx->temp_reg; 1364 alu.src[0].chan = 0; 1365 if (i == last_slot - 1) 1366 alu.last = 1; 1367 r = r600_bytecode_add_alu(ctx->bc, &alu); 1368 if (r) 1369 return r; 1370 } 1371 return 0; 1372} 1373 1374static int tgsi_trig(struct r600_shader_ctx *ctx) 1375{ 1376 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1377 struct r600_bytecode_alu alu; 1378 int i, r; 1379 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1380 1381 r = tgsi_setup_trig(ctx); 1382 if (r) 1383 return r; 1384 1385 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1386 alu.inst = ctx->inst_info->r600_opcode; 1387 alu.dst.chan = 0; 1388 alu.dst.sel = ctx->temp_reg; 1389 alu.dst.write = 1; 1390 1391 alu.src[0].sel = ctx->temp_reg; 1392 alu.src[0].chan = 0; 1393 alu.last = 1; 1394 r = r600_bytecode_add_alu(ctx->bc, &alu); 1395 if (r) 1396 return r; 1397 1398 /* replicate result */ 1399 for (i = 0; i < lasti + 1; i++) { 1400 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1401 continue; 1402 1403 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1404 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1405 1406 alu.src[0].sel = ctx->temp_reg; 1407 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1408 if (i == lasti) 1409 alu.last = 1; 1410 r = r600_bytecode_add_alu(ctx->bc, &alu); 1411 if (r) 1412 return r; 1413 } 1414 return 0; 1415} 1416 1417static int tgsi_scs(struct r600_shader_ctx *ctx) 1418{ 1419 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1420 struct r600_bytecode_alu alu; 1421 int i, r; 1422 1423 /* We'll only need the trig stuff if we are going to write to the 1424 * X or Y components of the destination vector. 1425 */ 1426 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1427 r = tgsi_setup_trig(ctx); 1428 if (r) 1429 return r; 1430 } 1431 1432 /* dst.x = COS */ 1433 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1434 if (ctx->bc->chip_class == CAYMAN) { 1435 for (i = 0 ; i < 3; i++) { 1436 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1438 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1439 1440 if (i == 0) 1441 alu.dst.write = 1; 1442 else 1443 alu.dst.write = 0; 1444 alu.src[0].sel = ctx->temp_reg; 1445 alu.src[0].chan = 0; 1446 if (i == 2) 1447 alu.last = 1; 1448 r = r600_bytecode_add_alu(ctx->bc, &alu); 1449 if (r) 1450 return r; 1451 } 1452 } else { 1453 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1454 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1455 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1456 1457 alu.src[0].sel = ctx->temp_reg; 1458 alu.src[0].chan = 0; 1459 alu.last = 1; 1460 r = r600_bytecode_add_alu(ctx->bc, &alu); 1461 if (r) 1462 return r; 1463 } 1464 } 1465 1466 /* dst.y = SIN */ 1467 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1468 if (ctx->bc->chip_class == CAYMAN) { 1469 for (i = 0 ; i < 3; i++) { 1470 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1471 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1472 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1473 if (i == 1) 1474 alu.dst.write = 1; 1475 else 1476 alu.dst.write = 0; 1477 alu.src[0].sel = ctx->temp_reg; 1478 alu.src[0].chan = 0; 1479 if (i == 2) 1480 alu.last = 1; 1481 r = r600_bytecode_add_alu(ctx->bc, &alu); 1482 if (r) 1483 return r; 1484 } 1485 } else { 1486 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1487 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1488 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1489 1490 alu.src[0].sel = ctx->temp_reg; 1491 alu.src[0].chan = 0; 1492 alu.last = 1; 1493 r = r600_bytecode_add_alu(ctx->bc, &alu); 1494 if (r) 1495 return r; 1496 } 1497 } 1498 1499 /* dst.z = 0.0; */ 1500 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1501 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1502 1503 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1504 1505 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1506 1507 alu.src[0].sel = V_SQ_ALU_SRC_0; 1508 alu.src[0].chan = 0; 1509 1510 alu.last = 1; 1511 1512 r = r600_bytecode_add_alu(ctx->bc, &alu); 1513 if (r) 1514 return r; 1515 } 1516 1517 /* dst.w = 1.0; */ 1518 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1519 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1520 1521 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1522 1523 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1524 1525 alu.src[0].sel = V_SQ_ALU_SRC_1; 1526 alu.src[0].chan = 0; 1527 1528 alu.last = 1; 1529 1530 r = r600_bytecode_add_alu(ctx->bc, &alu); 1531 if (r) 1532 return r; 1533 } 1534 1535 return 0; 1536} 1537 1538static int tgsi_kill(struct r600_shader_ctx *ctx) 1539{ 1540 struct r600_bytecode_alu alu; 1541 int i, r; 1542 1543 for (i = 0; i < 4; i++) { 1544 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1545 alu.inst = ctx->inst_info->r600_opcode; 1546 1547 alu.dst.chan = i; 1548 1549 alu.src[0].sel = V_SQ_ALU_SRC_0; 1550 1551 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1552 alu.src[1].sel = V_SQ_ALU_SRC_1; 1553 alu.src[1].neg = 1; 1554 } else { 1555 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1556 } 1557 if (i == 3) { 1558 alu.last = 1; 1559 } 1560 r = r600_bytecode_add_alu(ctx->bc, &alu); 1561 if (r) 1562 return r; 1563 } 1564 1565 /* kill must be last in ALU */ 1566 ctx->bc->force_add_cf = 1; 1567 ctx->shader->uses_kill = TRUE; 1568 return 0; 1569} 1570 1571static int tgsi_lit(struct r600_shader_ctx *ctx) 1572{ 1573 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1574 struct r600_bytecode_alu alu; 1575 int r; 1576 1577 /* tmp.x = max(src.y, 0.0) */ 1578 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1579 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1580 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1581 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1582 alu.src[1].chan = 1; 1583 1584 alu.dst.sel = ctx->temp_reg; 1585 alu.dst.chan = 0; 1586 alu.dst.write = 1; 1587 1588 alu.last = 1; 1589 r = r600_bytecode_add_alu(ctx->bc, &alu); 1590 if (r) 1591 return r; 1592 1593 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1594 { 1595 int chan; 1596 int sel; 1597 int i; 1598 1599 if (ctx->bc->chip_class == CAYMAN) { 1600 for (i = 0; i < 3; i++) { 1601 /* tmp.z = log(tmp.x) */ 1602 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1604 alu.src[0].sel = ctx->temp_reg; 1605 alu.src[0].chan = 0; 1606 alu.dst.sel = ctx->temp_reg; 1607 alu.dst.chan = i; 1608 if (i == 2) { 1609 alu.dst.write = 1; 1610 alu.last = 1; 1611 } else 1612 alu.dst.write = 0; 1613 1614 r = r600_bytecode_add_alu(ctx->bc, &alu); 1615 if (r) 1616 return r; 1617 } 1618 } else { 1619 /* tmp.z = log(tmp.x) */ 1620 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1621 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1622 alu.src[0].sel = ctx->temp_reg; 1623 alu.src[0].chan = 0; 1624 alu.dst.sel = ctx->temp_reg; 1625 alu.dst.chan = 2; 1626 alu.dst.write = 1; 1627 alu.last = 1; 1628 r = r600_bytecode_add_alu(ctx->bc, &alu); 1629 if (r) 1630 return r; 1631 } 1632 1633 chan = alu.dst.chan; 1634 sel = alu.dst.sel; 1635 1636 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1637 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1638 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1639 alu.src[0].sel = sel; 1640 alu.src[0].chan = chan; 1641 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1642 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1643 alu.dst.sel = ctx->temp_reg; 1644 alu.dst.chan = 0; 1645 alu.dst.write = 1; 1646 alu.is_op3 = 1; 1647 alu.last = 1; 1648 r = r600_bytecode_add_alu(ctx->bc, &alu); 1649 if (r) 1650 return r; 1651 1652 if (ctx->bc->chip_class == CAYMAN) { 1653 for (i = 0; i < 3; i++) { 1654 /* dst.z = exp(tmp.x) */ 1655 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1657 alu.src[0].sel = ctx->temp_reg; 1658 alu.src[0].chan = 0; 1659 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1660 if (i == 2) { 1661 alu.dst.write = 1; 1662 alu.last = 1; 1663 } else 1664 alu.dst.write = 0; 1665 r = r600_bytecode_add_alu(ctx->bc, &alu); 1666 if (r) 1667 return r; 1668 } 1669 } else { 1670 /* dst.z = exp(tmp.x) */ 1671 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1673 alu.src[0].sel = ctx->temp_reg; 1674 alu.src[0].chan = 0; 1675 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1676 alu.last = 1; 1677 r = r600_bytecode_add_alu(ctx->bc, &alu); 1678 if (r) 1679 return r; 1680 } 1681 } 1682 1683 /* dst.x, <- 1.0 */ 1684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1686 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1687 alu.src[0].chan = 0; 1688 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1689 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1690 r = r600_bytecode_add_alu(ctx->bc, &alu); 1691 if (r) 1692 return r; 1693 1694 /* dst.y = max(src.x, 0.0) */ 1695 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1696 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1697 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1698 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1699 alu.src[1].chan = 0; 1700 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1701 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1702 r = r600_bytecode_add_alu(ctx->bc, &alu); 1703 if (r) 1704 return r; 1705 1706 /* dst.w, <- 1.0 */ 1707 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1708 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1709 alu.src[0].sel = V_SQ_ALU_SRC_1; 1710 alu.src[0].chan = 0; 1711 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1712 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1713 alu.last = 1; 1714 r = r600_bytecode_add_alu(ctx->bc, &alu); 1715 if (r) 1716 return r; 1717 1718 return 0; 1719} 1720 1721static int tgsi_rsq(struct r600_shader_ctx *ctx) 1722{ 1723 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1724 struct r600_bytecode_alu alu; 1725 int i, r; 1726 1727 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1728 1729 /* FIXME: 1730 * For state trackers other than OpenGL, we'll want to use 1731 * _RECIPSQRT_IEEE instead. 1732 */ 1733 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1734 1735 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1736 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1737 r600_bytecode_src_set_abs(&alu.src[i]); 1738 } 1739 alu.dst.sel = ctx->temp_reg; 1740 alu.dst.write = 1; 1741 alu.last = 1; 1742 r = r600_bytecode_add_alu(ctx->bc, &alu); 1743 if (r) 1744 return r; 1745 /* replicate result */ 1746 return tgsi_helper_tempx_replicate(ctx); 1747} 1748 1749static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1750{ 1751 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1752 struct r600_bytecode_alu alu; 1753 int i, r; 1754 1755 for (i = 0; i < 4; i++) { 1756 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1757 alu.src[0].sel = ctx->temp_reg; 1758 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1759 alu.dst.chan = i; 1760 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1761 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1762 if (i == 3) 1763 alu.last = 1; 1764 r = r600_bytecode_add_alu(ctx->bc, &alu); 1765 if (r) 1766 return r; 1767 } 1768 return 0; 1769} 1770 1771static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1772{ 1773 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1774 struct r600_bytecode_alu alu; 1775 int i, r; 1776 1777 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1778 alu.inst = ctx->inst_info->r600_opcode; 1779 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1780 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1781 } 1782 alu.dst.sel = ctx->temp_reg; 1783 alu.dst.write = 1; 1784 alu.last = 1; 1785 r = r600_bytecode_add_alu(ctx->bc, &alu); 1786 if (r) 1787 return r; 1788 /* replicate result */ 1789 return tgsi_helper_tempx_replicate(ctx); 1790} 1791 1792static int cayman_pow(struct r600_shader_ctx *ctx) 1793{ 1794 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1795 int i, r; 1796 struct r600_bytecode_alu alu; 1797 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1798 1799 for (i = 0; i < 3; i++) { 1800 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1801 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1802 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1803 alu.dst.sel = ctx->temp_reg; 1804 alu.dst.chan = i; 1805 alu.dst.write = 1; 1806 if (i == 2) 1807 alu.last = 1; 1808 r = r600_bytecode_add_alu(ctx->bc, &alu); 1809 if (r) 1810 return r; 1811 } 1812 1813 /* b * LOG2(a) */ 1814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1815 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1816 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1817 alu.src[1].sel = ctx->temp_reg; 1818 alu.dst.sel = ctx->temp_reg; 1819 alu.dst.write = 1; 1820 alu.last = 1; 1821 r = r600_bytecode_add_alu(ctx->bc, &alu); 1822 if (r) 1823 return r; 1824 1825 for (i = 0; i < last_slot; i++) { 1826 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1828 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1829 alu.src[0].sel = ctx->temp_reg; 1830 1831 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1832 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1833 if (i == last_slot - 1) 1834 alu.last = 1; 1835 r = r600_bytecode_add_alu(ctx->bc, &alu); 1836 if (r) 1837 return r; 1838 } 1839 return 0; 1840} 1841 1842static int tgsi_pow(struct r600_shader_ctx *ctx) 1843{ 1844 struct r600_bytecode_alu alu; 1845 int r; 1846 1847 /* LOG2(a) */ 1848 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1849 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1850 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1851 alu.dst.sel = ctx->temp_reg; 1852 alu.dst.write = 1; 1853 alu.last = 1; 1854 r = r600_bytecode_add_alu(ctx->bc, &alu); 1855 if (r) 1856 return r; 1857 /* b * LOG2(a) */ 1858 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1859 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1860 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1861 alu.src[1].sel = ctx->temp_reg; 1862 alu.dst.sel = ctx->temp_reg; 1863 alu.dst.write = 1; 1864 alu.last = 1; 1865 r = r600_bytecode_add_alu(ctx->bc, &alu); 1866 if (r) 1867 return r; 1868 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1869 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1870 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1871 alu.src[0].sel = ctx->temp_reg; 1872 alu.dst.sel = ctx->temp_reg; 1873 alu.dst.write = 1; 1874 alu.last = 1; 1875 r = r600_bytecode_add_alu(ctx->bc, &alu); 1876 if (r) 1877 return r; 1878 return tgsi_helper_tempx_replicate(ctx); 1879} 1880 1881static int tgsi_idiv(struct r600_shader_ctx *ctx) 1882{ 1883 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1884 struct r600_bytecode_alu alu; 1885 int i, r; 1886 unsigned write_mask = inst->Dst[0].Register.WriteMask; 1887 int last_inst = tgsi_last_instruction(write_mask); 1888 int tmp0 = ctx->temp_reg; 1889 int tmp1 = r600_get_temp(ctx); 1890 int unsigned_op = (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_UDIV); 1891 1892 /* tmp0 = float(src0) */ 1893 for (i = 0; i < 4; i++) { 1894 if (!(write_mask & (1<<i))) 1895 continue; 1896 1897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1898 1899 if (unsigned_op) 1900 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 1901 else 1902 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 1903 1904 alu.dst.sel = tmp0; 1905 alu.dst.chan = i; 1906 alu.dst.write = 1; 1907 1908 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1909 alu.last = 1; 1910 r = r600_bytecode_add_alu(ctx->bc, &alu); 1911 if (r) 1912 return r; 1913 } 1914 1915 if (!unsigned_op) { 1916 /* tmp1 = tmp0>=0 ? 0.5 : -0.5 for int*/ 1917 for (i = 0; i < 4; i++) { 1918 if (!(write_mask & (1<<i))) 1919 continue; 1920 1921 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1922 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1923 alu.is_op3 = 1; 1924 1925 alu.dst.sel = tmp1; 1926 alu.dst.chan = i; 1927 alu.dst.write = 1; 1928 1929 alu.src[0].sel = tmp0; 1930 alu.src[0].chan = i; 1931 1932 alu.src[1].sel = V_SQ_ALU_SRC_0_5; 1933 1934 if (unsigned_op) 1935 alu.src[2].sel = V_SQ_ALU_SRC_0; 1936 else { 1937 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1938 alu.src[2].neg = 1; 1939 } 1940 1941 if (i == last_inst) 1942 alu.last = 1; 1943 r = r600_bytecode_add_alu(ctx->bc, &alu); 1944 if (r) 1945 return r; 1946 } 1947 } 1948 1949 /* tmp0 = tmp0 + tmp1 for int */ 1950 /* tmp0 = tmp0 + 0.5 for uint */ 1951 for (i = 0; i < 4; i++) { 1952 if (!(write_mask & (1<<i))) 1953 continue; 1954 1955 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1957 1958 alu.dst.sel = tmp0; 1959 alu.dst.chan = i; 1960 alu.dst.write = 1; 1961 1962 alu.src[0].sel = tmp0; 1963 alu.src[0].chan = i; 1964 1965 if (unsigned_op) 1966 alu.src[1].sel = V_SQ_ALU_SRC_0_5; 1967 else { 1968 alu.src[1].sel = tmp1; 1969 alu.src[1].chan = i; 1970 } 1971 1972 if (i == last_inst) 1973 alu.last = 1; 1974 r = r600_bytecode_add_alu(ctx->bc, &alu); 1975 if (r) 1976 return r; 1977 } 1978 1979 /* tmp1 = float(src1) */ 1980 for (i = 0; i < 4; i++) { 1981 if (!(write_mask & (1<<i))) 1982 continue; 1983 1984 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1985 1986 if (unsigned_op) 1987 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 1988 else 1989 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 1990 1991 alu.dst.sel = tmp1; 1992 alu.dst.chan = i; 1993 alu.dst.write = 1; 1994 1995 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1996 alu.last = 1; 1997 r = r600_bytecode_add_alu(ctx->bc, &alu); 1998 if (r) 1999 return r; 2000 } 2001 2002 /* tmp1 = 1.0/src1 */ 2003 for (i = 0; i < 4; i++) { 2004 if (!(write_mask & (1<<i))) 2005 continue; 2006 2007 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2008 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2009 2010 alu.dst.sel = tmp1; 2011 alu.dst.chan = i; 2012 alu.dst.write = 1; 2013 2014 alu.src[0].sel = tmp1; 2015 alu.src[0].chan = i; 2016 2017 alu.last = 1; 2018 r = r600_bytecode_add_alu(ctx->bc, &alu); 2019 if (r) 2020 return r; 2021 } 2022 2023 /* tmp1 = tmp0 * tmp1 */ 2024 for (i = 0; i < 4; i++) { 2025 if (!(write_mask & (1<<i))) 2026 continue; 2027 2028 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2030 2031 alu.dst.sel = tmp1; 2032 alu.dst.chan = i; 2033 alu.dst.write = 1; 2034 2035 alu.src[0].sel = ctx->temp_reg; 2036 alu.src[0].chan = i; 2037 2038 alu.src[1].sel = tmp1; 2039 alu.src[1].chan = i; 2040 2041 if (i == last_inst) 2042 alu.last = 1; 2043 r = r600_bytecode_add_alu(ctx->bc, &alu); 2044 if (r) 2045 return r; 2046 } 2047 2048 /* tmp1 = trunc(tmp1) for evergreen+ */ 2049 if (ctx->bc->chip_class >= EVERGREEN) { 2050 for (i = 0; i < 4; i++) { 2051 if (!(write_mask & (1<<i))) 2052 continue; 2053 2054 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2055 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); 2056 2057 alu.dst.sel = tmp1; 2058 alu.dst.chan = i; 2059 alu.dst.write = 1; 2060 2061 alu.src[0].sel = tmp1; 2062 alu.src[0].chan = i; 2063 2064 if (i == last_inst) 2065 alu.last = 1; 2066 r = r600_bytecode_add_alu(ctx->bc, &alu); 2067 if (r) 2068 return r; 2069 } 2070 } 2071 2072 /* dst = int(tmp1) */ 2073 for (i = 0; i < 4; i++) { 2074 if (!(write_mask & (1<<i))) 2075 continue; 2076 2077 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2078 2079 if (unsigned_op) 2080 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 2081 else 2082 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT); 2083 2084 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2085 2086 alu.src[0].sel = tmp1; 2087 alu.src[0].chan = i; 2088 2089 if ((ctx->bc->chip_class < EVERGREEN || unsigned_op) || i == last_inst) 2090 alu.last = 1; 2091 r = r600_bytecode_add_alu(ctx->bc, &alu); 2092 if (r) 2093 return r; 2094 } 2095 2096 return 0; 2097} 2098 2099static int tgsi_f2i(struct r600_shader_ctx *ctx) 2100{ 2101 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2102 struct r600_bytecode_alu alu; 2103 int i, r; 2104 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2105 int last_inst = tgsi_last_instruction(write_mask); 2106 2107 for (i = 0; i < 4; i++) { 2108 if (!(write_mask & (1<<i))) 2109 continue; 2110 2111 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); 2113 2114 alu.dst.sel = ctx->temp_reg; 2115 alu.dst.chan = i; 2116 alu.dst.write = 1; 2117 2118 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2119 if (i == last_inst) 2120 alu.last = 1; 2121 r = r600_bytecode_add_alu(ctx->bc, &alu); 2122 if (r) 2123 return r; 2124 } 2125 2126 for (i = 0; i < 4; i++) { 2127 if (!(write_mask & (1<<i))) 2128 continue; 2129 2130 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2131 alu.inst = ctx->inst_info->r600_opcode; 2132 2133 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2134 2135 alu.src[0].sel = ctx->temp_reg; 2136 alu.src[0].chan = i; 2137 2138 if (i == last_inst) 2139 alu.last = 1; 2140 r = r600_bytecode_add_alu(ctx->bc, &alu); 2141 if (r) 2142 return r; 2143 } 2144 2145 return 0; 2146} 2147 2148static int tgsi_iabs(struct r600_shader_ctx *ctx) 2149{ 2150 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2151 struct r600_bytecode_alu alu; 2152 int i, r; 2153 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2154 int last_inst = tgsi_last_instruction(write_mask); 2155 2156 /* tmp = -src */ 2157 for (i = 0; i < 4; i++) { 2158 if (!(write_mask & (1<<i))) 2159 continue; 2160 2161 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2162 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2163 2164 alu.dst.sel = ctx->temp_reg; 2165 alu.dst.chan = i; 2166 alu.dst.write = 1; 2167 2168 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2169 alu.src[0].sel = V_SQ_ALU_SRC_0; 2170 2171 if (i == last_inst) 2172 alu.last = 1; 2173 r = r600_bytecode_add_alu(ctx->bc, &alu); 2174 if (r) 2175 return r; 2176 } 2177 2178 /* dst = (src >= 0 ? src : tmp) */ 2179 for (i = 0; i < 4; i++) { 2180 if (!(write_mask & (1<<i))) 2181 continue; 2182 2183 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2185 alu.is_op3 = 1; 2186 alu.dst.write = 1; 2187 2188 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2189 2190 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2191 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2192 alu.src[2].sel = ctx->temp_reg; 2193 alu.src[2].chan = i; 2194 2195 if (i == last_inst) 2196 alu.last = 1; 2197 r = r600_bytecode_add_alu(ctx->bc, &alu); 2198 if (r) 2199 return r; 2200 } 2201 return 0; 2202} 2203 2204static int tgsi_issg(struct r600_shader_ctx *ctx) 2205{ 2206 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2207 struct r600_bytecode_alu alu; 2208 int i, r; 2209 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2210 int last_inst = tgsi_last_instruction(write_mask); 2211 2212 /* tmp = (src >= 0 ? src : -1) */ 2213 for (i = 0; i < 4; i++) { 2214 if (!(write_mask & (1<<i))) 2215 continue; 2216 2217 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2218 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2219 alu.is_op3 = 1; 2220 2221 alu.dst.sel = ctx->temp_reg; 2222 alu.dst.chan = i; 2223 alu.dst.write = 1; 2224 2225 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2226 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2227 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 2228 2229 if (i == last_inst) 2230 alu.last = 1; 2231 r = r600_bytecode_add_alu(ctx->bc, &alu); 2232 if (r) 2233 return r; 2234 } 2235 2236 /* dst = (tmp > 0 ? 1 : tmp) */ 2237 for (i = 0; i < 4; i++) { 2238 if (!(write_mask & (1<<i))) 2239 continue; 2240 2241 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT); 2243 alu.is_op3 = 1; 2244 alu.dst.write = 1; 2245 2246 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2247 2248 alu.src[0].sel = ctx->temp_reg; 2249 alu.src[0].chan = i; 2250 2251 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 2252 2253 alu.src[2].sel = ctx->temp_reg; 2254 alu.src[2].chan = i; 2255 2256 if (i == last_inst) 2257 alu.last = 1; 2258 r = r600_bytecode_add_alu(ctx->bc, &alu); 2259 if (r) 2260 return r; 2261 } 2262 return 0; 2263} 2264 2265 2266 2267static int tgsi_ssg(struct r600_shader_ctx *ctx) 2268{ 2269 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2270 struct r600_bytecode_alu alu; 2271 int i, r; 2272 2273 /* tmp = (src > 0 ? 1 : src) */ 2274 for (i = 0; i < 4; i++) { 2275 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2277 alu.is_op3 = 1; 2278 2279 alu.dst.sel = ctx->temp_reg; 2280 alu.dst.chan = i; 2281 2282 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2283 alu.src[1].sel = V_SQ_ALU_SRC_1; 2284 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 2285 2286 if (i == 3) 2287 alu.last = 1; 2288 r = r600_bytecode_add_alu(ctx->bc, &alu); 2289 if (r) 2290 return r; 2291 } 2292 2293 /* dst = (-tmp > 0 ? -1 : tmp) */ 2294 for (i = 0; i < 4; i++) { 2295 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2296 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2297 alu.is_op3 = 1; 2298 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2299 2300 alu.src[0].sel = ctx->temp_reg; 2301 alu.src[0].chan = i; 2302 alu.src[0].neg = 1; 2303 2304 alu.src[1].sel = V_SQ_ALU_SRC_1; 2305 alu.src[1].neg = 1; 2306 2307 alu.src[2].sel = ctx->temp_reg; 2308 alu.src[2].chan = i; 2309 2310 if (i == 3) 2311 alu.last = 1; 2312 r = r600_bytecode_add_alu(ctx->bc, &alu); 2313 if (r) 2314 return r; 2315 } 2316 return 0; 2317} 2318 2319static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 2320{ 2321 struct r600_bytecode_alu alu; 2322 int i, r; 2323 2324 for (i = 0; i < 4; i++) { 2325 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2326 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 2327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 2328 alu.dst.chan = i; 2329 } else { 2330 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2331 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2332 alu.src[0].sel = ctx->temp_reg; 2333 alu.src[0].chan = i; 2334 } 2335 if (i == 3) { 2336 alu.last = 1; 2337 } 2338 r = r600_bytecode_add_alu(ctx->bc, &alu); 2339 if (r) 2340 return r; 2341 } 2342 return 0; 2343} 2344 2345static int tgsi_op3(struct r600_shader_ctx *ctx) 2346{ 2347 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2348 struct r600_bytecode_alu alu; 2349 int i, j, r; 2350 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2351 2352 for (i = 0; i < lasti + 1; i++) { 2353 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2354 continue; 2355 2356 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2357 alu.inst = ctx->inst_info->r600_opcode; 2358 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2359 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2360 } 2361 2362 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2363 alu.dst.chan = i; 2364 alu.dst.write = 1; 2365 alu.is_op3 = 1; 2366 if (i == lasti) { 2367 alu.last = 1; 2368 } 2369 r = r600_bytecode_add_alu(ctx->bc, &alu); 2370 if (r) 2371 return r; 2372 } 2373 return 0; 2374} 2375 2376static int tgsi_dp(struct r600_shader_ctx *ctx) 2377{ 2378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2379 struct r600_bytecode_alu alu; 2380 int i, j, r; 2381 2382 for (i = 0; i < 4; i++) { 2383 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2384 alu.inst = ctx->inst_info->r600_opcode; 2385 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2386 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2387 } 2388 2389 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2390 alu.dst.chan = i; 2391 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2392 /* handle some special cases */ 2393 switch (ctx->inst_info->tgsi_opcode) { 2394 case TGSI_OPCODE_DP2: 2395 if (i > 1) { 2396 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 2397 alu.src[0].chan = alu.src[1].chan = 0; 2398 } 2399 break; 2400 case TGSI_OPCODE_DP3: 2401 if (i > 2) { 2402 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 2403 alu.src[0].chan = alu.src[1].chan = 0; 2404 } 2405 break; 2406 case TGSI_OPCODE_DPH: 2407 if (i == 3) { 2408 alu.src[0].sel = V_SQ_ALU_SRC_1; 2409 alu.src[0].chan = 0; 2410 alu.src[0].neg = 0; 2411 } 2412 break; 2413 default: 2414 break; 2415 } 2416 if (i == 3) { 2417 alu.last = 1; 2418 } 2419 r = r600_bytecode_add_alu(ctx->bc, &alu); 2420 if (r) 2421 return r; 2422 } 2423 return 0; 2424} 2425 2426static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 2427 unsigned index) 2428{ 2429 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2430 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 2431 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 2432 ctx->src[index].neg || ctx->src[index].abs; 2433} 2434 2435static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 2436 unsigned index) 2437{ 2438 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2439 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 2440} 2441 2442static int tgsi_tex(struct r600_shader_ctx *ctx) 2443{ 2444 static float one_point_five = 1.5f; 2445 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2446 struct r600_bytecode_tex tex; 2447 struct r600_bytecode_alu alu; 2448 unsigned src_gpr; 2449 int r, i, j; 2450 int opcode; 2451 /* Texture fetch instructions can only use gprs as source. 2452 * Also they cannot negate the source or take the absolute value */ 2453 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 2454 boolean src_loaded = FALSE; 2455 unsigned sampler_src_reg = 1; 2456 u8 offset_x = 0, offset_y = 0, offset_z = 0; 2457 2458 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 2459 2460 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 2461 /* get offset values */ 2462 if (inst->Texture.NumOffsets) { 2463 assert(inst->Texture.NumOffsets == 1); 2464 2465 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 2466 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 2467 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 2468 } 2469 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 2470 /* TGSI moves the sampler to src reg 3 for TXD */ 2471 sampler_src_reg = 3; 2472 2473 for (i = 1; i < 3; i++) { 2474 /* set gradients h/v */ 2475 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2476 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : 2477 SQ_TEX_INST_SET_GRADIENTS_V; 2478 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2479 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2480 2481 if (tgsi_tex_src_requires_loading(ctx, i)) { 2482 tex.src_gpr = r600_get_temp(ctx); 2483 tex.src_sel_x = 0; 2484 tex.src_sel_y = 1; 2485 tex.src_sel_z = 2; 2486 tex.src_sel_w = 3; 2487 2488 for (j = 0; j < 4; j++) { 2489 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2491 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 2492 alu.dst.sel = tex.src_gpr; 2493 alu.dst.chan = j; 2494 if (j == 3) 2495 alu.last = 1; 2496 alu.dst.write = 1; 2497 r = r600_bytecode_add_alu(ctx->bc, &alu); 2498 if (r) 2499 return r; 2500 } 2501 2502 } else { 2503 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 2504 tex.src_sel_x = ctx->src[i].swizzle[0]; 2505 tex.src_sel_y = ctx->src[i].swizzle[1]; 2506 tex.src_sel_z = ctx->src[i].swizzle[2]; 2507 tex.src_sel_w = ctx->src[i].swizzle[3]; 2508 tex.src_rel = ctx->src[i].rel; 2509 } 2510 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 2511 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 2512 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2513 tex.coord_type_x = 1; 2514 tex.coord_type_y = 1; 2515 tex.coord_type_z = 1; 2516 tex.coord_type_w = 1; 2517 } 2518 r = r600_bytecode_add_tex(ctx->bc, &tex); 2519 if (r) 2520 return r; 2521 } 2522 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 2523 int out_chan; 2524 /* Add perspective divide */ 2525 if (ctx->bc->chip_class == CAYMAN) { 2526 out_chan = 2; 2527 for (i = 0; i < 3; i++) { 2528 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2530 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2531 2532 alu.dst.sel = ctx->temp_reg; 2533 alu.dst.chan = i; 2534 if (i == 2) 2535 alu.last = 1; 2536 if (out_chan == i) 2537 alu.dst.write = 1; 2538 r = r600_bytecode_add_alu(ctx->bc, &alu); 2539 if (r) 2540 return r; 2541 } 2542 2543 } else { 2544 out_chan = 3; 2545 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2546 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2547 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2548 2549 alu.dst.sel = ctx->temp_reg; 2550 alu.dst.chan = out_chan; 2551 alu.last = 1; 2552 alu.dst.write = 1; 2553 r = r600_bytecode_add_alu(ctx->bc, &alu); 2554 if (r) 2555 return r; 2556 } 2557 2558 for (i = 0; i < 3; i++) { 2559 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2560 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2561 alu.src[0].sel = ctx->temp_reg; 2562 alu.src[0].chan = out_chan; 2563 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2564 alu.dst.sel = ctx->temp_reg; 2565 alu.dst.chan = i; 2566 alu.dst.write = 1; 2567 r = r600_bytecode_add_alu(ctx->bc, &alu); 2568 if (r) 2569 return r; 2570 } 2571 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2572 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2573 alu.src[0].sel = V_SQ_ALU_SRC_1; 2574 alu.src[0].chan = 0; 2575 alu.dst.sel = ctx->temp_reg; 2576 alu.dst.chan = 3; 2577 alu.last = 1; 2578 alu.dst.write = 1; 2579 r = r600_bytecode_add_alu(ctx->bc, &alu); 2580 if (r) 2581 return r; 2582 src_loaded = TRUE; 2583 src_gpr = ctx->temp_reg; 2584 } 2585 2586 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2587 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 2588 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 2589 2590 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 2591 for (i = 0; i < 4; i++) { 2592 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2593 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 2594 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2595 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 2596 alu.dst.sel = ctx->temp_reg; 2597 alu.dst.chan = i; 2598 if (i == 3) 2599 alu.last = 1; 2600 alu.dst.write = 1; 2601 r = r600_bytecode_add_alu(ctx->bc, &alu); 2602 if (r) 2603 return r; 2604 } 2605 2606 /* tmp1.z = RCP_e(|tmp1.z|) */ 2607 if (ctx->bc->chip_class == CAYMAN) { 2608 for (i = 0; i < 3; i++) { 2609 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2610 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2611 alu.src[0].sel = ctx->temp_reg; 2612 alu.src[0].chan = 2; 2613 alu.src[0].abs = 1; 2614 alu.dst.sel = ctx->temp_reg; 2615 alu.dst.chan = i; 2616 if (i == 2) 2617 alu.dst.write = 1; 2618 if (i == 2) 2619 alu.last = 1; 2620 r = r600_bytecode_add_alu(ctx->bc, &alu); 2621 if (r) 2622 return r; 2623 } 2624 } else { 2625 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2626 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2627 alu.src[0].sel = ctx->temp_reg; 2628 alu.src[0].chan = 2; 2629 alu.src[0].abs = 1; 2630 alu.dst.sel = ctx->temp_reg; 2631 alu.dst.chan = 2; 2632 alu.dst.write = 1; 2633 alu.last = 1; 2634 r = r600_bytecode_add_alu(ctx->bc, &alu); 2635 if (r) 2636 return r; 2637 } 2638 2639 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 2640 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 2641 * muladd has no writemask, have to use another temp 2642 */ 2643 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2645 alu.is_op3 = 1; 2646 2647 alu.src[0].sel = ctx->temp_reg; 2648 alu.src[0].chan = 0; 2649 alu.src[1].sel = ctx->temp_reg; 2650 alu.src[1].chan = 2; 2651 2652 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2653 alu.src[2].chan = 0; 2654 alu.src[2].value = *(uint32_t *)&one_point_five; 2655 2656 alu.dst.sel = ctx->temp_reg; 2657 alu.dst.chan = 0; 2658 alu.dst.write = 1; 2659 2660 r = r600_bytecode_add_alu(ctx->bc, &alu); 2661 if (r) 2662 return r; 2663 2664 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2666 alu.is_op3 = 1; 2667 2668 alu.src[0].sel = ctx->temp_reg; 2669 alu.src[0].chan = 1; 2670 alu.src[1].sel = ctx->temp_reg; 2671 alu.src[1].chan = 2; 2672 2673 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2674 alu.src[2].chan = 0; 2675 alu.src[2].value = *(uint32_t *)&one_point_five; 2676 2677 alu.dst.sel = ctx->temp_reg; 2678 alu.dst.chan = 1; 2679 alu.dst.write = 1; 2680 2681 alu.last = 1; 2682 r = r600_bytecode_add_alu(ctx->bc, &alu); 2683 if (r) 2684 return r; 2685 2686 src_loaded = TRUE; 2687 src_gpr = ctx->temp_reg; 2688 } 2689 2690 if (src_requires_loading && !src_loaded) { 2691 for (i = 0; i < 4; i++) { 2692 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2693 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2694 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2695 alu.dst.sel = ctx->temp_reg; 2696 alu.dst.chan = i; 2697 if (i == 3) 2698 alu.last = 1; 2699 alu.dst.write = 1; 2700 r = r600_bytecode_add_alu(ctx->bc, &alu); 2701 if (r) 2702 return r; 2703 } 2704 src_loaded = TRUE; 2705 src_gpr = ctx->temp_reg; 2706 } 2707 2708 opcode = ctx->inst_info->r600_opcode; 2709 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2710 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2711 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2712 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 2713 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 2714 switch (opcode) { 2715 case SQ_TEX_INST_SAMPLE: 2716 opcode = SQ_TEX_INST_SAMPLE_C; 2717 break; 2718 case SQ_TEX_INST_SAMPLE_L: 2719 opcode = SQ_TEX_INST_SAMPLE_C_L; 2720 break; 2721 case SQ_TEX_INST_SAMPLE_LB: 2722 opcode = SQ_TEX_INST_SAMPLE_C_LB; 2723 break; 2724 case SQ_TEX_INST_SAMPLE_G: 2725 opcode = SQ_TEX_INST_SAMPLE_C_G; 2726 break; 2727 } 2728 } 2729 2730 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2731 tex.inst = opcode; 2732 2733 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2734 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2735 tex.src_gpr = src_gpr; 2736 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2737 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2738 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2739 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2740 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2741 if (src_loaded) { 2742 tex.src_sel_x = 0; 2743 tex.src_sel_y = 1; 2744 tex.src_sel_z = 2; 2745 tex.src_sel_w = 3; 2746 } else { 2747 tex.src_sel_x = ctx->src[0].swizzle[0]; 2748 tex.src_sel_y = ctx->src[0].swizzle[1]; 2749 tex.src_sel_z = ctx->src[0].swizzle[2]; 2750 tex.src_sel_w = ctx->src[0].swizzle[3]; 2751 tex.src_rel = ctx->src[0].rel; 2752 } 2753 2754 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2755 tex.src_sel_x = 1; 2756 tex.src_sel_y = 0; 2757 tex.src_sel_z = 3; 2758 tex.src_sel_w = 1; 2759 } 2760 2761 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 2762 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 2763 tex.coord_type_x = 1; 2764 tex.coord_type_y = 1; 2765 } 2766 tex.coord_type_z = 1; 2767 tex.coord_type_w = 1; 2768 2769 tex.offset_x = offset_x; 2770 tex.offset_y = offset_y; 2771 tex.offset_z = offset_z; 2772 2773 /* Put the depth for comparison in W. 2774 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 2775 * Some instructions expect the depth in Z. */ 2776 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2777 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2778 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2779 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 2780 opcode != SQ_TEX_INST_SAMPLE_C_L && 2781 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 2782 tex.src_sel_w = tex.src_sel_z; 2783 } 2784 2785 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 2786 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 2787 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 2788 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 2789 /* the array index is read from Y */ 2790 tex.coord_type_y = 0; 2791 } else { 2792 /* the array index is read from Z */ 2793 tex.coord_type_z = 0; 2794 tex.src_sel_z = tex.src_sel_y; 2795 } 2796 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 2797 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 2798 /* the array index is read from Z */ 2799 tex.coord_type_z = 0; 2800 2801 r = r600_bytecode_add_tex(ctx->bc, &tex); 2802 if (r) 2803 return r; 2804 2805 /* add shadow ambient support - gallium doesn't do it yet */ 2806 return 0; 2807} 2808 2809static int tgsi_lrp(struct r600_shader_ctx *ctx) 2810{ 2811 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2812 struct r600_bytecode_alu alu; 2813 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2814 unsigned i; 2815 int r; 2816 2817 /* optimize if it's just an equal balance */ 2818 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2819 for (i = 0; i < lasti + 1; i++) { 2820 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2821 continue; 2822 2823 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2824 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2825 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2826 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2827 alu.omod = 3; 2828 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2829 alu.dst.chan = i; 2830 if (i == lasti) { 2831 alu.last = 1; 2832 } 2833 r = r600_bytecode_add_alu(ctx->bc, &alu); 2834 if (r) 2835 return r; 2836 } 2837 return 0; 2838 } 2839 2840 /* 1 - src0 */ 2841 for (i = 0; i < lasti + 1; i++) { 2842 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2843 continue; 2844 2845 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2847 alu.src[0].sel = V_SQ_ALU_SRC_1; 2848 alu.src[0].chan = 0; 2849 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2850 r600_bytecode_src_toggle_neg(&alu.src[1]); 2851 alu.dst.sel = ctx->temp_reg; 2852 alu.dst.chan = i; 2853 if (i == lasti) { 2854 alu.last = 1; 2855 } 2856 alu.dst.write = 1; 2857 r = r600_bytecode_add_alu(ctx->bc, &alu); 2858 if (r) 2859 return r; 2860 } 2861 2862 /* (1 - src0) * src2 */ 2863 for (i = 0; i < lasti + 1; i++) { 2864 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2865 continue; 2866 2867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2869 alu.src[0].sel = ctx->temp_reg; 2870 alu.src[0].chan = i; 2871 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2872 alu.dst.sel = ctx->temp_reg; 2873 alu.dst.chan = i; 2874 if (i == lasti) { 2875 alu.last = 1; 2876 } 2877 alu.dst.write = 1; 2878 r = r600_bytecode_add_alu(ctx->bc, &alu); 2879 if (r) 2880 return r; 2881 } 2882 2883 /* src0 * src1 + (1 - src0) * src2 */ 2884 for (i = 0; i < lasti + 1; i++) { 2885 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2886 continue; 2887 2888 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2889 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2890 alu.is_op3 = 1; 2891 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2892 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2893 alu.src[2].sel = ctx->temp_reg; 2894 alu.src[2].chan = i; 2895 2896 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2897 alu.dst.chan = i; 2898 if (i == lasti) { 2899 alu.last = 1; 2900 } 2901 r = r600_bytecode_add_alu(ctx->bc, &alu); 2902 if (r) 2903 return r; 2904 } 2905 return 0; 2906} 2907 2908static int tgsi_cmp(struct r600_shader_ctx *ctx) 2909{ 2910 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2911 struct r600_bytecode_alu alu; 2912 int i, r; 2913 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2914 2915 for (i = 0; i < lasti + 1; i++) { 2916 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2917 continue; 2918 2919 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2921 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2922 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2923 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 2924 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2925 alu.dst.chan = i; 2926 alu.dst.write = 1; 2927 alu.is_op3 = 1; 2928 if (i == lasti) 2929 alu.last = 1; 2930 r = r600_bytecode_add_alu(ctx->bc, &alu); 2931 if (r) 2932 return r; 2933 } 2934 return 0; 2935} 2936 2937static int tgsi_xpd(struct r600_shader_ctx *ctx) 2938{ 2939 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2940 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2941 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2942 struct r600_bytecode_alu alu; 2943 uint32_t use_temp = 0; 2944 int i, r; 2945 2946 if (inst->Dst[0].Register.WriteMask != 0xf) 2947 use_temp = 1; 2948 2949 for (i = 0; i < 4; i++) { 2950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2952 if (i < 3) { 2953 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2954 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2955 } else { 2956 alu.src[0].sel = V_SQ_ALU_SRC_0; 2957 alu.src[0].chan = i; 2958 alu.src[1].sel = V_SQ_ALU_SRC_0; 2959 alu.src[1].chan = i; 2960 } 2961 2962 alu.dst.sel = ctx->temp_reg; 2963 alu.dst.chan = i; 2964 alu.dst.write = 1; 2965 2966 if (i == 3) 2967 alu.last = 1; 2968 r = r600_bytecode_add_alu(ctx->bc, &alu); 2969 if (r) 2970 return r; 2971 } 2972 2973 for (i = 0; i < 4; i++) { 2974 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2975 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2976 2977 if (i < 3) { 2978 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2979 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2980 } else { 2981 alu.src[0].sel = V_SQ_ALU_SRC_0; 2982 alu.src[0].chan = i; 2983 alu.src[1].sel = V_SQ_ALU_SRC_0; 2984 alu.src[1].chan = i; 2985 } 2986 2987 alu.src[2].sel = ctx->temp_reg; 2988 alu.src[2].neg = 1; 2989 alu.src[2].chan = i; 2990 2991 if (use_temp) 2992 alu.dst.sel = ctx->temp_reg; 2993 else 2994 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2995 alu.dst.chan = i; 2996 alu.dst.write = 1; 2997 alu.is_op3 = 1; 2998 if (i == 3) 2999 alu.last = 1; 3000 r = r600_bytecode_add_alu(ctx->bc, &alu); 3001 if (r) 3002 return r; 3003 } 3004 if (use_temp) 3005 return tgsi_helper_copy(ctx, inst); 3006 return 0; 3007} 3008 3009static int tgsi_exp(struct r600_shader_ctx *ctx) 3010{ 3011 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3012 struct r600_bytecode_alu alu; 3013 int r; 3014 int i; 3015 3016 /* result.x = 2^floor(src); */ 3017 if (inst->Dst[0].Register.WriteMask & 1) { 3018 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3019 3020 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3021 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3022 3023 alu.dst.sel = ctx->temp_reg; 3024 alu.dst.chan = 0; 3025 alu.dst.write = 1; 3026 alu.last = 1; 3027 r = r600_bytecode_add_alu(ctx->bc, &alu); 3028 if (r) 3029 return r; 3030 3031 if (ctx->bc->chip_class == CAYMAN) { 3032 for (i = 0; i < 3; i++) { 3033 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3034 alu.src[0].sel = ctx->temp_reg; 3035 alu.src[0].chan = 0; 3036 3037 alu.dst.sel = ctx->temp_reg; 3038 alu.dst.chan = i; 3039 if (i == 0) 3040 alu.dst.write = 1; 3041 if (i == 2) 3042 alu.last = 1; 3043 r = r600_bytecode_add_alu(ctx->bc, &alu); 3044 if (r) 3045 return r; 3046 } 3047 } else { 3048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3049 alu.src[0].sel = ctx->temp_reg; 3050 alu.src[0].chan = 0; 3051 3052 alu.dst.sel = ctx->temp_reg; 3053 alu.dst.chan = 0; 3054 alu.dst.write = 1; 3055 alu.last = 1; 3056 r = r600_bytecode_add_alu(ctx->bc, &alu); 3057 if (r) 3058 return r; 3059 } 3060 } 3061 3062 /* result.y = tmp - floor(tmp); */ 3063 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3064 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3065 3066 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 3067 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3068 3069 alu.dst.sel = ctx->temp_reg; 3070#if 0 3071 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3072 if (r) 3073 return r; 3074#endif 3075 alu.dst.write = 1; 3076 alu.dst.chan = 1; 3077 3078 alu.last = 1; 3079 3080 r = r600_bytecode_add_alu(ctx->bc, &alu); 3081 if (r) 3082 return r; 3083 } 3084 3085 /* result.z = RoughApprox2ToX(tmp);*/ 3086 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 3087 if (ctx->bc->chip_class == CAYMAN) { 3088 for (i = 0; i < 3; i++) { 3089 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3090 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3091 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3092 3093 alu.dst.sel = ctx->temp_reg; 3094 alu.dst.chan = i; 3095 if (i == 2) { 3096 alu.dst.write = 1; 3097 alu.last = 1; 3098 } 3099 3100 r = r600_bytecode_add_alu(ctx->bc, &alu); 3101 if (r) 3102 return r; 3103 } 3104 } else { 3105 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3106 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3107 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3108 3109 alu.dst.sel = ctx->temp_reg; 3110 alu.dst.write = 1; 3111 alu.dst.chan = 2; 3112 3113 alu.last = 1; 3114 3115 r = r600_bytecode_add_alu(ctx->bc, &alu); 3116 if (r) 3117 return r; 3118 } 3119 } 3120 3121 /* result.w = 1.0;*/ 3122 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 3123 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3124 3125 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3126 alu.src[0].sel = V_SQ_ALU_SRC_1; 3127 alu.src[0].chan = 0; 3128 3129 alu.dst.sel = ctx->temp_reg; 3130 alu.dst.chan = 3; 3131 alu.dst.write = 1; 3132 alu.last = 1; 3133 r = r600_bytecode_add_alu(ctx->bc, &alu); 3134 if (r) 3135 return r; 3136 } 3137 return tgsi_helper_copy(ctx, inst); 3138} 3139 3140static int tgsi_log(struct r600_shader_ctx *ctx) 3141{ 3142 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3143 struct r600_bytecode_alu alu; 3144 int r; 3145 int i; 3146 3147 /* result.x = floor(log2(|src|)); */ 3148 if (inst->Dst[0].Register.WriteMask & 1) { 3149 if (ctx->bc->chip_class == CAYMAN) { 3150 for (i = 0; i < 3; i++) { 3151 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3152 3153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3154 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3155 r600_bytecode_src_set_abs(&alu.src[0]); 3156 3157 alu.dst.sel = ctx->temp_reg; 3158 alu.dst.chan = i; 3159 if (i == 0) 3160 alu.dst.write = 1; 3161 if (i == 2) 3162 alu.last = 1; 3163 r = r600_bytecode_add_alu(ctx->bc, &alu); 3164 if (r) 3165 return r; 3166 } 3167 3168 } else { 3169 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3170 3171 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3172 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3173 r600_bytecode_src_set_abs(&alu.src[0]); 3174 3175 alu.dst.sel = ctx->temp_reg; 3176 alu.dst.chan = 0; 3177 alu.dst.write = 1; 3178 alu.last = 1; 3179 r = r600_bytecode_add_alu(ctx->bc, &alu); 3180 if (r) 3181 return r; 3182 } 3183 3184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3185 alu.src[0].sel = ctx->temp_reg; 3186 alu.src[0].chan = 0; 3187 3188 alu.dst.sel = ctx->temp_reg; 3189 alu.dst.chan = 0; 3190 alu.dst.write = 1; 3191 alu.last = 1; 3192 3193 r = r600_bytecode_add_alu(ctx->bc, &alu); 3194 if (r) 3195 return r; 3196 } 3197 3198 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 3199 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3200 3201 if (ctx->bc->chip_class == CAYMAN) { 3202 for (i = 0; i < 3; i++) { 3203 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3204 3205 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3206 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3207 r600_bytecode_src_set_abs(&alu.src[0]); 3208 3209 alu.dst.sel = ctx->temp_reg; 3210 alu.dst.chan = i; 3211 if (i == 1) 3212 alu.dst.write = 1; 3213 if (i == 2) 3214 alu.last = 1; 3215 3216 r = r600_bytecode_add_alu(ctx->bc, &alu); 3217 if (r) 3218 return r; 3219 } 3220 } else { 3221 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3222 3223 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3224 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3225 r600_bytecode_src_set_abs(&alu.src[0]); 3226 3227 alu.dst.sel = ctx->temp_reg; 3228 alu.dst.chan = 1; 3229 alu.dst.write = 1; 3230 alu.last = 1; 3231 3232 r = r600_bytecode_add_alu(ctx->bc, &alu); 3233 if (r) 3234 return r; 3235 } 3236 3237 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3238 3239 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3240 alu.src[0].sel = ctx->temp_reg; 3241 alu.src[0].chan = 1; 3242 3243 alu.dst.sel = ctx->temp_reg; 3244 alu.dst.chan = 1; 3245 alu.dst.write = 1; 3246 alu.last = 1; 3247 3248 r = r600_bytecode_add_alu(ctx->bc, &alu); 3249 if (r) 3250 return r; 3251 3252 if (ctx->bc->chip_class == CAYMAN) { 3253 for (i = 0; i < 3; i++) { 3254 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3256 alu.src[0].sel = ctx->temp_reg; 3257 alu.src[0].chan = 1; 3258 3259 alu.dst.sel = ctx->temp_reg; 3260 alu.dst.chan = i; 3261 if (i == 1) 3262 alu.dst.write = 1; 3263 if (i == 2) 3264 alu.last = 1; 3265 3266 r = r600_bytecode_add_alu(ctx->bc, &alu); 3267 if (r) 3268 return r; 3269 } 3270 } else { 3271 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3273 alu.src[0].sel = ctx->temp_reg; 3274 alu.src[0].chan = 1; 3275 3276 alu.dst.sel = ctx->temp_reg; 3277 alu.dst.chan = 1; 3278 alu.dst.write = 1; 3279 alu.last = 1; 3280 3281 r = r600_bytecode_add_alu(ctx->bc, &alu); 3282 if (r) 3283 return r; 3284 } 3285 3286 if (ctx->bc->chip_class == CAYMAN) { 3287 for (i = 0; i < 3; i++) { 3288 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3290 alu.src[0].sel = ctx->temp_reg; 3291 alu.src[0].chan = 1; 3292 3293 alu.dst.sel = ctx->temp_reg; 3294 alu.dst.chan = i; 3295 if (i == 1) 3296 alu.dst.write = 1; 3297 if (i == 2) 3298 alu.last = 1; 3299 3300 r = r600_bytecode_add_alu(ctx->bc, &alu); 3301 if (r) 3302 return r; 3303 } 3304 } else { 3305 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3306 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3307 alu.src[0].sel = ctx->temp_reg; 3308 alu.src[0].chan = 1; 3309 3310 alu.dst.sel = ctx->temp_reg; 3311 alu.dst.chan = 1; 3312 alu.dst.write = 1; 3313 alu.last = 1; 3314 3315 r = r600_bytecode_add_alu(ctx->bc, &alu); 3316 if (r) 3317 return r; 3318 } 3319 3320 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3321 3322 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3323 3324 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3325 r600_bytecode_src_set_abs(&alu.src[0]); 3326 3327 alu.src[1].sel = ctx->temp_reg; 3328 alu.src[1].chan = 1; 3329 3330 alu.dst.sel = ctx->temp_reg; 3331 alu.dst.chan = 1; 3332 alu.dst.write = 1; 3333 alu.last = 1; 3334 3335 r = r600_bytecode_add_alu(ctx->bc, &alu); 3336 if (r) 3337 return r; 3338 } 3339 3340 /* result.z = log2(|src|);*/ 3341 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 3342 if (ctx->bc->chip_class == CAYMAN) { 3343 for (i = 0; i < 3; i++) { 3344 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3345 3346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3347 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3348 r600_bytecode_src_set_abs(&alu.src[0]); 3349 3350 alu.dst.sel = ctx->temp_reg; 3351 if (i == 2) 3352 alu.dst.write = 1; 3353 alu.dst.chan = i; 3354 if (i == 2) 3355 alu.last = 1; 3356 3357 r = r600_bytecode_add_alu(ctx->bc, &alu); 3358 if (r) 3359 return r; 3360 } 3361 } else { 3362 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3363 3364 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3365 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3366 r600_bytecode_src_set_abs(&alu.src[0]); 3367 3368 alu.dst.sel = ctx->temp_reg; 3369 alu.dst.write = 1; 3370 alu.dst.chan = 2; 3371 alu.last = 1; 3372 3373 r = r600_bytecode_add_alu(ctx->bc, &alu); 3374 if (r) 3375 return r; 3376 } 3377 } 3378 3379 /* result.w = 1.0; */ 3380 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 3381 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3382 3383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3384 alu.src[0].sel = V_SQ_ALU_SRC_1; 3385 alu.src[0].chan = 0; 3386 3387 alu.dst.sel = ctx->temp_reg; 3388 alu.dst.chan = 3; 3389 alu.dst.write = 1; 3390 alu.last = 1; 3391 3392 r = r600_bytecode_add_alu(ctx->bc, &alu); 3393 if (r) 3394 return r; 3395 } 3396 3397 return tgsi_helper_copy(ctx, inst); 3398} 3399 3400static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 3401{ 3402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3403 struct r600_bytecode_alu alu; 3404 int r; 3405 3406 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3407 3408 switch (inst->Instruction.Opcode) { 3409 case TGSI_OPCODE_ARL: 3410 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 3411 break; 3412 case TGSI_OPCODE_ARR: 3413 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3414 break; 3415 case TGSI_OPCODE_UARL: 3416 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 3417 break; 3418 default: 3419 assert(0); 3420 return -1; 3421 } 3422 3423 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3424 alu.last = 1; 3425 alu.dst.sel = ctx->bc->ar_reg; 3426 alu.dst.write = 1; 3427 r = r600_bytecode_add_alu(ctx->bc, &alu); 3428 if (r) 3429 return r; 3430 3431 ctx->bc->ar_loaded = 0; 3432 return 0; 3433} 3434static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 3435{ 3436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3437 struct r600_bytecode_alu alu; 3438 int r; 3439 3440 switch (inst->Instruction.Opcode) { 3441 case TGSI_OPCODE_ARL: 3442 memset(&alu, 0, sizeof(alu)); 3443 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 3444 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3445 alu.dst.sel = ctx->bc->ar_reg; 3446 alu.dst.write = 1; 3447 alu.last = 1; 3448 3449 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3450 return r; 3451 3452 memset(&alu, 0, sizeof(alu)); 3453 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3454 alu.src[0].sel = ctx->bc->ar_reg; 3455 alu.dst.sel = ctx->bc->ar_reg; 3456 alu.dst.write = 1; 3457 alu.last = 1; 3458 3459 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3460 return r; 3461 break; 3462 case TGSI_OPCODE_ARR: 3463 memset(&alu, 0, sizeof(alu)); 3464 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3465 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3466 alu.dst.sel = ctx->bc->ar_reg; 3467 alu.dst.write = 1; 3468 alu.last = 1; 3469 3470 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3471 return r; 3472 break; 3473 case TGSI_OPCODE_UARL: 3474 memset(&alu, 0, sizeof(alu)); 3475 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 3476 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3477 alu.dst.sel = ctx->bc->ar_reg; 3478 alu.dst.write = 1; 3479 alu.last = 1; 3480 3481 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3482 return r; 3483 break; 3484 default: 3485 assert(0); 3486 return -1; 3487 } 3488 3489 ctx->bc->ar_loaded = 0; 3490 return 0; 3491} 3492 3493static int tgsi_opdst(struct r600_shader_ctx *ctx) 3494{ 3495 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3496 struct r600_bytecode_alu alu; 3497 int i, r = 0; 3498 3499 for (i = 0; i < 4; i++) { 3500 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3501 3502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3503 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3504 3505 if (i == 0 || i == 3) { 3506 alu.src[0].sel = V_SQ_ALU_SRC_1; 3507 } else { 3508 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3509 } 3510 3511 if (i == 0 || i == 2) { 3512 alu.src[1].sel = V_SQ_ALU_SRC_1; 3513 } else { 3514 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3515 } 3516 if (i == 3) 3517 alu.last = 1; 3518 r = r600_bytecode_add_alu(ctx->bc, &alu); 3519 if (r) 3520 return r; 3521 } 3522 return 0; 3523} 3524 3525static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 3526{ 3527 struct r600_bytecode_alu alu; 3528 int r; 3529 3530 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3531 alu.inst = opcode; 3532 alu.predicate = 1; 3533 3534 alu.dst.sel = ctx->temp_reg; 3535 alu.dst.write = 1; 3536 alu.dst.chan = 0; 3537 3538 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3539 alu.src[1].sel = V_SQ_ALU_SRC_0; 3540 alu.src[1].chan = 0; 3541 3542 alu.last = 1; 3543 3544 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 3545 if (r) 3546 return r; 3547 return 0; 3548} 3549 3550static int pops(struct r600_shader_ctx *ctx, int pops) 3551{ 3552 unsigned force_pop = ctx->bc->force_add_cf; 3553 3554 if (!force_pop) { 3555 int alu_pop = 3; 3556 if (ctx->bc->cf_last) { 3557 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 3558 alu_pop = 0; 3559 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 3560 alu_pop = 1; 3561 } 3562 alu_pop += pops; 3563 if (alu_pop == 1) { 3564 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 3565 ctx->bc->force_add_cf = 1; 3566 } else if (alu_pop == 2) { 3567 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 3568 ctx->bc->force_add_cf = 1; 3569 } else { 3570 force_pop = 1; 3571 } 3572 } 3573 3574 if (force_pop) { 3575 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 3576 ctx->bc->cf_last->pop_count = pops; 3577 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 3578 } 3579 3580 return 0; 3581} 3582 3583static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 3584{ 3585 switch(reason) { 3586 case FC_PUSH_VPM: 3587 ctx->bc->callstack[ctx->bc->call_sp].current--; 3588 break; 3589 case FC_PUSH_WQM: 3590 case FC_LOOP: 3591 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 3592 break; 3593 case FC_REP: 3594 /* TOODO : for 16 vp asic should -= 2; */ 3595 ctx->bc->callstack[ctx->bc->call_sp].current --; 3596 break; 3597 } 3598} 3599 3600static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 3601{ 3602 if (check_max_only) { 3603 int diff; 3604 switch (reason) { 3605 case FC_PUSH_VPM: 3606 diff = 1; 3607 break; 3608 case FC_PUSH_WQM: 3609 diff = 4; 3610 break; 3611 default: 3612 assert(0); 3613 diff = 0; 3614 } 3615 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 3616 ctx->bc->callstack[ctx->bc->call_sp].max) { 3617 ctx->bc->callstack[ctx->bc->call_sp].max = 3618 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 3619 } 3620 return; 3621 } 3622 switch (reason) { 3623 case FC_PUSH_VPM: 3624 ctx->bc->callstack[ctx->bc->call_sp].current++; 3625 break; 3626 case FC_PUSH_WQM: 3627 case FC_LOOP: 3628 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 3629 break; 3630 case FC_REP: 3631 ctx->bc->callstack[ctx->bc->call_sp].current++; 3632 break; 3633 } 3634 3635 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 3636 ctx->bc->callstack[ctx->bc->call_sp].max) { 3637 ctx->bc->callstack[ctx->bc->call_sp].max = 3638 ctx->bc->callstack[ctx->bc->call_sp].current; 3639 } 3640} 3641 3642static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 3643{ 3644 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 3645 3646 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 3647 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 3648 sp->mid[sp->num_mid] = ctx->bc->cf_last; 3649 sp->num_mid++; 3650} 3651 3652static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 3653{ 3654 ctx->bc->fc_sp++; 3655 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 3656 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 3657} 3658 3659static void fc_poplevel(struct r600_shader_ctx *ctx) 3660{ 3661 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 3662 if (sp->mid) { 3663 free(sp->mid); 3664 sp->mid = NULL; 3665 } 3666 sp->num_mid = 0; 3667 sp->start = NULL; 3668 sp->type = 0; 3669 ctx->bc->fc_sp--; 3670} 3671 3672#if 0 3673static int emit_return(struct r600_shader_ctx *ctx) 3674{ 3675 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 3676 return 0; 3677} 3678 3679static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 3680{ 3681 3682 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3683 ctx->bc->cf_last->pop_count = pops; 3684 /* TODO work out offset */ 3685 return 0; 3686} 3687 3688static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 3689{ 3690 return 0; 3691} 3692 3693static void emit_testflag(struct r600_shader_ctx *ctx) 3694{ 3695 3696} 3697 3698static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 3699{ 3700 emit_testflag(ctx); 3701 emit_jump_to_offset(ctx, 1, 4); 3702 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 3703 pops(ctx, ifidx + 1); 3704 emit_return(ctx); 3705} 3706 3707static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 3708{ 3709 emit_testflag(ctx); 3710 3711 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3712 ctx->bc->cf_last->pop_count = 1; 3713 3714 fc_set_mid(ctx, fc_sp); 3715 3716 pops(ctx, 1); 3717} 3718#endif 3719 3720static int tgsi_if(struct r600_shader_ctx *ctx) 3721{ 3722 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 3723 3724 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3725 3726 fc_pushlevel(ctx, FC_IF); 3727 3728 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3729 return 0; 3730} 3731 3732static int tgsi_else(struct r600_shader_ctx *ctx) 3733{ 3734 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3735 ctx->bc->cf_last->pop_count = 1; 3736 3737 fc_set_mid(ctx, ctx->bc->fc_sp); 3738 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3739 return 0; 3740} 3741 3742static int tgsi_endif(struct r600_shader_ctx *ctx) 3743{ 3744 pops(ctx, 1); 3745 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3746 R600_ERR("if/endif unbalanced in shader\n"); 3747 return -1; 3748 } 3749 3750 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3751 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3752 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3753 } else { 3754 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3755 } 3756 fc_poplevel(ctx); 3757 3758 callstack_decrease_current(ctx, FC_PUSH_VPM); 3759 return 0; 3760} 3761 3762static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3763{ 3764 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3765 3766 fc_pushlevel(ctx, FC_LOOP); 3767 3768 /* check stack depth */ 3769 callstack_check_depth(ctx, FC_LOOP, 0); 3770 return 0; 3771} 3772 3773static int tgsi_endloop(struct r600_shader_ctx *ctx) 3774{ 3775 int i; 3776 3777 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3778 3779 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3780 R600_ERR("loop/endloop in shader code are not paired.\n"); 3781 return -EINVAL; 3782 } 3783 3784 /* fixup loop pointers - from r600isa 3785 LOOP END points to CF after LOOP START, 3786 LOOP START point to CF after LOOP END 3787 BRK/CONT point to LOOP END CF 3788 */ 3789 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3790 3791 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3792 3793 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3794 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3795 } 3796 /* TODO add LOOPRET support */ 3797 fc_poplevel(ctx); 3798 callstack_decrease_current(ctx, FC_LOOP); 3799 return 0; 3800} 3801 3802static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3803{ 3804 unsigned int fscp; 3805 3806 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3807 { 3808 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3809 break; 3810 } 3811 3812 if (fscp == 0) { 3813 R600_ERR("Break not inside loop/endloop pair\n"); 3814 return -EINVAL; 3815 } 3816 3817 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3818 ctx->bc->cf_last->pop_count = 1; 3819 3820 fc_set_mid(ctx, fscp); 3821 3822 pops(ctx, 1); 3823 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3824 return 0; 3825} 3826 3827static int tgsi_umad(struct r600_shader_ctx *ctx) 3828{ 3829 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3830 struct r600_bytecode_alu alu; 3831 int i, j, r; 3832 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3833 3834 /* src0 * src1 */ 3835 for (i = 0; i < lasti + 1; i++) { 3836 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3837 continue; 3838 3839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3840 3841 alu.dst.chan = i; 3842 alu.dst.sel = ctx->temp_reg; 3843 alu.dst.write = 1; 3844 3845 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3846 for (j = 0; j < 2; j++) { 3847 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3848 } 3849 3850 alu.last = 1; 3851 r = r600_bytecode_add_alu(ctx->bc, &alu); 3852 if (r) 3853 return r; 3854 } 3855 3856 3857 for (i = 0; i < lasti + 1; i++) { 3858 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3859 continue; 3860 3861 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3862 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3863 3864 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3865 3866 alu.src[0].sel = ctx->temp_reg; 3867 alu.src[0].chan = i; 3868 3869 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3870 if (i == lasti) { 3871 alu.last = 1; 3872 } 3873 r = r600_bytecode_add_alu(ctx->bc, &alu); 3874 if (r) 3875 return r; 3876 } 3877 return 0; 3878} 3879 3880static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3881 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3882 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3883 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3884 3885 /* FIXME: 3886 * For state trackers other than OpenGL, we'll want to use 3887 * _RECIP_IEEE instead. 3888 */ 3889 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3890 3891 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3892 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3893 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3894 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3895 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3896 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3897 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3898 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3899 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3900 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3901 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3902 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3903 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3904 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3905 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3906 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3907 /* gap */ 3908 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3909 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3910 /* gap */ 3911 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3912 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3913 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3914 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3915 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3916 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3917 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3918 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3919 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3920 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3921 /* gap */ 3922 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3923 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3924 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3925 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3926 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3927 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3928 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3929 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3930 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3931 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3932 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3933 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3934 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3935 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3936 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3937 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3938 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3939 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3940 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3941 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3942 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3943 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3944 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3945 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3946 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3947 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3948 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3949 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3950 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3951 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3952 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3953 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3954 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3955 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3956 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3957 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3958 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3959 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3960 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3961 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3962 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3963 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3964 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3965 /* gap */ 3966 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3967 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3968 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3969 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3970 /* gap */ 3971 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3972 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3973 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3974 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3975 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3976 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 3977 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3978 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3979 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, 3980 /* gap */ 3981 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3982 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 3983 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 3984 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3985 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3986 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3987 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3988 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3989 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3990 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3991 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3992 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3993 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3994 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3995 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3996 /* gap */ 3997 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3998 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3999 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4000 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4001 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4002 /* gap */ 4003 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4004 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4005 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4006 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4007 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4008 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4009 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4010 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4011 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4012 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4013 /* gap */ 4014 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4015 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans}, 4016 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4017 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 4018 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4019 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2}, 4020 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 4021 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans}, 4022 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2}, 4023 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, 4024 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 4025 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 4026 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4027 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4028 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4029 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4030 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4031 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2}, 4032 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4033 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4034 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, 4035 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 4036 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 4037 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4038 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4039 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4040 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4041 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4042 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4043 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4044 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4045 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4046 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4047 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4048 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4049 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4050 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4051 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4052 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4053 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 4054 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4055 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4056 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 4057 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4058}; 4059 4060static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 4061 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4062 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4063 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4064 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 4065 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 4066 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4067 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4068 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4069 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4070 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4071 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4072 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4073 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4074 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4075 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4076 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4077 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4078 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4079 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4080 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4081 /* gap */ 4082 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4083 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4084 /* gap */ 4085 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4086 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4087 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4088 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4089 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4090 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4091 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 4092 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 4093 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 4094 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4095 /* gap */ 4096 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4097 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4098 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4099 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4100 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 4101 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4102 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4103 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4104 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4105 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4106 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4107 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4108 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4109 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4110 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4111 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4112 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 4113 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4114 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4115 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4116 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4117 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4118 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4119 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4120 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4121 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4122 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4123 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4124 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4125 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4126 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4127 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4128 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4129 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4130 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4131 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4132 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4133 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4134 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4135 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4136 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4137 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4138 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4139 /* gap */ 4140 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4141 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4142 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4143 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4144 /* gap */ 4145 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4146 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4147 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4148 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4149 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4150 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 4151 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4152 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4153 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 4154 /* gap */ 4155 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4156 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 4157 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 4158 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4159 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4160 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4161 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4162 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4163 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4164 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4165 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4166 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4167 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4168 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4169 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4170 /* gap */ 4171 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4172 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4173 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4174 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4175 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4176 /* gap */ 4177 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4178 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4179 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4180 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4181 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4182 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4183 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4184 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4185 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4186 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4187 /* gap */ 4188 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4189 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, 4190 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4191 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 4192 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4193 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 4194 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 4195 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 4196 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 4197 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, 4198 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 4199 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 4200 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4201 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4202 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4203 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4204 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4205 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 4206 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4207 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4208 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 4209 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 4210 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 4211 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4212 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4213 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4214 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4215 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4216 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4217 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4218 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4219 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4220 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4221 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4222 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4223 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4224 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4225 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4226 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4227 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 4228 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4229 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4230 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 4231 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4232}; 4233 4234static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 4235 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4236 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4237 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4238 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 4239 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 4240 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4241 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4242 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4243 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4244 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4245 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4246 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4247 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4248 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4249 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4250 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4251 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4252 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4253 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4254 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4255 /* gap */ 4256 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4257 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4258 /* gap */ 4259 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4260 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4261 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4262 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4263 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4264 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4265 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 4266 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 4267 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 4268 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4269 /* gap */ 4270 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4271 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4272 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4273 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4274 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 4275 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4276 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4277 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4278 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4279 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4280 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4281 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4282 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4283 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4284 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4285 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4286 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 4287 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4288 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4289 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4290 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4291 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4292 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4293 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4294 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4295 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4296 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4297 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4298 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4299 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4300 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4301 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4302 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4303 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4304 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4305 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4306 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4307 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4308 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4309 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4310 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4311 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4312 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4313 /* gap */ 4314 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4315 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4316 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4317 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4318 /* gap */ 4319 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4320 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4321 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4322 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4323 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4324 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4325 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4326 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4327 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4328 /* gap */ 4329 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4330 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4331 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4332 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4333 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4334 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4335 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4336 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4337 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4338 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4339 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4340 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4341 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4342 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4343 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4344 /* gap */ 4345 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4346 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4347 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4348 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4349 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4350 /* gap */ 4351 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4352 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4353 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4354 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4355 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4356 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4357 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4358 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4359 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4360 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4361 /* gap */ 4362 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4363 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4364 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4365 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 4366 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4367 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4368 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4369 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4370 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4371 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4372 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4373 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4374 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4375 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4376 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4377 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4378 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4379 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4380 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4381 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4382 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4383 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4384 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4385 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4386 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4387 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4388 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4389 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4390 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4391 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4392 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4393 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4394 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4395 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4396 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4397 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4398 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4399 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4400 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4401 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported}, 4402 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4403 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4404}; 4405