r600_shader.c revision ef419599d9b18de2a9077c5f0a7f02bfc11d1762
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37 38 39struct r600_shader_tgsi_instruction; 40 41struct r600_shader_ctx { 42 struct tgsi_shader_info info; 43 struct tgsi_parse_context parse; 44 const struct tgsi_token *tokens; 45 unsigned type; 46 unsigned file_offset[TGSI_FILE_COUNT]; 47 unsigned temp_reg; 48 struct r600_shader_tgsi_instruction *inst_info; 49 struct r600_bc *bc; 50 struct r600_shader *shader; 51 u32 value[4]; 52 u32 *literals; 53 u32 nliterals; 54 u32 max_driver_temp_used; 55}; 56 57struct r600_shader_tgsi_instruction { 58 unsigned tgsi_opcode; 59 unsigned is_op3; 60 unsigned r600_opcode; 61 int (*process)(struct r600_shader_ctx *ctx); 62}; 63 64static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 65static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 66 67static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 68{ 69 struct r600_context *rctx = r600_context(ctx); 70 const struct util_format_description *desc; 71 enum pipe_format resource_format[160]; 72 unsigned i, nresources = 0; 73 struct r600_bc *bc = &shader->bc; 74 struct r600_bc_cf *cf; 75 struct r600_bc_vtx *vtx; 76 77 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 78 return 0; 79 for (i = 0; i < rctx->vertex_elements->count; i++) { 80 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 81 } 82 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 83 switch (cf->inst) { 84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 85 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 86 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 87 desc = util_format_description(resource_format[vtx->buffer_id]); 88 if (desc == NULL) { 89 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 90 return -EINVAL; 91 } 92 vtx->dst_sel_x = desc->swizzle[0]; 93 vtx->dst_sel_y = desc->swizzle[1]; 94 vtx->dst_sel_z = desc->swizzle[2]; 95 vtx->dst_sel_w = desc->swizzle[3]; 96 } 97 break; 98 default: 99 break; 100 } 101 } 102 return r600_bc_build(&shader->bc); 103} 104 105int r600_pipe_shader_create(struct pipe_context *ctx, 106 struct r600_context_state *rpshader, 107 const struct tgsi_token *tokens) 108{ 109 struct r600_screen *rscreen = r600_screen(ctx->screen); 110 int r; 111 112//fprintf(stderr, "--------------------------------------------------------------\n"); 113//tgsi_dump(tokens, 0); 114 if (rpshader == NULL) 115 return -ENOMEM; 116 rpshader->shader.family = radeon_get_family(rscreen->rw); 117 rpshader->shader.use_mem_constant = rscreen->use_mem_constant; 118 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 119 if (r) { 120 R600_ERR("translation from TGSI failed !\n"); 121 return r; 122 } 123 r = r600_bc_build(&rpshader->shader.bc); 124 if (r) { 125 R600_ERR("building bytecode failed !\n"); 126 return r; 127 } 128//fprintf(stderr, "______________________________________________________________\n"); 129 return 0; 130} 131 132static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 133{ 134 struct r600_context *rctx = r600_context(ctx); 135 struct radeon_state *state; 136 137 state = &rpshader->rstate[0]; 138 radeon_state_fini(&rpshader->rstate[0]); 139 140 return rctx->vtbl->vs_shader(rctx, rpshader, state); 141} 142 143static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 144{ 145 struct r600_context *rctx = r600_context(ctx); 146 struct radeon_state *state; 147 148 state = &rpshader->rstate[0]; 149 radeon_state_fini(state); 150 151 return rctx->vtbl->ps_shader(rctx, rpshader, state); 152} 153 154static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 155{ 156 struct r600_screen *rscreen = r600_screen(ctx->screen); 157 struct r600_context *rctx = r600_context(ctx); 158 struct r600_shader *rshader = &rpshader->shader; 159 int r; 160 void *data; 161 162 /* copy new shader */ 163 radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL); 164 rpshader->bo = NULL; 165 rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4, 166 4096, 0); 167 if (rpshader->bo == NULL) { 168 return -ENOMEM; 169 } 170 data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); 171 memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); 172 radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); 173 /* build state */ 174 rshader->flat_shade = rctx->flat_shade; 175 switch (rshader->processor_type) { 176 case TGSI_PROCESSOR_VERTEX: 177 r = r600_pipe_shader_vs(ctx, rpshader); 178 break; 179 case TGSI_PROCESSOR_FRAGMENT: 180 r = r600_pipe_shader_ps(ctx, rpshader); 181 break; 182 default: 183 r = -EINVAL; 184 break; 185 } 186 return r; 187} 188 189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 190{ 191 struct r600_context *rctx = r600_context(ctx); 192 int r; 193 194 if (rpshader == NULL) 195 return -EINVAL; 196 /* there should be enough input */ 197 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 198 R600_ERR("%d resources provided, expecting %d\n", 199 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 200 return -EINVAL; 201 } 202 r = r600_shader_update(ctx, &rpshader->shader); 203 if (r) 204 return r; 205 return r600_pipe_shader(ctx, rpshader); 206} 207 208static int tgsi_is_supported(struct r600_shader_ctx *ctx) 209{ 210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 211 int j; 212 213 if (i->Instruction.NumDstRegs > 1) { 214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 215 return -EINVAL; 216 } 217 if (i->Instruction.Predicate) { 218 R600_ERR("predicate unsupported\n"); 219 return -EINVAL; 220 } 221#if 0 222 if (i->Instruction.Label) { 223 R600_ERR("label unsupported\n"); 224 return -EINVAL; 225 } 226#endif 227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 228 if (i->Src[j].Register.Dimension || 229 i->Src[j].Register.Absolute) { 230 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 231 i->Src[j].Register.Dimension, 232 i->Src[j].Register.Absolute); 233 return -EINVAL; 234 } 235 } 236 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 237 if (i->Dst[j].Register.Dimension) { 238 R600_ERR("unsupported dst (dimension)\n"); 239 return -EINVAL; 240 } 241 } 242 return 0; 243} 244 245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) 246{ 247 int i, r; 248 struct r600_bc_alu alu; 249 250 for (i = 0; i < 8; i++) { 251 memset(&alu, 0, sizeof(struct r600_bc_alu)); 252 253 if (i < 4) 254 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 255 else 256 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 257 258 if ((i > 1) && (i < 6)) { 259 alu.dst.sel = ctx->shader->input[gpr].gpr; 260 alu.dst.write = 1; 261 } 262 263 alu.dst.chan = i % 4; 264 alu.src[0].chan = (1 - (i % 2)); 265 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; 266 267 alu.bank_swizzle_force = SQ_ALU_VEC_210; 268 if ((i % 4) == 3) 269 alu.last = 1; 270 r = r600_bc_add_alu(ctx->bc, &alu); 271 if (r) 272 return r; 273 } 274 return 0; 275} 276 277 278static int tgsi_declaration(struct r600_shader_ctx *ctx) 279{ 280 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 281 struct r600_bc_vtx vtx; 282 unsigned i; 283 int r; 284 285 switch (d->Declaration.File) { 286 case TGSI_FILE_INPUT: 287 i = ctx->shader->ninput++; 288 ctx->shader->input[i].name = d->Semantic.Name; 289 ctx->shader->input[i].sid = d->Semantic.Index; 290 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 291 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 292 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 293 /* turn input into fetch */ 294 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 295 vtx.inst = 0; 296 vtx.fetch_type = 0; 297 vtx.buffer_id = i; 298 /* register containing the index into the buffer */ 299 vtx.src_gpr = 0; 300 vtx.src_sel_x = 0; 301 vtx.mega_fetch_count = 0x1F; 302 vtx.dst_gpr = ctx->shader->input[i].gpr; 303 vtx.dst_sel_x = 0; 304 vtx.dst_sel_y = 1; 305 vtx.dst_sel_z = 2; 306 vtx.dst_sel_w = 3; 307 r = r600_bc_add_vtx(ctx->bc, &vtx); 308 if (r) 309 return r; 310 } 311 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 312 /* turn input into interpolate on EG */ 313 evergreen_interp_alu(ctx, i); 314 } 315 break; 316 case TGSI_FILE_OUTPUT: 317 i = ctx->shader->noutput++; 318 ctx->shader->output[i].name = d->Semantic.Name; 319 ctx->shader->output[i].sid = d->Semantic.Index; 320 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 321 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 322 break; 323 case TGSI_FILE_CONSTANT: 324 case TGSI_FILE_TEMPORARY: 325 case TGSI_FILE_SAMPLER: 326 case TGSI_FILE_ADDRESS: 327 break; 328 default: 329 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 330 return -EINVAL; 331 } 332 return 0; 333} 334 335static int r600_get_temp(struct r600_shader_ctx *ctx) 336{ 337 return ctx->temp_reg + ctx->max_driver_temp_used++; 338} 339 340int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 341{ 342 struct tgsi_full_immediate *immediate; 343 struct r600_shader_ctx ctx; 344 struct r600_bc_output output[32]; 345 unsigned output_done, noutput; 346 unsigned opcode; 347 int i, r = 0, pos0; 348 349 ctx.bc = &shader->bc; 350 ctx.shader = shader; 351 r = r600_bc_init(ctx.bc, shader->family); 352 if (r) 353 return r; 354 ctx.bc->use_mem_constant = shader->use_mem_constant; 355 ctx.tokens = tokens; 356 tgsi_scan_shader(tokens, &ctx.info); 357 tgsi_parse_init(&ctx.parse, tokens); 358 ctx.type = ctx.parse.FullHeader.Processor.Processor; 359 shader->processor_type = ctx.type; 360 361 /* register allocations */ 362 /* Values [0,127] correspond to GPR[0..127]. 363 * Values [128,159] correspond to constant buffer bank 0 364 * Values [160,191] correspond to constant buffer bank 1 365 * Values [256,511] correspond to cfile constants c[0..255]. 366 * Other special values are shown in the list below. 367 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 368 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 369 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 370 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 371 * 248 SQ_ALU_SRC_0: special constant 0.0. 372 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 373 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 374 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 375 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 376 * 253 SQ_ALU_SRC_LITERAL: literal constant. 377 * 254 SQ_ALU_SRC_PV: previous vector result. 378 * 255 SQ_ALU_SRC_PS: previous scalar result. 379 */ 380 for (i = 0; i < TGSI_FILE_COUNT; i++) { 381 ctx.file_offset[i] = 0; 382 } 383 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 384 ctx.file_offset[TGSI_FILE_INPUT] = 1; 385 } 386 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 387 ctx.info.file_count[TGSI_FILE_INPUT]; 388 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 389 ctx.info.file_count[TGSI_FILE_OUTPUT]; 390 if (ctx.shader->use_mem_constant) 391 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 392 else 393 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 394 395 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 396 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 397 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 398 399 ctx.nliterals = 0; 400 ctx.literals = NULL; 401 402 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 403 tgsi_parse_token(&ctx.parse); 404 switch (ctx.parse.FullToken.Token.Type) { 405 case TGSI_TOKEN_TYPE_IMMEDIATE: 406 immediate = &ctx.parse.FullToken.FullImmediate; 407 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 408 if(ctx.literals == NULL) { 409 r = -ENOMEM; 410 goto out_err; 411 } 412 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 413 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 414 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 415 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 416 ctx.nliterals++; 417 break; 418 case TGSI_TOKEN_TYPE_DECLARATION: 419 r = tgsi_declaration(&ctx); 420 if (r) 421 goto out_err; 422 break; 423 case TGSI_TOKEN_TYPE_INSTRUCTION: 424 r = tgsi_is_supported(&ctx); 425 if (r) 426 goto out_err; 427 ctx.max_driver_temp_used = 0; 428 /* reserve first tmp for everyone */ 429 r600_get_temp(&ctx); 430 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 431 if (ctx.bc->chiprev == 2) 432 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 433 else 434 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 435 r = ctx.inst_info->process(&ctx); 436 if (r) 437 goto out_err; 438 r = r600_bc_add_literal(ctx.bc, ctx.value); 439 if (r) 440 goto out_err; 441 break; 442 default: 443 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 444 r = -EINVAL; 445 goto out_err; 446 } 447 } 448 /* export output */ 449 noutput = shader->noutput; 450 for (i = 0, pos0 = 0; i < noutput; i++) { 451 memset(&output[i], 0, sizeof(struct r600_bc_output)); 452 output[i].gpr = shader->output[i].gpr; 453 output[i].elem_size = 3; 454 output[i].swizzle_x = 0; 455 output[i].swizzle_y = 1; 456 output[i].swizzle_z = 2; 457 output[i].swizzle_w = 3; 458 output[i].barrier = 1; 459 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 460 output[i].array_base = i - pos0; 461 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 462 switch (ctx.type) { 463 case TGSI_PROCESSOR_VERTEX: 464 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 465 output[i].array_base = 60; 466 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 467 /* position doesn't count in array_base */ 468 pos0++; 469 } 470 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 471 output[i].array_base = 61; 472 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 473 /* position doesn't count in array_base */ 474 pos0++; 475 } 476 break; 477 case TGSI_PROCESSOR_FRAGMENT: 478 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 479 output[i].array_base = shader->output[i].sid; 480 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 481 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 482 output[i].array_base = 61; 483 output[i].swizzle_x = 2; 484 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; 485 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 486 } else { 487 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 488 r = -EINVAL; 489 goto out_err; 490 } 491 break; 492 default: 493 R600_ERR("unsupported processor type %d\n", ctx.type); 494 r = -EINVAL; 495 goto out_err; 496 } 497 } 498 /* add fake param output for vertex shader if no param is exported */ 499 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 500 for (i = 0, pos0 = 0; i < noutput; i++) { 501 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 502 pos0 = 1; 503 break; 504 } 505 } 506 if (!pos0) { 507 memset(&output[i], 0, sizeof(struct r600_bc_output)); 508 output[i].gpr = 0; 509 output[i].elem_size = 3; 510 output[i].swizzle_x = 0; 511 output[i].swizzle_y = 1; 512 output[i].swizzle_z = 2; 513 output[i].swizzle_w = 3; 514 output[i].barrier = 1; 515 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 516 output[i].array_base = 0; 517 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 518 noutput++; 519 } 520 } 521 /* add fake pixel export */ 522 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 523 memset(&output[0], 0, sizeof(struct r600_bc_output)); 524 output[0].gpr = 0; 525 output[0].elem_size = 3; 526 output[0].swizzle_x = 7; 527 output[0].swizzle_y = 7; 528 output[0].swizzle_z = 7; 529 output[0].swizzle_w = 7; 530 output[0].barrier = 1; 531 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 532 output[0].array_base = 0; 533 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 534 noutput++; 535 } 536 /* set export done on last export of each type */ 537 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 538 if (i == (noutput - 1)) { 539 output[i].end_of_program = 1; 540 } 541 if (!(output_done & (1 << output[i].type))) { 542 output_done |= (1 << output[i].type); 543 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 544 } 545 } 546 /* add output to bytecode */ 547 for (i = 0; i < noutput; i++) { 548 r = r600_bc_add_output(ctx.bc, &output[i]); 549 if (r) 550 goto out_err; 551 } 552 free(ctx.literals); 553 tgsi_parse_free(&ctx.parse); 554 return 0; 555out_err: 556 free(ctx.literals); 557 tgsi_parse_free(&ctx.parse); 558 return r; 559} 560 561static int tgsi_unsupported(struct r600_shader_ctx *ctx) 562{ 563 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 564 return -EINVAL; 565} 566 567static int tgsi_end(struct r600_shader_ctx *ctx) 568{ 569 return 0; 570} 571 572static int tgsi_src(struct r600_shader_ctx *ctx, 573 const struct tgsi_full_src_register *tgsi_src, 574 struct r600_bc_alu_src *r600_src) 575{ 576 int index; 577 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 578 r600_src->sel = tgsi_src->Register.Index; 579 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 580 r600_src->sel = 0; 581 index = tgsi_src->Register.Index; 582 ctx->value[0] = ctx->literals[index * 4 + 0]; 583 ctx->value[1] = ctx->literals[index * 4 + 1]; 584 ctx->value[2] = ctx->literals[index * 4 + 2]; 585 ctx->value[3] = ctx->literals[index * 4 + 3]; 586 } 587 if (tgsi_src->Register.Indirect) 588 r600_src->rel = V_SQ_REL_RELATIVE; 589 r600_src->neg = tgsi_src->Register.Negate; 590 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 591 return 0; 592} 593 594static int tgsi_dst(struct r600_shader_ctx *ctx, 595 const struct tgsi_full_dst_register *tgsi_dst, 596 unsigned swizzle, 597 struct r600_bc_alu_dst *r600_dst) 598{ 599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 600 601 r600_dst->sel = tgsi_dst->Register.Index; 602 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 603 r600_dst->chan = swizzle; 604 r600_dst->write = 1; 605 if (tgsi_dst->Register.Indirect) 606 r600_dst->rel = V_SQ_REL_RELATIVE; 607 if (inst->Instruction.Saturate) { 608 r600_dst->clamp = 1; 609 } 610 return 0; 611} 612 613static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 614{ 615 switch (swizzle) { 616 case 0: 617 return tgsi_src->Register.SwizzleX; 618 case 1: 619 return tgsi_src->Register.SwizzleY; 620 case 2: 621 return tgsi_src->Register.SwizzleZ; 622 case 3: 623 return tgsi_src->Register.SwizzleW; 624 default: 625 return 0; 626 } 627} 628 629static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 630{ 631 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 632 struct r600_bc_alu alu; 633 int i, j, k, nconst, r; 634 635 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 636 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 637 nconst++; 638 } 639 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 640 if (r) { 641 return r; 642 } 643 } 644 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 645 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { 646 int treg = r600_get_temp(ctx); 647 for (k = 0; k < 4; k++) { 648 memset(&alu, 0, sizeof(struct r600_bc_alu)); 649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 650 alu.src[0].sel = r600_src[j].sel; 651 alu.src[0].chan = k; 652 alu.dst.sel = treg; 653 alu.dst.chan = k; 654 alu.dst.write = 1; 655 if (k == 3) 656 alu.last = 1; 657 r = r600_bc_add_alu(ctx->bc, &alu); 658 if (r) 659 return r; 660 } 661 r600_src[j].sel = treg; 662 j--; 663 } 664 } 665 return 0; 666} 667 668/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 669static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 670{ 671 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 672 struct r600_bc_alu alu; 673 int i, j, k, nliteral, r; 674 675 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 676 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 677 nliteral++; 678 } 679 } 680 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { 681 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { 682 int treg = r600_get_temp(ctx); 683 for (k = 0; k < 4; k++) { 684 memset(&alu, 0, sizeof(struct r600_bc_alu)); 685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 686 alu.src[0].sel = r600_src[j].sel; 687 alu.src[0].chan = k; 688 alu.dst.sel = treg; 689 alu.dst.chan = k; 690 alu.dst.write = 1; 691 if (k == 3) 692 alu.last = 1; 693 r = r600_bc_add_alu(ctx->bc, &alu); 694 if (r) 695 return r; 696 } 697 r = r600_bc_add_literal(ctx->bc, ctx->value); 698 if (r) 699 return r; 700 r600_src[j].sel = treg; 701 j++; 702 } 703 } 704 return 0; 705} 706 707static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 708{ 709 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 710 struct r600_bc_alu_src r600_src[3]; 711 struct r600_bc_alu alu; 712 int i, j, r; 713 int lasti = 0; 714 715 for (i = 0; i < 4; i++) { 716 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 717 lasti = i; 718 } 719 } 720 721 r = tgsi_split_constant(ctx, r600_src); 722 if (r) 723 return r; 724 for (i = 0; i < lasti + 1; i++) { 725 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 726 continue; 727 728 memset(&alu, 0, sizeof(struct r600_bc_alu)); 729 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 730 if (r) 731 return r; 732 733 alu.inst = ctx->inst_info->r600_opcode; 734 if (!swap) { 735 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 736 alu.src[j] = r600_src[j]; 737 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 738 } 739 } else { 740 alu.src[0] = r600_src[1]; 741 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 742 743 alu.src[1] = r600_src[0]; 744 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 745 } 746 /* handle some special cases */ 747 switch (ctx->inst_info->tgsi_opcode) { 748 case TGSI_OPCODE_SUB: 749 alu.src[1].neg = 1; 750 break; 751 case TGSI_OPCODE_ABS: 752 alu.src[0].abs = 1; 753 break; 754 default: 755 break; 756 } 757 if (i == lasti) { 758 alu.last = 1; 759 } 760 r = r600_bc_add_alu(ctx->bc, &alu); 761 if (r) 762 return r; 763 } 764 return 0; 765} 766 767static int tgsi_op2(struct r600_shader_ctx *ctx) 768{ 769 return tgsi_op2_s(ctx, 0); 770} 771 772static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 773{ 774 return tgsi_op2_s(ctx, 1); 775} 776 777/* 778 * r600 - trunc to -PI..PI range 779 * r700 - normalize by dividing by 2PI 780 * see fdo bug 27901 781 */ 782static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 783 struct r600_bc_alu_src r600_src[3]) 784{ 785 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 786 int r; 787 uint32_t lit_vals[4]; 788 struct r600_bc_alu alu; 789 790 memset(lit_vals, 0, 4*4); 791 r = tgsi_split_constant(ctx, r600_src); 792 if (r) 793 return r; 794 795 r = tgsi_split_literal_constant(ctx, r600_src); 796 if (r) 797 return r; 798 799 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 800 lit_vals[1] = fui(0.5f); 801 802 memset(&alu, 0, sizeof(struct r600_bc_alu)); 803 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 804 alu.is_op3 = 1; 805 806 alu.dst.chan = 0; 807 alu.dst.sel = ctx->temp_reg; 808 alu.dst.write = 1; 809 810 alu.src[0] = r600_src[0]; 811 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 812 813 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 814 alu.src[1].chan = 0; 815 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 816 alu.src[2].chan = 1; 817 alu.last = 1; 818 r = r600_bc_add_alu(ctx->bc, &alu); 819 if (r) 820 return r; 821 r = r600_bc_add_literal(ctx->bc, lit_vals); 822 if (r) 823 return r; 824 825 memset(&alu, 0, sizeof(struct r600_bc_alu)); 826 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 827 828 alu.dst.chan = 0; 829 alu.dst.sel = ctx->temp_reg; 830 alu.dst.write = 1; 831 832 alu.src[0].sel = ctx->temp_reg; 833 alu.src[0].chan = 0; 834 alu.last = 1; 835 r = r600_bc_add_alu(ctx->bc, &alu); 836 if (r) 837 return r; 838 839 if (ctx->bc->chiprev == 0) { 840 lit_vals[0] = fui(3.1415926535897f * 2.0f); 841 lit_vals[1] = fui(-3.1415926535897f); 842 } else { 843 lit_vals[0] = fui(1.0f); 844 lit_vals[1] = fui(-0.5f); 845 } 846 847 memset(&alu, 0, sizeof(struct r600_bc_alu)); 848 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 849 alu.is_op3 = 1; 850 851 alu.dst.chan = 0; 852 alu.dst.sel = ctx->temp_reg; 853 alu.dst.write = 1; 854 855 alu.src[0].sel = ctx->temp_reg; 856 alu.src[0].chan = 0; 857 858 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 859 alu.src[1].chan = 0; 860 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 861 alu.src[2].chan = 1; 862 alu.last = 1; 863 r = r600_bc_add_alu(ctx->bc, &alu); 864 if (r) 865 return r; 866 r = r600_bc_add_literal(ctx->bc, lit_vals); 867 if (r) 868 return r; 869 return 0; 870} 871 872static int tgsi_trig(struct r600_shader_ctx *ctx) 873{ 874 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 875 struct r600_bc_alu_src r600_src[3]; 876 struct r600_bc_alu alu; 877 int i, r; 878 int lasti = 0; 879 880 r = tgsi_setup_trig(ctx, r600_src); 881 if (r) 882 return r; 883 884 memset(&alu, 0, sizeof(struct r600_bc_alu)); 885 alu.inst = ctx->inst_info->r600_opcode; 886 alu.dst.chan = 0; 887 alu.dst.sel = ctx->temp_reg; 888 alu.dst.write = 1; 889 890 alu.src[0].sel = ctx->temp_reg; 891 alu.src[0].chan = 0; 892 alu.last = 1; 893 r = r600_bc_add_alu(ctx->bc, &alu); 894 if (r) 895 return r; 896 897 /* replicate result */ 898 for (i = 0; i < 4; i++) { 899 if (inst->Dst[0].Register.WriteMask & (1 << i)) 900 lasti = i; 901 } 902 for (i = 0; i < lasti + 1; i++) { 903 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 904 continue; 905 906 memset(&alu, 0, sizeof(struct r600_bc_alu)); 907 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 908 909 alu.src[0].sel = ctx->temp_reg; 910 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 911 if (r) 912 return r; 913 if (i == lasti) 914 alu.last = 1; 915 r = r600_bc_add_alu(ctx->bc, &alu); 916 if (r) 917 return r; 918 } 919 return 0; 920} 921 922static int tgsi_scs(struct r600_shader_ctx *ctx) 923{ 924 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 925 struct r600_bc_alu_src r600_src[3]; 926 struct r600_bc_alu alu; 927 int r; 928 929 /* We'll only need the trig stuff if we are going to write to the 930 * X or Y components of the destination vector. 931 */ 932 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 933 r = tgsi_setup_trig(ctx, r600_src); 934 if (r) 935 return r; 936 } 937 938 /* dst.x = COS */ 939 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 940 memset(&alu, 0, sizeof(struct r600_bc_alu)); 941 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 942 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 943 if (r) 944 return r; 945 946 alu.src[0].sel = ctx->temp_reg; 947 alu.src[0].chan = 0; 948 alu.last = 1; 949 r = r600_bc_add_alu(ctx->bc, &alu); 950 if (r) 951 return r; 952 } 953 954 /* dst.y = SIN */ 955 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 956 memset(&alu, 0, sizeof(struct r600_bc_alu)); 957 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 958 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 959 if (r) 960 return r; 961 962 alu.src[0].sel = ctx->temp_reg; 963 alu.src[0].chan = 0; 964 alu.last = 1; 965 r = r600_bc_add_alu(ctx->bc, &alu); 966 if (r) 967 return r; 968 } 969 970 /* dst.z = 0.0; */ 971 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 972 fprintf(stderr, "writing z\n"); 973 memset(&alu, 0, sizeof(struct r600_bc_alu)); 974 975 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 976 977 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 978 if (r) 979 return r; 980 981 alu.src[0].sel = V_SQ_ALU_SRC_0; 982 alu.src[0].chan = 0; 983 984 alu.last = 1; 985 986 r = r600_bc_add_alu(ctx->bc, &alu); 987 if (r) 988 return r; 989 990 r = r600_bc_add_literal(ctx->bc, ctx->value); 991 if (r) 992 return r; 993 } 994 995 /* dst.w = 1.0; */ 996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 997 memset(&alu, 0, sizeof(struct r600_bc_alu)); 998 999 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1000 1001 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1002 if (r) 1003 return r; 1004 1005 alu.src[0].sel = V_SQ_ALU_SRC_1; 1006 alu.src[0].chan = 0; 1007 1008 alu.last = 1; 1009 1010 r = r600_bc_add_alu(ctx->bc, &alu); 1011 if (r) 1012 return r; 1013 1014 r = r600_bc_add_literal(ctx->bc, ctx->value); 1015 if (r) 1016 return r; 1017 } 1018 1019 return 0; 1020} 1021 1022static int tgsi_kill(struct r600_shader_ctx *ctx) 1023{ 1024 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1025 struct r600_bc_alu alu; 1026 int i, r; 1027 1028 for (i = 0; i < 4; i++) { 1029 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1030 alu.inst = ctx->inst_info->r600_opcode; 1031 1032 alu.dst.chan = i; 1033 1034 alu.src[0].sel = V_SQ_ALU_SRC_0; 1035 1036 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1037 alu.src[1].sel = V_SQ_ALU_SRC_1; 1038 alu.src[1].neg = 1; 1039 } else { 1040 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1041 if (r) 1042 return r; 1043 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1044 } 1045 if (i == 3) { 1046 alu.last = 1; 1047 } 1048 r = r600_bc_add_alu(ctx->bc, &alu); 1049 if (r) 1050 return r; 1051 } 1052 r = r600_bc_add_literal(ctx->bc, ctx->value); 1053 if (r) 1054 return r; 1055 1056 /* kill must be last in ALU */ 1057 ctx->bc->force_add_cf = 1; 1058 ctx->shader->uses_kill = TRUE; 1059 return 0; 1060} 1061 1062static int tgsi_lit(struct r600_shader_ctx *ctx) 1063{ 1064 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1065 struct r600_bc_alu alu; 1066 struct r600_bc_alu_src r600_src[3]; 1067 int r; 1068 1069 r = tgsi_split_constant(ctx, r600_src); 1070 if (r) 1071 return r; 1072 r = tgsi_split_literal_constant(ctx, r600_src); 1073 if (r) 1074 return r; 1075 1076 /* dst.x, <- 1.0 */ 1077 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1079 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1080 alu.src[0].chan = 0; 1081 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1082 if (r) 1083 return r; 1084 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1085 r = r600_bc_add_alu(ctx->bc, &alu); 1086 if (r) 1087 return r; 1088 1089 /* dst.y = max(src.x, 0.0) */ 1090 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1091 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1092 alu.src[0] = r600_src[0]; 1093 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1094 alu.src[1].chan = 0; 1095 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1096 if (r) 1097 return r; 1098 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1099 r = r600_bc_add_alu(ctx->bc, &alu); 1100 if (r) 1101 return r; 1102 1103 /* dst.w, <- 1.0 */ 1104 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1105 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1106 alu.src[0].sel = V_SQ_ALU_SRC_1; 1107 alu.src[0].chan = 0; 1108 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1109 if (r) 1110 return r; 1111 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1112 alu.last = 1; 1113 r = r600_bc_add_alu(ctx->bc, &alu); 1114 if (r) 1115 return r; 1116 1117 r = r600_bc_add_literal(ctx->bc, ctx->value); 1118 if (r) 1119 return r; 1120 1121 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1122 { 1123 int chan; 1124 int sel; 1125 1126 /* dst.z = log(src.y) */ 1127 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1128 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1129 alu.src[0] = r600_src[0]; 1130 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1131 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1132 if (r) 1133 return r; 1134 alu.last = 1; 1135 r = r600_bc_add_alu(ctx->bc, &alu); 1136 if (r) 1137 return r; 1138 1139 r = r600_bc_add_literal(ctx->bc, ctx->value); 1140 if (r) 1141 return r; 1142 1143 chan = alu.dst.chan; 1144 sel = alu.dst.sel; 1145 1146 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1147 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1148 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1149 alu.src[0] = r600_src[0]; 1150 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1151 alu.src[1].sel = sel; 1152 alu.src[1].chan = chan; 1153 1154 alu.src[2] = r600_src[0]; 1155 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1156 alu.dst.sel = ctx->temp_reg; 1157 alu.dst.chan = 0; 1158 alu.dst.write = 1; 1159 alu.is_op3 = 1; 1160 alu.last = 1; 1161 r = r600_bc_add_alu(ctx->bc, &alu); 1162 if (r) 1163 return r; 1164 1165 r = r600_bc_add_literal(ctx->bc, ctx->value); 1166 if (r) 1167 return r; 1168 /* dst.z = exp(tmp.x) */ 1169 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1171 alu.src[0].sel = ctx->temp_reg; 1172 alu.src[0].chan = 0; 1173 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1174 if (r) 1175 return r; 1176 alu.last = 1; 1177 r = r600_bc_add_alu(ctx->bc, &alu); 1178 if (r) 1179 return r; 1180 } 1181 return 0; 1182} 1183 1184static int tgsi_rsq(struct r600_shader_ctx *ctx) 1185{ 1186 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1187 struct r600_bc_alu alu; 1188 int i, r; 1189 1190 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1191 1192 /* FIXME: 1193 * For state trackers other than OpenGL, we'll want to use 1194 * _RECIPSQRT_IEEE instead. 1195 */ 1196 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1197 1198 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1199 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1200 if (r) 1201 return r; 1202 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1203 alu.src[i].abs = 1; 1204 } 1205 alu.dst.sel = ctx->temp_reg; 1206 alu.dst.write = 1; 1207 alu.last = 1; 1208 r = r600_bc_add_alu(ctx->bc, &alu); 1209 if (r) 1210 return r; 1211 r = r600_bc_add_literal(ctx->bc, ctx->value); 1212 if (r) 1213 return r; 1214 /* replicate result */ 1215 return tgsi_helper_tempx_replicate(ctx); 1216} 1217 1218static int tgsi_trans(struct r600_shader_ctx *ctx) 1219{ 1220 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1221 struct r600_bc_alu alu; 1222 int i, j, r; 1223 1224 for (i = 0; i < 4; i++) { 1225 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1226 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1227 alu.inst = ctx->inst_info->r600_opcode; 1228 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1229 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); 1230 if (r) 1231 return r; 1232 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1233 } 1234 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1235 if (r) 1236 return r; 1237 alu.last = 1; 1238 r = r600_bc_add_alu(ctx->bc, &alu); 1239 if (r) 1240 return r; 1241 } 1242 } 1243 return 0; 1244} 1245 1246static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1247{ 1248 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1249 struct r600_bc_alu alu; 1250 int i, r; 1251 1252 for (i = 0; i < 4; i++) { 1253 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1254 alu.src[0].sel = ctx->temp_reg; 1255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1256 alu.dst.chan = i; 1257 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1258 if (r) 1259 return r; 1260 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1261 if (i == 3) 1262 alu.last = 1; 1263 r = r600_bc_add_alu(ctx->bc, &alu); 1264 if (r) 1265 return r; 1266 } 1267 return 0; 1268} 1269 1270static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1271{ 1272 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1273 struct r600_bc_alu alu; 1274 int i, r; 1275 1276 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1277 alu.inst = ctx->inst_info->r600_opcode; 1278 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1279 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1280 if (r) 1281 return r; 1282 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1283 } 1284 alu.dst.sel = ctx->temp_reg; 1285 alu.dst.write = 1; 1286 alu.last = 1; 1287 r = r600_bc_add_alu(ctx->bc, &alu); 1288 if (r) 1289 return r; 1290 r = r600_bc_add_literal(ctx->bc, ctx->value); 1291 if (r) 1292 return r; 1293 /* replicate result */ 1294 return tgsi_helper_tempx_replicate(ctx); 1295} 1296 1297static int tgsi_pow(struct r600_shader_ctx *ctx) 1298{ 1299 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1300 struct r600_bc_alu alu; 1301 int r; 1302 1303 /* LOG2(a) */ 1304 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1306 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1307 if (r) 1308 return r; 1309 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1310 alu.dst.sel = ctx->temp_reg; 1311 alu.dst.write = 1; 1312 alu.last = 1; 1313 r = r600_bc_add_alu(ctx->bc, &alu); 1314 if (r) 1315 return r; 1316 r = r600_bc_add_literal(ctx->bc,ctx->value); 1317 if (r) 1318 return r; 1319 /* b * LOG2(a) */ 1320 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1322 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1323 if (r) 1324 return r; 1325 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1326 alu.src[1].sel = ctx->temp_reg; 1327 alu.dst.sel = ctx->temp_reg; 1328 alu.dst.write = 1; 1329 alu.last = 1; 1330 r = r600_bc_add_alu(ctx->bc, &alu); 1331 if (r) 1332 return r; 1333 r = r600_bc_add_literal(ctx->bc,ctx->value); 1334 if (r) 1335 return r; 1336 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1337 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1339 alu.src[0].sel = ctx->temp_reg; 1340 alu.dst.sel = ctx->temp_reg; 1341 alu.dst.write = 1; 1342 alu.last = 1; 1343 r = r600_bc_add_alu(ctx->bc, &alu); 1344 if (r) 1345 return r; 1346 r = r600_bc_add_literal(ctx->bc,ctx->value); 1347 if (r) 1348 return r; 1349 return tgsi_helper_tempx_replicate(ctx); 1350} 1351 1352static int tgsi_ssg(struct r600_shader_ctx *ctx) 1353{ 1354 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1355 struct r600_bc_alu alu; 1356 struct r600_bc_alu_src r600_src[3]; 1357 int i, r; 1358 1359 r = tgsi_split_constant(ctx, r600_src); 1360 if (r) 1361 return r; 1362 1363 /* tmp = (src > 0 ? 1 : src) */ 1364 for (i = 0; i < 4; i++) { 1365 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1366 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1367 alu.is_op3 = 1; 1368 1369 alu.dst.sel = ctx->temp_reg; 1370 alu.dst.chan = i; 1371 1372 alu.src[0] = r600_src[0]; 1373 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1374 1375 alu.src[1].sel = V_SQ_ALU_SRC_1; 1376 1377 alu.src[2] = r600_src[0]; 1378 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1379 if (i == 3) 1380 alu.last = 1; 1381 r = r600_bc_add_alu(ctx->bc, &alu); 1382 if (r) 1383 return r; 1384 } 1385 r = r600_bc_add_literal(ctx->bc, ctx->value); 1386 if (r) 1387 return r; 1388 1389 /* dst = (-tmp > 0 ? -1 : tmp) */ 1390 for (i = 0; i < 4; i++) { 1391 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1392 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1393 alu.is_op3 = 1; 1394 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1395 if (r) 1396 return r; 1397 1398 alu.src[0].sel = ctx->temp_reg; 1399 alu.src[0].chan = i; 1400 alu.src[0].neg = 1; 1401 1402 alu.src[1].sel = V_SQ_ALU_SRC_1; 1403 alu.src[1].neg = 1; 1404 1405 alu.src[2].sel = ctx->temp_reg; 1406 alu.src[2].chan = i; 1407 1408 if (i == 3) 1409 alu.last = 1; 1410 r = r600_bc_add_alu(ctx->bc, &alu); 1411 if (r) 1412 return r; 1413 } 1414 return 0; 1415} 1416 1417static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1418{ 1419 struct r600_bc_alu alu; 1420 int i, r; 1421 1422 r = r600_bc_add_literal(ctx->bc, ctx->value); 1423 if (r) 1424 return r; 1425 for (i = 0; i < 4; i++) { 1426 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1427 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1428 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1429 alu.dst.chan = i; 1430 } else { 1431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1432 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1433 if (r) 1434 return r; 1435 alu.src[0].sel = ctx->temp_reg; 1436 alu.src[0].chan = i; 1437 } 1438 if (i == 3) { 1439 alu.last = 1; 1440 } 1441 r = r600_bc_add_alu(ctx->bc, &alu); 1442 if (r) 1443 return r; 1444 } 1445 return 0; 1446} 1447 1448static int tgsi_op3(struct r600_shader_ctx *ctx) 1449{ 1450 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1451 struct r600_bc_alu_src r600_src[3]; 1452 struct r600_bc_alu alu; 1453 int i, j, r; 1454 1455 r = tgsi_split_constant(ctx, r600_src); 1456 if (r) 1457 return r; 1458 /* do it in 2 step as op3 doesn't support writemask */ 1459 for (i = 0; i < 4; i++) { 1460 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1461 alu.inst = ctx->inst_info->r600_opcode; 1462 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1463 alu.src[j] = r600_src[j]; 1464 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1465 } 1466 alu.dst.sel = ctx->temp_reg; 1467 alu.dst.chan = i; 1468 alu.dst.write = 1; 1469 alu.is_op3 = 1; 1470 if (i == 3) { 1471 alu.last = 1; 1472 } 1473 r = r600_bc_add_alu(ctx->bc, &alu); 1474 if (r) 1475 return r; 1476 } 1477 return tgsi_helper_copy(ctx, inst); 1478} 1479 1480static int tgsi_dp(struct r600_shader_ctx *ctx) 1481{ 1482 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1483 struct r600_bc_alu_src r600_src[3]; 1484 struct r600_bc_alu alu; 1485 int i, j, r; 1486 1487 r = tgsi_split_constant(ctx, r600_src); 1488 if (r) 1489 return r; 1490 for (i = 0; i < 4; i++) { 1491 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1492 alu.inst = ctx->inst_info->r600_opcode; 1493 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1494 alu.src[j] = r600_src[j]; 1495 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1496 } 1497 alu.dst.sel = ctx->temp_reg; 1498 alu.dst.chan = i; 1499 alu.dst.write = 1; 1500 /* handle some special cases */ 1501 switch (ctx->inst_info->tgsi_opcode) { 1502 case TGSI_OPCODE_DP2: 1503 if (i > 1) { 1504 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1505 alu.src[0].chan = alu.src[1].chan = 0; 1506 } 1507 break; 1508 case TGSI_OPCODE_DP3: 1509 if (i > 2) { 1510 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1511 alu.src[0].chan = alu.src[1].chan = 0; 1512 } 1513 break; 1514 case TGSI_OPCODE_DPH: 1515 if (i == 3) { 1516 alu.src[0].sel = V_SQ_ALU_SRC_1; 1517 alu.src[0].chan = 0; 1518 alu.src[0].neg = 0; 1519 } 1520 break; 1521 default: 1522 break; 1523 } 1524 if (i == 3) { 1525 alu.last = 1; 1526 } 1527 r = r600_bc_add_alu(ctx->bc, &alu); 1528 if (r) 1529 return r; 1530 } 1531 return tgsi_helper_copy(ctx, inst); 1532} 1533 1534static int tgsi_tex(struct r600_shader_ctx *ctx) 1535{ 1536 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1537 struct r600_bc_tex tex; 1538 struct r600_bc_alu alu; 1539 unsigned src_gpr; 1540 int r, i; 1541 int opcode; 1542 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1543 uint32_t lit_vals[4]; 1544 1545 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1546 1547 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1548 /* Add perspective divide */ 1549 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1550 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1551 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1552 if (r) 1553 return r; 1554 1555 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1556 alu.dst.sel = ctx->temp_reg; 1557 alu.dst.chan = 3; 1558 alu.last = 1; 1559 alu.dst.write = 1; 1560 r = r600_bc_add_alu(ctx->bc, &alu); 1561 if (r) 1562 return r; 1563 1564 for (i = 0; i < 3; i++) { 1565 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1567 alu.src[0].sel = ctx->temp_reg; 1568 alu.src[0].chan = 3; 1569 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1570 if (r) 1571 return r; 1572 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1573 alu.dst.sel = ctx->temp_reg; 1574 alu.dst.chan = i; 1575 alu.dst.write = 1; 1576 r = r600_bc_add_alu(ctx->bc, &alu); 1577 if (r) 1578 return r; 1579 } 1580 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1581 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1582 alu.src[0].sel = V_SQ_ALU_SRC_1; 1583 alu.src[0].chan = 0; 1584 alu.dst.sel = ctx->temp_reg; 1585 alu.dst.chan = 3; 1586 alu.last = 1; 1587 alu.dst.write = 1; 1588 r = r600_bc_add_alu(ctx->bc, &alu); 1589 if (r) 1590 return r; 1591 src_not_temp = FALSE; 1592 src_gpr = ctx->temp_reg; 1593 } 1594 1595 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1596 int src_chan, src2_chan; 1597 1598 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1599 for (i = 0; i < 4; i++) { 1600 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1602 switch (i) { 1603 case 0: 1604 src_chan = 2; 1605 src2_chan = 1; 1606 break; 1607 case 1: 1608 src_chan = 2; 1609 src2_chan = 0; 1610 break; 1611 case 2: 1612 src_chan = 0; 1613 src2_chan = 2; 1614 break; 1615 case 3: 1616 src_chan = 1; 1617 src2_chan = 2; 1618 break; 1619 } 1620 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1621 if (r) 1622 return r; 1623 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1624 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1625 if (r) 1626 return r; 1627 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1628 alu.dst.sel = ctx->temp_reg; 1629 alu.dst.chan = i; 1630 if (i == 3) 1631 alu.last = 1; 1632 alu.dst.write = 1; 1633 r = r600_bc_add_alu(ctx->bc, &alu); 1634 if (r) 1635 return r; 1636 } 1637 1638 /* tmp1.z = RCP_e(|tmp1.z|) */ 1639 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1640 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1641 alu.src[0].sel = ctx->temp_reg; 1642 alu.src[0].chan = 2; 1643 alu.src[0].abs = 1; 1644 alu.dst.sel = ctx->temp_reg; 1645 alu.dst.chan = 2; 1646 alu.dst.write = 1; 1647 alu.last = 1; 1648 r = r600_bc_add_alu(ctx->bc, &alu); 1649 if (r) 1650 return r; 1651 1652 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1653 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1654 * muladd has no writemask, have to use another temp 1655 */ 1656 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1657 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1658 alu.is_op3 = 1; 1659 1660 alu.src[0].sel = ctx->temp_reg; 1661 alu.src[0].chan = 0; 1662 alu.src[1].sel = ctx->temp_reg; 1663 alu.src[1].chan = 2; 1664 1665 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1666 alu.src[2].chan = 0; 1667 1668 alu.dst.sel = ctx->temp_reg; 1669 alu.dst.chan = 0; 1670 alu.dst.write = 1; 1671 1672 r = r600_bc_add_alu(ctx->bc, &alu); 1673 if (r) 1674 return r; 1675 1676 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1677 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1678 alu.is_op3 = 1; 1679 1680 alu.src[0].sel = ctx->temp_reg; 1681 alu.src[0].chan = 1; 1682 alu.src[1].sel = ctx->temp_reg; 1683 alu.src[1].chan = 2; 1684 1685 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1686 alu.src[2].chan = 0; 1687 1688 alu.dst.sel = ctx->temp_reg; 1689 alu.dst.chan = 1; 1690 alu.dst.write = 1; 1691 1692 alu.last = 1; 1693 r = r600_bc_add_alu(ctx->bc, &alu); 1694 if (r) 1695 return r; 1696 1697 lit_vals[0] = fui(1.5f); 1698 1699 r = r600_bc_add_literal(ctx->bc, lit_vals); 1700 if (r) 1701 return r; 1702 src_not_temp = FALSE; 1703 src_gpr = ctx->temp_reg; 1704 } 1705 1706 if (src_not_temp) { 1707 for (i = 0; i < 4; i++) { 1708 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1709 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1710 alu.src[0].sel = src_gpr; 1711 alu.src[0].chan = i; 1712 alu.dst.sel = ctx->temp_reg; 1713 alu.dst.chan = i; 1714 if (i == 3) 1715 alu.last = 1; 1716 alu.dst.write = 1; 1717 r = r600_bc_add_alu(ctx->bc, &alu); 1718 if (r) 1719 return r; 1720 } 1721 src_gpr = ctx->temp_reg; 1722 } 1723 1724 opcode = ctx->inst_info->r600_opcode; 1725 if (opcode == SQ_TEX_INST_SAMPLE && 1726 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1727 opcode = SQ_TEX_INST_SAMPLE_C; 1728 1729 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1730 tex.inst = opcode; 1731 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1732 tex.sampler_id = tex.resource_id; 1733 tex.src_gpr = src_gpr; 1734 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1735 tex.dst_sel_x = 0; 1736 tex.dst_sel_y = 1; 1737 tex.dst_sel_z = 2; 1738 tex.dst_sel_w = 3; 1739 tex.src_sel_x = 0; 1740 tex.src_sel_y = 1; 1741 tex.src_sel_z = 2; 1742 tex.src_sel_w = 3; 1743 1744 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1745 tex.src_sel_x = 1; 1746 tex.src_sel_y = 0; 1747 tex.src_sel_z = 3; 1748 tex.src_sel_w = 1; 1749 } 1750 1751 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1752 tex.coord_type_x = 1; 1753 tex.coord_type_y = 1; 1754 tex.coord_type_z = 1; 1755 tex.coord_type_w = 1; 1756 } 1757 1758 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1759 tex.src_sel_w = 2; 1760 1761 r = r600_bc_add_tex(ctx->bc, &tex); 1762 if (r) 1763 return r; 1764 1765 /* add shadow ambient support - gallium doesn't do it yet */ 1766 return 0; 1767 1768} 1769 1770static int tgsi_lrp(struct r600_shader_ctx *ctx) 1771{ 1772 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1773 struct r600_bc_alu_src r600_src[3]; 1774 struct r600_bc_alu alu; 1775 unsigned i; 1776 int r; 1777 1778 r = tgsi_split_constant(ctx, r600_src); 1779 if (r) 1780 return r; 1781 /* 1 - src0 */ 1782 for (i = 0; i < 4; i++) { 1783 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1784 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1785 alu.src[0].sel = V_SQ_ALU_SRC_1; 1786 alu.src[0].chan = 0; 1787 alu.src[1] = r600_src[0]; 1788 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1789 alu.src[1].neg = 1; 1790 alu.dst.sel = ctx->temp_reg; 1791 alu.dst.chan = i; 1792 if (i == 3) { 1793 alu.last = 1; 1794 } 1795 alu.dst.write = 1; 1796 r = r600_bc_add_alu(ctx->bc, &alu); 1797 if (r) 1798 return r; 1799 } 1800 r = r600_bc_add_literal(ctx->bc, ctx->value); 1801 if (r) 1802 return r; 1803 1804 /* (1 - src0) * src2 */ 1805 for (i = 0; i < 4; i++) { 1806 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1807 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1808 alu.src[0].sel = ctx->temp_reg; 1809 alu.src[0].chan = i; 1810 alu.src[1] = r600_src[2]; 1811 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1812 alu.dst.sel = ctx->temp_reg; 1813 alu.dst.chan = i; 1814 if (i == 3) { 1815 alu.last = 1; 1816 } 1817 alu.dst.write = 1; 1818 r = r600_bc_add_alu(ctx->bc, &alu); 1819 if (r) 1820 return r; 1821 } 1822 r = r600_bc_add_literal(ctx->bc, ctx->value); 1823 if (r) 1824 return r; 1825 1826 /* src0 * src1 + (1 - src0) * src2 */ 1827 for (i = 0; i < 4; i++) { 1828 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1829 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1830 alu.is_op3 = 1; 1831 alu.src[0] = r600_src[0]; 1832 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1833 alu.src[1] = r600_src[1]; 1834 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1835 alu.src[2].sel = ctx->temp_reg; 1836 alu.src[2].chan = i; 1837 alu.dst.sel = ctx->temp_reg; 1838 alu.dst.chan = i; 1839 if (i == 3) { 1840 alu.last = 1; 1841 } 1842 r = r600_bc_add_alu(ctx->bc, &alu); 1843 if (r) 1844 return r; 1845 } 1846 return tgsi_helper_copy(ctx, inst); 1847} 1848 1849static int tgsi_cmp(struct r600_shader_ctx *ctx) 1850{ 1851 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1852 struct r600_bc_alu_src r600_src[3]; 1853 struct r600_bc_alu alu; 1854 int use_temp = 0; 1855 int i, r; 1856 1857 r = tgsi_split_constant(ctx, r600_src); 1858 if (r) 1859 return r; 1860 1861 if (inst->Dst[0].Register.WriteMask != 0xf) 1862 use_temp = 1; 1863 1864 for (i = 0; i < 4; i++) { 1865 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1866 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1867 alu.src[0] = r600_src[0]; 1868 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1869 1870 alu.src[1] = r600_src[2]; 1871 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1872 1873 alu.src[2] = r600_src[1]; 1874 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 1875 1876 if (use_temp) 1877 alu.dst.sel = ctx->temp_reg; 1878 else { 1879 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1880 if (r) 1881 return r; 1882 } 1883 alu.dst.chan = i; 1884 alu.dst.write = 1; 1885 alu.is_op3 = 1; 1886 if (i == 3) 1887 alu.last = 1; 1888 r = r600_bc_add_alu(ctx->bc, &alu); 1889 if (r) 1890 return r; 1891 } 1892 if (use_temp) 1893 return tgsi_helper_copy(ctx, inst); 1894 return 0; 1895} 1896 1897static int tgsi_xpd(struct r600_shader_ctx *ctx) 1898{ 1899 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1900 struct r600_bc_alu_src r600_src[3]; 1901 struct r600_bc_alu alu; 1902 uint32_t use_temp = 0; 1903 int i, r; 1904 1905 if (inst->Dst[0].Register.WriteMask != 0xf) 1906 use_temp = 1; 1907 1908 r = tgsi_split_constant(ctx, r600_src); 1909 if (r) 1910 return r; 1911 1912 for (i = 0; i < 4; i++) { 1913 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1914 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1915 1916 alu.src[0] = r600_src[0]; 1917 switch (i) { 1918 case 0: 1919 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1920 break; 1921 case 1: 1922 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1923 break; 1924 case 2: 1925 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1926 break; 1927 case 3: 1928 alu.src[0].sel = V_SQ_ALU_SRC_0; 1929 alu.src[0].chan = i; 1930 } 1931 1932 alu.src[1] = r600_src[1]; 1933 switch (i) { 1934 case 0: 1935 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1936 break; 1937 case 1: 1938 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1939 break; 1940 case 2: 1941 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1942 break; 1943 case 3: 1944 alu.src[1].sel = V_SQ_ALU_SRC_0; 1945 alu.src[1].chan = i; 1946 } 1947 1948 alu.dst.sel = ctx->temp_reg; 1949 alu.dst.chan = i; 1950 alu.dst.write = 1; 1951 1952 if (i == 3) 1953 alu.last = 1; 1954 r = r600_bc_add_alu(ctx->bc, &alu); 1955 if (r) 1956 return r; 1957 1958 r = r600_bc_add_literal(ctx->bc, ctx->value); 1959 if (r) 1960 return r; 1961 } 1962 1963 for (i = 0; i < 4; i++) { 1964 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1965 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1966 1967 alu.src[0] = r600_src[0]; 1968 switch (i) { 1969 case 0: 1970 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1971 break; 1972 case 1: 1973 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1974 break; 1975 case 2: 1976 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1977 break; 1978 case 3: 1979 alu.src[0].sel = V_SQ_ALU_SRC_0; 1980 alu.src[0].chan = i; 1981 } 1982 1983 alu.src[1] = r600_src[1]; 1984 switch (i) { 1985 case 0: 1986 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1987 break; 1988 case 1: 1989 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1990 break; 1991 case 2: 1992 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1993 break; 1994 case 3: 1995 alu.src[1].sel = V_SQ_ALU_SRC_0; 1996 alu.src[1].chan = i; 1997 } 1998 1999 alu.src[2].sel = ctx->temp_reg; 2000 alu.src[2].neg = 1; 2001 alu.src[2].chan = i; 2002 2003 if (use_temp) 2004 alu.dst.sel = ctx->temp_reg; 2005 else { 2006 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2007 if (r) 2008 return r; 2009 } 2010 alu.dst.chan = i; 2011 alu.dst.write = 1; 2012 alu.is_op3 = 1; 2013 if (i == 3) 2014 alu.last = 1; 2015 r = r600_bc_add_alu(ctx->bc, &alu); 2016 if (r) 2017 return r; 2018 2019 r = r600_bc_add_literal(ctx->bc, ctx->value); 2020 if (r) 2021 return r; 2022 } 2023 if (use_temp) 2024 return tgsi_helper_copy(ctx, inst); 2025 return 0; 2026} 2027 2028static int tgsi_exp(struct r600_shader_ctx *ctx) 2029{ 2030 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2031 struct r600_bc_alu_src r600_src[3]; 2032 struct r600_bc_alu alu; 2033 int r; 2034 2035 /* result.x = 2^floor(src); */ 2036 if (inst->Dst[0].Register.WriteMask & 1) { 2037 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2038 2039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2040 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2041 if (r) 2042 return r; 2043 2044 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2045 2046 alu.dst.sel = ctx->temp_reg; 2047 alu.dst.chan = 0; 2048 alu.dst.write = 1; 2049 alu.last = 1; 2050 r = r600_bc_add_alu(ctx->bc, &alu); 2051 if (r) 2052 return r; 2053 2054 r = r600_bc_add_literal(ctx->bc, ctx->value); 2055 if (r) 2056 return r; 2057 2058 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2059 alu.src[0].sel = ctx->temp_reg; 2060 alu.src[0].chan = 0; 2061 2062 alu.dst.sel = ctx->temp_reg; 2063 alu.dst.chan = 0; 2064 alu.dst.write = 1; 2065 alu.last = 1; 2066 r = r600_bc_add_alu(ctx->bc, &alu); 2067 if (r) 2068 return r; 2069 2070 r = r600_bc_add_literal(ctx->bc, ctx->value); 2071 if (r) 2072 return r; 2073 } 2074 2075 /* result.y = tmp - floor(tmp); */ 2076 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2077 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2078 2079 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2080 alu.src[0] = r600_src[0]; 2081 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2082 if (r) 2083 return r; 2084 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2085 2086 alu.dst.sel = ctx->temp_reg; 2087// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2088// if (r) 2089// return r; 2090 alu.dst.write = 1; 2091 alu.dst.chan = 1; 2092 2093 alu.last = 1; 2094 2095 r = r600_bc_add_alu(ctx->bc, &alu); 2096 if (r) 2097 return r; 2098 r = r600_bc_add_literal(ctx->bc, ctx->value); 2099 if (r) 2100 return r; 2101 } 2102 2103 /* result.z = RoughApprox2ToX(tmp);*/ 2104 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2105 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2106 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2107 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2108 if (r) 2109 return r; 2110 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2111 2112 alu.dst.sel = ctx->temp_reg; 2113 alu.dst.write = 1; 2114 alu.dst.chan = 2; 2115 2116 alu.last = 1; 2117 2118 r = r600_bc_add_alu(ctx->bc, &alu); 2119 if (r) 2120 return r; 2121 r = r600_bc_add_literal(ctx->bc, ctx->value); 2122 if (r) 2123 return r; 2124 } 2125 2126 /* result.w = 1.0;*/ 2127 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2128 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2129 2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2131 alu.src[0].sel = V_SQ_ALU_SRC_1; 2132 alu.src[0].chan = 0; 2133 2134 alu.dst.sel = ctx->temp_reg; 2135 alu.dst.chan = 3; 2136 alu.dst.write = 1; 2137 alu.last = 1; 2138 r = r600_bc_add_alu(ctx->bc, &alu); 2139 if (r) 2140 return r; 2141 r = r600_bc_add_literal(ctx->bc, ctx->value); 2142 if (r) 2143 return r; 2144 } 2145 return tgsi_helper_copy(ctx, inst); 2146} 2147 2148static int tgsi_log(struct r600_shader_ctx *ctx) 2149{ 2150 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2151 struct r600_bc_alu alu; 2152 int r; 2153 2154 /* result.x = floor(log2(src)); */ 2155 if (inst->Dst[0].Register.WriteMask & 1) { 2156 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2157 2158 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2159 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2160 if (r) 2161 return r; 2162 2163 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2164 2165 alu.dst.sel = ctx->temp_reg; 2166 alu.dst.chan = 0; 2167 alu.dst.write = 1; 2168 alu.last = 1; 2169 r = r600_bc_add_alu(ctx->bc, &alu); 2170 if (r) 2171 return r; 2172 2173 r = r600_bc_add_literal(ctx->bc, ctx->value); 2174 if (r) 2175 return r; 2176 2177 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2178 alu.src[0].sel = ctx->temp_reg; 2179 alu.src[0].chan = 0; 2180 2181 alu.dst.sel = ctx->temp_reg; 2182 alu.dst.chan = 0; 2183 alu.dst.write = 1; 2184 alu.last = 1; 2185 2186 r = r600_bc_add_alu(ctx->bc, &alu); 2187 if (r) 2188 return r; 2189 2190 r = r600_bc_add_literal(ctx->bc, ctx->value); 2191 if (r) 2192 return r; 2193 } 2194 2195 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2196 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2197 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2198 2199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2200 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2201 if (r) 2202 return r; 2203 2204 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2205 2206 alu.dst.sel = ctx->temp_reg; 2207 alu.dst.chan = 1; 2208 alu.dst.write = 1; 2209 alu.last = 1; 2210 2211 r = r600_bc_add_alu(ctx->bc, &alu); 2212 if (r) 2213 return r; 2214 2215 r = r600_bc_add_literal(ctx->bc, ctx->value); 2216 if (r) 2217 return r; 2218 2219 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2220 2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2222 alu.src[0].sel = ctx->temp_reg; 2223 alu.src[0].chan = 1; 2224 2225 alu.dst.sel = ctx->temp_reg; 2226 alu.dst.chan = 1; 2227 alu.dst.write = 1; 2228 alu.last = 1; 2229 2230 r = r600_bc_add_alu(ctx->bc, &alu); 2231 if (r) 2232 return r; 2233 2234 r = r600_bc_add_literal(ctx->bc, ctx->value); 2235 if (r) 2236 return r; 2237 2238 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2239 2240 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2241 alu.src[0].sel = ctx->temp_reg; 2242 alu.src[0].chan = 1; 2243 2244 alu.dst.sel = ctx->temp_reg; 2245 alu.dst.chan = 1; 2246 alu.dst.write = 1; 2247 alu.last = 1; 2248 2249 r = r600_bc_add_alu(ctx->bc, &alu); 2250 if (r) 2251 return r; 2252 2253 r = r600_bc_add_literal(ctx->bc, ctx->value); 2254 if (r) 2255 return r; 2256 2257 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2258 2259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2260 alu.src[0].sel = ctx->temp_reg; 2261 alu.src[0].chan = 1; 2262 2263 alu.dst.sel = ctx->temp_reg; 2264 alu.dst.chan = 1; 2265 alu.dst.write = 1; 2266 alu.last = 1; 2267 2268 r = r600_bc_add_alu(ctx->bc, &alu); 2269 if (r) 2270 return r; 2271 2272 r = r600_bc_add_literal(ctx->bc, ctx->value); 2273 if (r) 2274 return r; 2275 2276 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2277 2278 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2279 2280 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2281 if (r) 2282 return r; 2283 2284 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2285 2286 alu.src[1].sel = ctx->temp_reg; 2287 alu.src[1].chan = 1; 2288 2289 alu.dst.sel = ctx->temp_reg; 2290 alu.dst.chan = 1; 2291 alu.dst.write = 1; 2292 alu.last = 1; 2293 2294 r = r600_bc_add_alu(ctx->bc, &alu); 2295 if (r) 2296 return r; 2297 2298 r = r600_bc_add_literal(ctx->bc, ctx->value); 2299 if (r) 2300 return r; 2301 } 2302 2303 /* result.z = log2(src);*/ 2304 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2305 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2306 2307 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2308 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2309 if (r) 2310 return r; 2311 2312 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2313 2314 alu.dst.sel = ctx->temp_reg; 2315 alu.dst.write = 1; 2316 alu.dst.chan = 2; 2317 alu.last = 1; 2318 2319 r = r600_bc_add_alu(ctx->bc, &alu); 2320 if (r) 2321 return r; 2322 2323 r = r600_bc_add_literal(ctx->bc, ctx->value); 2324 if (r) 2325 return r; 2326 } 2327 2328 /* result.w = 1.0; */ 2329 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2330 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2331 2332 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2333 alu.src[0].sel = V_SQ_ALU_SRC_1; 2334 alu.src[0].chan = 0; 2335 2336 alu.dst.sel = ctx->temp_reg; 2337 alu.dst.chan = 3; 2338 alu.dst.write = 1; 2339 alu.last = 1; 2340 2341 r = r600_bc_add_alu(ctx->bc, &alu); 2342 if (r) 2343 return r; 2344 2345 r = r600_bc_add_literal(ctx->bc, ctx->value); 2346 if (r) 2347 return r; 2348 } 2349 2350 return tgsi_helper_copy(ctx, inst); 2351} 2352 2353/* r6/7 only for now */ 2354static int tgsi_arl(struct r600_shader_ctx *ctx) 2355{ 2356 /* TODO from r600c, ar values don't persist between clauses */ 2357 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2358 struct r600_bc_alu alu; 2359 int r; 2360 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2361 2362 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2363 2364 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2365 if (r) 2366 return r; 2367 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2368 2369 alu.last = 1; 2370 2371 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2372 if (r) 2373 return r; 2374 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2375 return 0; 2376} 2377 2378static int tgsi_opdst(struct r600_shader_ctx *ctx) 2379{ 2380 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2381 struct r600_bc_alu alu; 2382 int i, r = 0; 2383 2384 for (i = 0; i < 4; i++) { 2385 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2386 2387 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2388 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2389 if (r) 2390 return r; 2391 2392 if (i == 0 || i == 3) { 2393 alu.src[0].sel = V_SQ_ALU_SRC_1; 2394 } else { 2395 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2396 if (r) 2397 return r; 2398 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2399 } 2400 2401 if (i == 0 || i == 2) { 2402 alu.src[1].sel = V_SQ_ALU_SRC_1; 2403 } else { 2404 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2405 if (r) 2406 return r; 2407 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2408 } 2409 if (i == 3) 2410 alu.last = 1; 2411 r = r600_bc_add_alu(ctx->bc, &alu); 2412 if (r) 2413 return r; 2414 } 2415 return 0; 2416} 2417 2418static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2419{ 2420 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2421 struct r600_bc_alu alu; 2422 int r; 2423 2424 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2425 alu.inst = opcode; 2426 alu.predicate = 1; 2427 2428 alu.dst.sel = ctx->temp_reg; 2429 alu.dst.write = 1; 2430 alu.dst.chan = 0; 2431 2432 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2433 if (r) 2434 return r; 2435 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2436 alu.src[1].sel = V_SQ_ALU_SRC_0; 2437 alu.src[1].chan = 0; 2438 2439 alu.last = 1; 2440 2441 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2442 if (r) 2443 return r; 2444 return 0; 2445} 2446 2447static int pops(struct r600_shader_ctx *ctx, int pops) 2448{ 2449 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2450 ctx->bc->cf_last->pop_count = pops; 2451 return 0; 2452} 2453 2454static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2455{ 2456 switch(reason) { 2457 case FC_PUSH_VPM: 2458 ctx->bc->callstack[ctx->bc->call_sp].current--; 2459 break; 2460 case FC_PUSH_WQM: 2461 case FC_LOOP: 2462 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2463 break; 2464 case FC_REP: 2465 /* TOODO : for 16 vp asic should -= 2; */ 2466 ctx->bc->callstack[ctx->bc->call_sp].current --; 2467 break; 2468 } 2469} 2470 2471static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2472{ 2473 if (check_max_only) { 2474 int diff; 2475 switch (reason) { 2476 case FC_PUSH_VPM: 2477 diff = 1; 2478 break; 2479 case FC_PUSH_WQM: 2480 diff = 4; 2481 break; 2482 default: 2483 assert(0); 2484 diff = 0; 2485 } 2486 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2487 ctx->bc->callstack[ctx->bc->call_sp].max) { 2488 ctx->bc->callstack[ctx->bc->call_sp].max = 2489 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2490 } 2491 return; 2492 } 2493 switch (reason) { 2494 case FC_PUSH_VPM: 2495 ctx->bc->callstack[ctx->bc->call_sp].current++; 2496 break; 2497 case FC_PUSH_WQM: 2498 case FC_LOOP: 2499 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2500 break; 2501 case FC_REP: 2502 ctx->bc->callstack[ctx->bc->call_sp].current++; 2503 break; 2504 } 2505 2506 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2507 ctx->bc->callstack[ctx->bc->call_sp].max) { 2508 ctx->bc->callstack[ctx->bc->call_sp].max = 2509 ctx->bc->callstack[ctx->bc->call_sp].current; 2510 } 2511} 2512 2513static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2514{ 2515 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2516 2517 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2518 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2519 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2520 sp->num_mid++; 2521} 2522 2523static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2524{ 2525 ctx->bc->fc_sp++; 2526 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2527 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2528} 2529 2530static void fc_poplevel(struct r600_shader_ctx *ctx) 2531{ 2532 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2533 if (sp->mid) { 2534 free(sp->mid); 2535 sp->mid = NULL; 2536 } 2537 sp->num_mid = 0; 2538 sp->start = NULL; 2539 sp->type = 0; 2540 ctx->bc->fc_sp--; 2541} 2542 2543#if 0 2544static int emit_return(struct r600_shader_ctx *ctx) 2545{ 2546 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2547 return 0; 2548} 2549 2550static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2551{ 2552 2553 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2554 ctx->bc->cf_last->pop_count = pops; 2555 /* TODO work out offset */ 2556 return 0; 2557} 2558 2559static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2560{ 2561 return 0; 2562} 2563 2564static void emit_testflag(struct r600_shader_ctx *ctx) 2565{ 2566 2567} 2568 2569static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2570{ 2571 emit_testflag(ctx); 2572 emit_jump_to_offset(ctx, 1, 4); 2573 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2574 pops(ctx, ifidx + 1); 2575 emit_return(ctx); 2576} 2577 2578static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2579{ 2580 emit_testflag(ctx); 2581 2582 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2583 ctx->bc->cf_last->pop_count = 1; 2584 2585 fc_set_mid(ctx, fc_sp); 2586 2587 pops(ctx, 1); 2588} 2589#endif 2590 2591static int tgsi_if(struct r600_shader_ctx *ctx) 2592{ 2593 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2594 2595 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2596 2597 fc_pushlevel(ctx, FC_IF); 2598 2599 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2600 return 0; 2601} 2602 2603static int tgsi_else(struct r600_shader_ctx *ctx) 2604{ 2605 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2606 ctx->bc->cf_last->pop_count = 1; 2607 2608 fc_set_mid(ctx, ctx->bc->fc_sp); 2609 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2610 return 0; 2611} 2612 2613static int tgsi_endif(struct r600_shader_ctx *ctx) 2614{ 2615 pops(ctx, 1); 2616 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2617 R600_ERR("if/endif unbalanced in shader\n"); 2618 return -1; 2619 } 2620 2621 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2622 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2623 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2624 } else { 2625 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2626 } 2627 fc_poplevel(ctx); 2628 2629 callstack_decrease_current(ctx, FC_PUSH_VPM); 2630 return 0; 2631} 2632 2633static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2634{ 2635 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2636 2637 fc_pushlevel(ctx, FC_LOOP); 2638 2639 /* check stack depth */ 2640 callstack_check_depth(ctx, FC_LOOP, 0); 2641 return 0; 2642} 2643 2644static int tgsi_endloop(struct r600_shader_ctx *ctx) 2645{ 2646 int i; 2647 2648 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2649 2650 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2651 R600_ERR("loop/endloop in shader code are not paired.\n"); 2652 return -EINVAL; 2653 } 2654 2655 /* fixup loop pointers - from r600isa 2656 LOOP END points to CF after LOOP START, 2657 LOOP START point to CF after LOOP END 2658 BRK/CONT point to LOOP END CF 2659 */ 2660 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2661 2662 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2663 2664 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2665 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2666 } 2667 /* TODO add LOOPRET support */ 2668 fc_poplevel(ctx); 2669 callstack_decrease_current(ctx, FC_LOOP); 2670 return 0; 2671} 2672 2673static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2674{ 2675 unsigned int fscp; 2676 2677 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2678 { 2679 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2680 break; 2681 } 2682 2683 if (fscp == 0) { 2684 R600_ERR("Break not inside loop/endloop pair\n"); 2685 return -EINVAL; 2686 } 2687 2688 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2689 ctx->bc->cf_last->pop_count = 1; 2690 2691 fc_set_mid(ctx, fscp); 2692 2693 pops(ctx, 1); 2694 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2695 return 0; 2696} 2697 2698static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2699 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2700 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2701 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2702 2703 /* FIXME: 2704 * For state trackers other than OpenGL, we'll want to use 2705 * _RECIP_IEEE instead. 2706 */ 2707 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2708 2709 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2710 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2711 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2712 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2713 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2714 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2715 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2716 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2717 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2718 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2719 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2720 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2721 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2722 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2723 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2724 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2725 /* gap */ 2726 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2727 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2728 /* gap */ 2729 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2730 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2731 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2732 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2733 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2734 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2735 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2736 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2737 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2738 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2739 /* gap */ 2740 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2741 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2742 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2743 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2744 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2745 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2746 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2747 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2748 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2749 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2750 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2751 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2752 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2753 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2754 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2755 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2756 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2757 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2758 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2759 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2760 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2761 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2762 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2763 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2764 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2765 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2766 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2767 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2768 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2769 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2770 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2771 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2772 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2773 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2774 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2775 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2776 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2777 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2778 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2779 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2780 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2781 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2782 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2783 /* gap */ 2784 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2785 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2786 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2787 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2788 /* gap */ 2789 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2790 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2791 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2792 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2793 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2794 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2795 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2796 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2797 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2798 /* gap */ 2799 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2800 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2801 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2802 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2803 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2804 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2805 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2806 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2807 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2808 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2809 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2810 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2811 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2812 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2813 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2814 /* gap */ 2815 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2816 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2817 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2818 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2819 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2820 /* gap */ 2821 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2822 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2823 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2824 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2825 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2826 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2827 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2828 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2829 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2830 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2831 /* gap */ 2832 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2833 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2834 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2835 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2836 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2837 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2838 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2839 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2840 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2841 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2842 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2843 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2844 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2845 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2846 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2847 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2848 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2849 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2850 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2851 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2852 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2853 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2854 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2855 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2856 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2857 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2858 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2859 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2860}; 2861 2862static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2863 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2864 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2865 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2866 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2867 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2868 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2869 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2870 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2871 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2872 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2873 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2874 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2875 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2876 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2877 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2878 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2879 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2880 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2881 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2882 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2883 /* gap */ 2884 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2885 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 /* gap */ 2887 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2889 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2890 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2892 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2894 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2895 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2896 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2897 /* gap */ 2898 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2899 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2900 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2901 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2902 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2903 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2904 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2905 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2906 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2907 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2908 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2909 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2910 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2911 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2912 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2913 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2914 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2915 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2916 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2917 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2918 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2919 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2920 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2921 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2922 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2923 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2924 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2925 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2926 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2930 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2932 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2933 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2934 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2935 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2938 {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2940 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2941 /* gap */ 2942 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2943 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2944 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2945 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2946 /* gap */ 2947 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2948 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2949 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2950 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2951 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2952 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2954 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2955 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2956 /* gap */ 2957 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2958 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2959 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2960 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2961 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2962 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2963 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2964 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2965 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2966 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2967 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2968 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2969 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2970 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2971 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2972 /* gap */ 2973 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2974 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2975 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2976 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2977 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2978 /* gap */ 2979 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2980 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2981 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2982 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2983 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2984 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2985 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2986 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2987 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2988 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2989 /* gap */ 2990 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2991 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2992 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2993 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2994 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2995 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2996 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2997 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2998 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2999 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3000 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3001 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3002 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3003 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3004 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3005 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3007 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3008 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3009 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3010 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3011 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3012 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3013 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3014 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3015 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3016 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3017 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3018}; 3019