r600_shader.c revision 460c5304aba646143938b76d536a6fc13a302ca8
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37 38 39struct r600_shader_tgsi_instruction; 40 41struct r600_shader_ctx { 42 struct tgsi_shader_info info; 43 struct tgsi_parse_context parse; 44 const struct tgsi_token *tokens; 45 unsigned type; 46 unsigned file_offset[TGSI_FILE_COUNT]; 47 unsigned temp_reg; 48 struct r600_shader_tgsi_instruction *inst_info; 49 struct r600_bc *bc; 50 struct r600_shader *shader; 51 u32 value[4]; 52 u32 *literals; 53 u32 nliterals; 54 u32 max_driver_temp_used; 55}; 56 57struct r600_shader_tgsi_instruction { 58 unsigned tgsi_opcode; 59 unsigned is_op3; 60 unsigned r600_opcode; 61 int (*process)(struct r600_shader_ctx *ctx); 62}; 63 64static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; 65static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 66static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 67 68static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 69{ 70 struct r600_context *rctx = r600_context(ctx); 71 const struct util_format_description *desc; 72 enum pipe_format resource_format[160]; 73 unsigned i, nresources = 0; 74 struct r600_bc *bc = &shader->bc; 75 struct r600_bc_cf *cf; 76 struct r600_bc_vtx *vtx; 77 78 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 79 return 0; 80 for (i = 0; i < rctx->vertex_elements->count; i++) { 81 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 82 } 83 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 84 switch (cf->inst) { 85 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 86 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 87 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 88 desc = util_format_description(resource_format[vtx->buffer_id]); 89 if (desc == NULL) { 90 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 91 return -EINVAL; 92 } 93 vtx->dst_sel_x = desc->swizzle[0]; 94 vtx->dst_sel_y = desc->swizzle[1]; 95 vtx->dst_sel_z = desc->swizzle[2]; 96 vtx->dst_sel_w = desc->swizzle[3]; 97 } 98 break; 99 default: 100 break; 101 } 102 } 103 return r600_bc_build(&shader->bc); 104} 105 106int r600_pipe_shader_create(struct pipe_context *ctx, 107 struct r600_context_state *rpshader, 108 const struct tgsi_token *tokens) 109{ 110 struct r600_screen *rscreen = r600_screen(ctx->screen); 111 int r; 112 113//fprintf(stderr, "--------------------------------------------------------------\n"); 114//tgsi_dump(tokens, 0); 115 if (rpshader == NULL) 116 return -ENOMEM; 117 rpshader->shader.family = radeon_get_family(rscreen->rw); 118 rpshader->shader.use_mem_constant = rscreen->use_mem_constant; 119 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 120 if (r) { 121 R600_ERR("translation from TGSI failed !\n"); 122 return r; 123 } 124 r = r600_bc_build(&rpshader->shader.bc); 125 if (r) { 126 R600_ERR("building bytecode failed !\n"); 127 return r; 128 } 129//fprintf(stderr, "______________________________________________________________\n"); 130 return 0; 131} 132 133static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 134{ 135 struct r600_context *rctx = r600_context(ctx); 136 struct radeon_state *state; 137 138 state = &rpshader->rstate[0]; 139 radeon_state_fini(&rpshader->rstate[0]); 140 141 return rctx->vtbl->vs_shader(rctx, rpshader, state); 142} 143 144static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 145{ 146 struct r600_context *rctx = r600_context(ctx); 147 struct radeon_state *state; 148 149 state = &rpshader->rstate[0]; 150 radeon_state_fini(state); 151 152 return rctx->vtbl->ps_shader(rctx, rpshader, state); 153} 154 155static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 156{ 157 struct r600_screen *rscreen = r600_screen(ctx->screen); 158 struct r600_context *rctx = r600_context(ctx); 159 struct r600_shader *rshader = &rpshader->shader; 160 int r; 161 162 /* copy new shader */ 163 radeon_bo_decref(rscreen->rw, rpshader->bo); 164 rpshader->bo = NULL; 165 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 166 4096, NULL); 167 if (rpshader->bo == NULL) { 168 return -ENOMEM; 169 } 170 radeon_bo_map(rscreen->rw, rpshader->bo); 171 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); 172 radeon_bo_unmap(rscreen->rw, rpshader->bo); 173 /* build state */ 174 rshader->flat_shade = rctx->flat_shade; 175 switch (rshader->processor_type) { 176 case TGSI_PROCESSOR_VERTEX: 177 r = r600_pipe_shader_vs(ctx, rpshader); 178 break; 179 case TGSI_PROCESSOR_FRAGMENT: 180 r = r600_pipe_shader_ps(ctx, rpshader); 181 break; 182 default: 183 r = -EINVAL; 184 break; 185 } 186 return r; 187} 188 189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 190{ 191 struct r600_context *rctx = r600_context(ctx); 192 int r; 193 194 if (rpshader == NULL) 195 return -EINVAL; 196 /* there should be enough input */ 197 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 198 R600_ERR("%d resources provided, expecting %d\n", 199 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 200 return -EINVAL; 201 } 202 r = r600_shader_update(ctx, &rpshader->shader); 203 if (r) 204 return r; 205 return r600_pipe_shader(ctx, rpshader); 206} 207 208static int tgsi_is_supported(struct r600_shader_ctx *ctx) 209{ 210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 211 int j; 212 213 if (i->Instruction.NumDstRegs > 1) { 214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 215 return -EINVAL; 216 } 217 if (i->Instruction.Predicate) { 218 R600_ERR("predicate unsupported\n"); 219 return -EINVAL; 220 } 221#if 0 222 if (i->Instruction.Label) { 223 R600_ERR("label unsupported\n"); 224 return -EINVAL; 225 } 226#endif 227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 228 if (i->Src[j].Register.Dimension || 229 i->Src[j].Register.Absolute) { 230 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 231 i->Src[j].Register.Dimension, 232 i->Src[j].Register.Absolute); 233 return -EINVAL; 234 } 235 } 236 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 237 if (i->Dst[j].Register.Dimension) { 238 R600_ERR("unsupported dst (dimension)\n"); 239 return -EINVAL; 240 } 241 } 242 return 0; 243} 244 245static int tgsi_declaration(struct r600_shader_ctx *ctx) 246{ 247 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 248 struct r600_bc_vtx vtx; 249 unsigned i; 250 int r; 251 252 switch (d->Declaration.File) { 253 case TGSI_FILE_INPUT: 254 i = ctx->shader->ninput++; 255 ctx->shader->input[i].name = d->Semantic.Name; 256 ctx->shader->input[i].sid = d->Semantic.Index; 257 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 258 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 259 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 260 /* turn input into fetch */ 261 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 262 vtx.inst = 0; 263 vtx.fetch_type = 0; 264 vtx.buffer_id = i; 265 /* register containing the index into the buffer */ 266 vtx.src_gpr = 0; 267 vtx.src_sel_x = 0; 268 vtx.mega_fetch_count = 0x1F; 269 vtx.dst_gpr = ctx->shader->input[i].gpr; 270 vtx.dst_sel_x = 0; 271 vtx.dst_sel_y = 1; 272 vtx.dst_sel_z = 2; 273 vtx.dst_sel_w = 3; 274 r = r600_bc_add_vtx(ctx->bc, &vtx); 275 if (r) 276 return r; 277 } 278 break; 279 case TGSI_FILE_OUTPUT: 280 i = ctx->shader->noutput++; 281 ctx->shader->output[i].name = d->Semantic.Name; 282 ctx->shader->output[i].sid = d->Semantic.Index; 283 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 284 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 285 break; 286 case TGSI_FILE_CONSTANT: 287 case TGSI_FILE_TEMPORARY: 288 case TGSI_FILE_SAMPLER: 289 case TGSI_FILE_ADDRESS: 290 break; 291 default: 292 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 293 return -EINVAL; 294 } 295 return 0; 296} 297 298static int r600_get_temp(struct r600_shader_ctx *ctx) 299{ 300 return ctx->temp_reg + ctx->max_driver_temp_used++; 301} 302 303int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 304{ 305 struct tgsi_full_immediate *immediate; 306 struct r600_shader_ctx ctx; 307 struct r600_bc_output output[32]; 308 unsigned output_done, noutput; 309 unsigned opcode; 310 int i, r = 0, pos0; 311 312 ctx.bc = &shader->bc; 313 ctx.shader = shader; 314 r = r600_bc_init(ctx.bc, shader->family); 315 if (r) 316 return r; 317 ctx.bc->use_mem_constant = shader->use_mem_constant; 318 ctx.tokens = tokens; 319 tgsi_scan_shader(tokens, &ctx.info); 320 tgsi_parse_init(&ctx.parse, tokens); 321 ctx.type = ctx.parse.FullHeader.Processor.Processor; 322 shader->processor_type = ctx.type; 323 324 /* register allocations */ 325 /* Values [0,127] correspond to GPR[0..127]. 326 * Values [128,159] correspond to constant buffer bank 0 327 * Values [160,191] correspond to constant buffer bank 1 328 * Values [256,511] correspond to cfile constants c[0..255]. 329 * Other special values are shown in the list below. 330 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 331 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 332 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 333 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 334 * 248 SQ_ALU_SRC_0: special constant 0.0. 335 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 336 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 337 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 338 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 339 * 253 SQ_ALU_SRC_LITERAL: literal constant. 340 * 254 SQ_ALU_SRC_PV: previous vector result. 341 * 255 SQ_ALU_SRC_PS: previous scalar result. 342 */ 343 for (i = 0; i < TGSI_FILE_COUNT; i++) { 344 ctx.file_offset[i] = 0; 345 } 346 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 347 ctx.file_offset[TGSI_FILE_INPUT] = 1; 348 } 349 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 350 ctx.info.file_count[TGSI_FILE_INPUT]; 351 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 352 ctx.info.file_count[TGSI_FILE_OUTPUT]; 353 if (ctx.shader->use_mem_constant) 354 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 355 else 356 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 357 358 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 359 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 360 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 361 362 ctx.nliterals = 0; 363 ctx.literals = NULL; 364 365 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 366 tgsi_parse_token(&ctx.parse); 367 switch (ctx.parse.FullToken.Token.Type) { 368 case TGSI_TOKEN_TYPE_IMMEDIATE: 369 immediate = &ctx.parse.FullToken.FullImmediate; 370 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 371 if(ctx.literals == NULL) { 372 r = -ENOMEM; 373 goto out_err; 374 } 375 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 376 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 377 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 378 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 379 ctx.nliterals++; 380 break; 381 case TGSI_TOKEN_TYPE_DECLARATION: 382 r = tgsi_declaration(&ctx); 383 if (r) 384 goto out_err; 385 break; 386 case TGSI_TOKEN_TYPE_INSTRUCTION: 387 r = tgsi_is_supported(&ctx); 388 if (r) 389 goto out_err; 390 ctx.max_driver_temp_used = 0; 391 /* reserve first tmp for everyone */ 392 r600_get_temp(&ctx); 393 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 394 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 395 r = ctx.inst_info->process(&ctx); 396 if (r) 397 goto out_err; 398 r = r600_bc_add_literal(ctx.bc, ctx.value); 399 if (r) 400 goto out_err; 401 break; 402 default: 403 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 404 r = -EINVAL; 405 goto out_err; 406 } 407 } 408 /* export output */ 409 noutput = shader->noutput; 410 for (i = 0, pos0 = 0; i < noutput; i++) { 411 memset(&output[i], 0, sizeof(struct r600_bc_output)); 412 output[i].gpr = shader->output[i].gpr; 413 output[i].elem_size = 3; 414 output[i].swizzle_x = 0; 415 output[i].swizzle_y = 1; 416 output[i].swizzle_z = 2; 417 output[i].swizzle_w = 3; 418 output[i].barrier = 1; 419 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 420 output[i].array_base = i - pos0; 421 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 422 switch (ctx.type) { 423 case TGSI_PROCESSOR_VERTEX: 424 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 425 output[i].array_base = 60; 426 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 427 /* position doesn't count in array_base */ 428 pos0++; 429 } 430 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 431 output[i].array_base = 61; 432 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 433 /* position doesn't count in array_base */ 434 pos0++; 435 } 436 break; 437 case TGSI_PROCESSOR_FRAGMENT: 438 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 439 output[i].array_base = shader->output[i].sid; 440 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 441 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 442 output[i].array_base = 61; 443 output[i].swizzle_x = 2; 444 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; 445 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 446 } else { 447 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 448 r = -EINVAL; 449 goto out_err; 450 } 451 break; 452 default: 453 R600_ERR("unsupported processor type %d\n", ctx.type); 454 r = -EINVAL; 455 goto out_err; 456 } 457 } 458 /* add fake param output for vertex shader if no param is exported */ 459 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 460 for (i = 0, pos0 = 0; i < noutput; i++) { 461 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 462 pos0 = 1; 463 break; 464 } 465 } 466 if (!pos0) { 467 memset(&output[i], 0, sizeof(struct r600_bc_output)); 468 output[i].gpr = 0; 469 output[i].elem_size = 3; 470 output[i].swizzle_x = 0; 471 output[i].swizzle_y = 1; 472 output[i].swizzle_z = 2; 473 output[i].swizzle_w = 3; 474 output[i].barrier = 1; 475 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 476 output[i].array_base = 0; 477 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 478 noutput++; 479 } 480 } 481 /* add fake pixel export */ 482 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 483 memset(&output[0], 0, sizeof(struct r600_bc_output)); 484 output[0].gpr = 0; 485 output[0].elem_size = 3; 486 output[0].swizzle_x = 7; 487 output[0].swizzle_y = 7; 488 output[0].swizzle_z = 7; 489 output[0].swizzle_w = 7; 490 output[0].barrier = 1; 491 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 492 output[0].array_base = 0; 493 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 494 noutput++; 495 } 496 /* set export done on last export of each type */ 497 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 498 if (i == (noutput - 1)) { 499 output[i].end_of_program = 1; 500 } 501 if (!(output_done & (1 << output[i].type))) { 502 output_done |= (1 << output[i].type); 503 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 504 } 505 } 506 /* add output to bytecode */ 507 for (i = 0; i < noutput; i++) { 508 r = r600_bc_add_output(ctx.bc, &output[i]); 509 if (r) 510 goto out_err; 511 } 512 free(ctx.literals); 513 tgsi_parse_free(&ctx.parse); 514 return 0; 515out_err: 516 free(ctx.literals); 517 tgsi_parse_free(&ctx.parse); 518 return r; 519} 520 521static int tgsi_unsupported(struct r600_shader_ctx *ctx) 522{ 523 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 524 return -EINVAL; 525} 526 527static int tgsi_end(struct r600_shader_ctx *ctx) 528{ 529 return 0; 530} 531 532static int tgsi_src(struct r600_shader_ctx *ctx, 533 const struct tgsi_full_src_register *tgsi_src, 534 struct r600_bc_alu_src *r600_src) 535{ 536 int index; 537 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 538 r600_src->sel = tgsi_src->Register.Index; 539 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 540 r600_src->sel = 0; 541 index = tgsi_src->Register.Index; 542 ctx->value[0] = ctx->literals[index * 4 + 0]; 543 ctx->value[1] = ctx->literals[index * 4 + 1]; 544 ctx->value[2] = ctx->literals[index * 4 + 2]; 545 ctx->value[3] = ctx->literals[index * 4 + 3]; 546 } 547 if (tgsi_src->Register.Indirect) 548 r600_src->rel = V_SQ_REL_RELATIVE; 549 r600_src->neg = tgsi_src->Register.Negate; 550 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 551 return 0; 552} 553 554static int tgsi_dst(struct r600_shader_ctx *ctx, 555 const struct tgsi_full_dst_register *tgsi_dst, 556 unsigned swizzle, 557 struct r600_bc_alu_dst *r600_dst) 558{ 559 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 560 561 r600_dst->sel = tgsi_dst->Register.Index; 562 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 563 r600_dst->chan = swizzle; 564 r600_dst->write = 1; 565 if (tgsi_dst->Register.Indirect) 566 r600_dst->rel = V_SQ_REL_RELATIVE; 567 if (inst->Instruction.Saturate) { 568 r600_dst->clamp = 1; 569 } 570 return 0; 571} 572 573static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 574{ 575 switch (swizzle) { 576 case 0: 577 return tgsi_src->Register.SwizzleX; 578 case 1: 579 return tgsi_src->Register.SwizzleY; 580 case 2: 581 return tgsi_src->Register.SwizzleZ; 582 case 3: 583 return tgsi_src->Register.SwizzleW; 584 default: 585 return 0; 586 } 587} 588 589static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 590{ 591 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 592 struct r600_bc_alu alu; 593 int i, j, k, nconst, r; 594 595 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 596 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 597 nconst++; 598 } 599 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 600 if (r) { 601 return r; 602 } 603 } 604 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 605 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { 606 int treg = r600_get_temp(ctx); 607 for (k = 0; k < 4; k++) { 608 memset(&alu, 0, sizeof(struct r600_bc_alu)); 609 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 610 alu.src[0].sel = r600_src[j].sel; 611 alu.src[0].chan = k; 612 alu.dst.sel = treg; 613 alu.dst.chan = k; 614 alu.dst.write = 1; 615 if (k == 3) 616 alu.last = 1; 617 r = r600_bc_add_alu(ctx->bc, &alu); 618 if (r) 619 return r; 620 } 621 r600_src[j].sel = treg; 622 j--; 623 } 624 } 625 return 0; 626} 627 628/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 629static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 630{ 631 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 632 struct r600_bc_alu alu; 633 int i, j, k, nliteral, r; 634 635 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 636 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 637 nliteral++; 638 } 639 } 640 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { 641 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { 642 int treg = r600_get_temp(ctx); 643 for (k = 0; k < 4; k++) { 644 memset(&alu, 0, sizeof(struct r600_bc_alu)); 645 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 646 alu.src[0].sel = r600_src[j].sel; 647 alu.src[0].chan = k; 648 alu.dst.sel = treg; 649 alu.dst.chan = k; 650 alu.dst.write = 1; 651 if (k == 3) 652 alu.last = 1; 653 r = r600_bc_add_alu(ctx->bc, &alu); 654 if (r) 655 return r; 656 } 657 r = r600_bc_add_literal(ctx->bc, ctx->value); 658 if (r) 659 return r; 660 r600_src[j].sel = treg; 661 j++; 662 } 663 } 664 return 0; 665} 666 667static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 668{ 669 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 670 struct r600_bc_alu_src r600_src[3]; 671 struct r600_bc_alu alu; 672 int i, j, r; 673 int lasti = 0; 674 675 for (i = 0; i < 4; i++) { 676 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 677 lasti = i; 678 } 679 } 680 681 r = tgsi_split_constant(ctx, r600_src); 682 if (r) 683 return r; 684 for (i = 0; i < lasti + 1; i++) { 685 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 686 continue; 687 688 memset(&alu, 0, sizeof(struct r600_bc_alu)); 689 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 690 if (r) 691 return r; 692 693 alu.inst = ctx->inst_info->r600_opcode; 694 if (!swap) { 695 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 696 alu.src[j] = r600_src[j]; 697 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 698 } 699 } else { 700 alu.src[0] = r600_src[1]; 701 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 702 703 alu.src[1] = r600_src[0]; 704 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 705 } 706 /* handle some special cases */ 707 switch (ctx->inst_info->tgsi_opcode) { 708 case TGSI_OPCODE_SUB: 709 alu.src[1].neg = 1; 710 break; 711 case TGSI_OPCODE_ABS: 712 alu.src[0].abs = 1; 713 break; 714 default: 715 break; 716 } 717 if (i == lasti) { 718 alu.last = 1; 719 } 720 r = r600_bc_add_alu(ctx->bc, &alu); 721 if (r) 722 return r; 723 } 724 return 0; 725} 726 727static int tgsi_op2(struct r600_shader_ctx *ctx) 728{ 729 return tgsi_op2_s(ctx, 0); 730} 731 732static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 733{ 734 return tgsi_op2_s(ctx, 1); 735} 736 737/* 738 * r600 - trunc to -PI..PI range 739 * r700 - normalize by dividing by 2PI 740 * see fdo bug 27901 741 */ 742static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 743 struct r600_bc_alu_src r600_src[3]) 744{ 745 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 746 int r; 747 uint32_t lit_vals[4]; 748 struct r600_bc_alu alu; 749 750 memset(lit_vals, 0, 4*4); 751 r = tgsi_split_constant(ctx, r600_src); 752 if (r) 753 return r; 754 755 r = tgsi_split_literal_constant(ctx, r600_src); 756 if (r) 757 return r; 758 759 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 760 lit_vals[1] = fui(0.5f); 761 762 memset(&alu, 0, sizeof(struct r600_bc_alu)); 763 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 764 alu.is_op3 = 1; 765 766 alu.dst.chan = 0; 767 alu.dst.sel = ctx->temp_reg; 768 alu.dst.write = 1; 769 770 alu.src[0] = r600_src[0]; 771 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 772 773 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 774 alu.src[1].chan = 0; 775 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 776 alu.src[2].chan = 1; 777 alu.last = 1; 778 r = r600_bc_add_alu(ctx->bc, &alu); 779 if (r) 780 return r; 781 r = r600_bc_add_literal(ctx->bc, lit_vals); 782 if (r) 783 return r; 784 785 memset(&alu, 0, sizeof(struct r600_bc_alu)); 786 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 787 788 alu.dst.chan = 0; 789 alu.dst.sel = ctx->temp_reg; 790 alu.dst.write = 1; 791 792 alu.src[0].sel = ctx->temp_reg; 793 alu.src[0].chan = 0; 794 alu.last = 1; 795 r = r600_bc_add_alu(ctx->bc, &alu); 796 if (r) 797 return r; 798 799 if (ctx->bc->chiprev == 0) { 800 lit_vals[0] = fui(3.1415926535897f * 2.0f); 801 lit_vals[1] = fui(-3.1415926535897f); 802 } else { 803 lit_vals[0] = fui(1.0f); 804 lit_vals[1] = fui(-0.5f); 805 } 806 807 memset(&alu, 0, sizeof(struct r600_bc_alu)); 808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 809 alu.is_op3 = 1; 810 811 alu.dst.chan = 0; 812 alu.dst.sel = ctx->temp_reg; 813 alu.dst.write = 1; 814 815 alu.src[0].sel = ctx->temp_reg; 816 alu.src[0].chan = 0; 817 818 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 819 alu.src[1].chan = 0; 820 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 821 alu.src[2].chan = 1; 822 alu.last = 1; 823 r = r600_bc_add_alu(ctx->bc, &alu); 824 if (r) 825 return r; 826 r = r600_bc_add_literal(ctx->bc, lit_vals); 827 if (r) 828 return r; 829 return 0; 830} 831 832static int tgsi_trig(struct r600_shader_ctx *ctx) 833{ 834 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 835 struct r600_bc_alu_src r600_src[3]; 836 struct r600_bc_alu alu; 837 int i, r; 838 int lasti = 0; 839 840 r = tgsi_setup_trig(ctx, r600_src); 841 if (r) 842 return r; 843 844 memset(&alu, 0, sizeof(struct r600_bc_alu)); 845 alu.inst = ctx->inst_info->r600_opcode; 846 alu.dst.chan = 0; 847 alu.dst.sel = ctx->temp_reg; 848 alu.dst.write = 1; 849 850 alu.src[0].sel = ctx->temp_reg; 851 alu.src[0].chan = 0; 852 alu.last = 1; 853 r = r600_bc_add_alu(ctx->bc, &alu); 854 if (r) 855 return r; 856 857 /* replicate result */ 858 for (i = 0; i < 4; i++) { 859 if (inst->Dst[0].Register.WriteMask & (1 << i)) 860 lasti = i; 861 } 862 for (i = 0; i < lasti + 1; i++) { 863 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 864 continue; 865 866 memset(&alu, 0, sizeof(struct r600_bc_alu)); 867 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 868 869 alu.src[0].sel = ctx->temp_reg; 870 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 871 if (r) 872 return r; 873 if (i == lasti) 874 alu.last = 1; 875 r = r600_bc_add_alu(ctx->bc, &alu); 876 if (r) 877 return r; 878 } 879 return 0; 880} 881 882static int tgsi_scs(struct r600_shader_ctx *ctx) 883{ 884 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 885 struct r600_bc_alu_src r600_src[3]; 886 struct r600_bc_alu alu; 887 int r; 888 889 r = tgsi_setup_trig(ctx, r600_src); 890 if (r) 891 return r; 892 893 894 /* dst.x = COS */ 895 memset(&alu, 0, sizeof(struct r600_bc_alu)); 896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 897 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 898 if (r) 899 return r; 900 901 alu.src[0].sel = ctx->temp_reg; 902 alu.src[0].chan = 0; 903 alu.last = 1; 904 r = r600_bc_add_alu(ctx->bc, &alu); 905 if (r) 906 return r; 907 908 /* dst.y = SIN */ 909 memset(&alu, 0, sizeof(struct r600_bc_alu)); 910 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 911 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 912 if (r) 913 return r; 914 915 alu.src[0].sel = ctx->temp_reg; 916 alu.src[0].chan = 0; 917 alu.last = 1; 918 r = r600_bc_add_alu(ctx->bc, &alu); 919 if (r) 920 return r; 921 return 0; 922} 923 924static int tgsi_kill(struct r600_shader_ctx *ctx) 925{ 926 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 927 struct r600_bc_alu alu; 928 int i, r; 929 930 for (i = 0; i < 4; i++) { 931 memset(&alu, 0, sizeof(struct r600_bc_alu)); 932 alu.inst = ctx->inst_info->r600_opcode; 933 934 alu.dst.chan = i; 935 936 alu.src[0].sel = V_SQ_ALU_SRC_0; 937 938 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 939 alu.src[1].sel = V_SQ_ALU_SRC_1; 940 alu.src[1].neg = 1; 941 } else { 942 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 943 if (r) 944 return r; 945 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 946 } 947 if (i == 3) { 948 alu.last = 1; 949 } 950 r = r600_bc_add_alu(ctx->bc, &alu); 951 if (r) 952 return r; 953 } 954 r = r600_bc_add_literal(ctx->bc, ctx->value); 955 if (r) 956 return r; 957 958 /* kill must be last in ALU */ 959 ctx->bc->force_add_cf = 1; 960 ctx->shader->uses_kill = TRUE; 961 return 0; 962} 963 964static int tgsi_lit(struct r600_shader_ctx *ctx) 965{ 966 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 967 struct r600_bc_alu alu; 968 struct r600_bc_alu_src r600_src[3]; 969 int r; 970 971 r = tgsi_split_constant(ctx, r600_src); 972 if (r) 973 return r; 974 r = tgsi_split_literal_constant(ctx, r600_src); 975 if (r) 976 return r; 977 978 /* dst.x, <- 1.0 */ 979 memset(&alu, 0, sizeof(struct r600_bc_alu)); 980 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 981 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 982 alu.src[0].chan = 0; 983 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 984 if (r) 985 return r; 986 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 987 r = r600_bc_add_alu(ctx->bc, &alu); 988 if (r) 989 return r; 990 991 /* dst.y = max(src.x, 0.0) */ 992 memset(&alu, 0, sizeof(struct r600_bc_alu)); 993 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 994 alu.src[0] = r600_src[0]; 995 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 996 alu.src[1].chan = 0; 997 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 998 if (r) 999 return r; 1000 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1001 r = r600_bc_add_alu(ctx->bc, &alu); 1002 if (r) 1003 return r; 1004 1005 /* dst.w, <- 1.0 */ 1006 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1007 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1008 alu.src[0].sel = V_SQ_ALU_SRC_1; 1009 alu.src[0].chan = 0; 1010 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1011 if (r) 1012 return r; 1013 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1014 alu.last = 1; 1015 r = r600_bc_add_alu(ctx->bc, &alu); 1016 if (r) 1017 return r; 1018 1019 r = r600_bc_add_literal(ctx->bc, ctx->value); 1020 if (r) 1021 return r; 1022 1023 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1024 { 1025 int chan; 1026 int sel; 1027 1028 /* dst.z = log(src.y) */ 1029 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1030 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1031 alu.src[0] = r600_src[0]; 1032 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1033 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1034 if (r) 1035 return r; 1036 alu.last = 1; 1037 r = r600_bc_add_alu(ctx->bc, &alu); 1038 if (r) 1039 return r; 1040 1041 r = r600_bc_add_literal(ctx->bc, ctx->value); 1042 if (r) 1043 return r; 1044 1045 chan = alu.dst.chan; 1046 sel = alu.dst.sel; 1047 1048 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1049 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1050 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1051 alu.src[0] = r600_src[0]; 1052 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1053 alu.src[1].sel = sel; 1054 alu.src[1].chan = chan; 1055 1056 alu.src[2] = r600_src[0]; 1057 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1058 alu.dst.sel = ctx->temp_reg; 1059 alu.dst.chan = 0; 1060 alu.dst.write = 1; 1061 alu.is_op3 = 1; 1062 alu.last = 1; 1063 r = r600_bc_add_alu(ctx->bc, &alu); 1064 if (r) 1065 return r; 1066 1067 r = r600_bc_add_literal(ctx->bc, ctx->value); 1068 if (r) 1069 return r; 1070 /* dst.z = exp(tmp.x) */ 1071 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1073 alu.src[0].sel = ctx->temp_reg; 1074 alu.src[0].chan = 0; 1075 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1076 if (r) 1077 return r; 1078 alu.last = 1; 1079 r = r600_bc_add_alu(ctx->bc, &alu); 1080 if (r) 1081 return r; 1082 } 1083 return 0; 1084} 1085 1086static int tgsi_rsq(struct r600_shader_ctx *ctx) 1087{ 1088 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1089 struct r600_bc_alu alu; 1090 int i, r; 1091 1092 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE); 1094 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1095 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1096 if (r) 1097 return r; 1098 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1099 alu.src[i].abs = 1; 1100 } 1101 alu.dst.sel = ctx->temp_reg; 1102 alu.dst.write = 1; 1103 alu.last = 1; 1104 r = r600_bc_add_alu(ctx->bc, &alu); 1105 if (r) 1106 return r; 1107 r = r600_bc_add_literal(ctx->bc, ctx->value); 1108 if (r) 1109 return r; 1110 /* replicate result */ 1111 return tgsi_helper_tempx_replicate(ctx); 1112} 1113 1114static int tgsi_trans(struct r600_shader_ctx *ctx) 1115{ 1116 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1117 struct r600_bc_alu alu; 1118 int i, j, r; 1119 1120 for (i = 0; i < 4; i++) { 1121 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1122 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1123 alu.inst = ctx->inst_info->r600_opcode; 1124 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1125 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); 1126 if (r) 1127 return r; 1128 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1129 } 1130 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1131 if (r) 1132 return r; 1133 alu.last = 1; 1134 r = r600_bc_add_alu(ctx->bc, &alu); 1135 if (r) 1136 return r; 1137 } 1138 } 1139 return 0; 1140} 1141 1142static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1143{ 1144 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1145 struct r600_bc_alu alu; 1146 int i, r; 1147 1148 for (i = 0; i < 4; i++) { 1149 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1150 alu.src[0].sel = ctx->temp_reg; 1151 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1152 alu.dst.chan = i; 1153 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1154 if (r) 1155 return r; 1156 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1157 if (i == 3) 1158 alu.last = 1; 1159 r = r600_bc_add_alu(ctx->bc, &alu); 1160 if (r) 1161 return r; 1162 } 1163 return 0; 1164} 1165 1166static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1167{ 1168 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1169 struct r600_bc_alu alu; 1170 int i, r; 1171 1172 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1173 alu.inst = ctx->inst_info->r600_opcode; 1174 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1175 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1176 if (r) 1177 return r; 1178 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1179 } 1180 alu.dst.sel = ctx->temp_reg; 1181 alu.dst.write = 1; 1182 alu.last = 1; 1183 r = r600_bc_add_alu(ctx->bc, &alu); 1184 if (r) 1185 return r; 1186 r = r600_bc_add_literal(ctx->bc, ctx->value); 1187 if (r) 1188 return r; 1189 /* replicate result */ 1190 return tgsi_helper_tempx_replicate(ctx); 1191} 1192 1193static int tgsi_pow(struct r600_shader_ctx *ctx) 1194{ 1195 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1196 struct r600_bc_alu alu; 1197 int r; 1198 1199 /* LOG2(a) */ 1200 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1202 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1203 if (r) 1204 return r; 1205 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1206 alu.dst.sel = ctx->temp_reg; 1207 alu.dst.write = 1; 1208 alu.last = 1; 1209 r = r600_bc_add_alu(ctx->bc, &alu); 1210 if (r) 1211 return r; 1212 r = r600_bc_add_literal(ctx->bc,ctx->value); 1213 if (r) 1214 return r; 1215 /* b * LOG2(a) */ 1216 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1218 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1219 if (r) 1220 return r; 1221 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1222 alu.src[1].sel = ctx->temp_reg; 1223 alu.dst.sel = ctx->temp_reg; 1224 alu.dst.write = 1; 1225 alu.last = 1; 1226 r = r600_bc_add_alu(ctx->bc, &alu); 1227 if (r) 1228 return r; 1229 r = r600_bc_add_literal(ctx->bc,ctx->value); 1230 if (r) 1231 return r; 1232 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1233 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1234 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1235 alu.src[0].sel = ctx->temp_reg; 1236 alu.dst.sel = ctx->temp_reg; 1237 alu.dst.write = 1; 1238 alu.last = 1; 1239 r = r600_bc_add_alu(ctx->bc, &alu); 1240 if (r) 1241 return r; 1242 r = r600_bc_add_literal(ctx->bc,ctx->value); 1243 if (r) 1244 return r; 1245 return tgsi_helper_tempx_replicate(ctx); 1246} 1247 1248static int tgsi_ssg(struct r600_shader_ctx *ctx) 1249{ 1250 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1251 struct r600_bc_alu alu; 1252 struct r600_bc_alu_src r600_src[3]; 1253 int i, r; 1254 1255 r = tgsi_split_constant(ctx, r600_src); 1256 if (r) 1257 return r; 1258 1259 /* tmp = (src > 0 ? 1 : src) */ 1260 for (i = 0; i < 4; i++) { 1261 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1262 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1263 alu.is_op3 = 1; 1264 1265 alu.dst.sel = ctx->temp_reg; 1266 alu.dst.chan = i; 1267 1268 alu.src[0] = r600_src[0]; 1269 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1270 1271 alu.src[1].sel = V_SQ_ALU_SRC_1; 1272 1273 alu.src[2] = r600_src[0]; 1274 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1275 if (i == 3) 1276 alu.last = 1; 1277 r = r600_bc_add_alu(ctx->bc, &alu); 1278 if (r) 1279 return r; 1280 } 1281 r = r600_bc_add_literal(ctx->bc, ctx->value); 1282 if (r) 1283 return r; 1284 1285 /* dst = (-tmp > 0 ? -1 : tmp) */ 1286 for (i = 0; i < 4; i++) { 1287 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1289 alu.is_op3 = 1; 1290 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1291 if (r) 1292 return r; 1293 1294 alu.src[0].sel = ctx->temp_reg; 1295 alu.src[0].chan = i; 1296 alu.src[0].neg = 1; 1297 1298 alu.src[1].sel = V_SQ_ALU_SRC_1; 1299 alu.src[1].neg = 1; 1300 1301 alu.src[2].sel = ctx->temp_reg; 1302 alu.src[2].chan = i; 1303 1304 if (i == 3) 1305 alu.last = 1; 1306 r = r600_bc_add_alu(ctx->bc, &alu); 1307 if (r) 1308 return r; 1309 } 1310 return 0; 1311} 1312 1313static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1314{ 1315 struct r600_bc_alu alu; 1316 int i, r; 1317 1318 r = r600_bc_add_literal(ctx->bc, ctx->value); 1319 if (r) 1320 return r; 1321 for (i = 0; i < 4; i++) { 1322 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1323 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1325 alu.dst.chan = i; 1326 } else { 1327 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1328 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1329 if (r) 1330 return r; 1331 alu.src[0].sel = ctx->temp_reg; 1332 alu.src[0].chan = i; 1333 } 1334 if (i == 3) { 1335 alu.last = 1; 1336 } 1337 r = r600_bc_add_alu(ctx->bc, &alu); 1338 if (r) 1339 return r; 1340 } 1341 return 0; 1342} 1343 1344static int tgsi_op3(struct r600_shader_ctx *ctx) 1345{ 1346 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1347 struct r600_bc_alu_src r600_src[3]; 1348 struct r600_bc_alu alu; 1349 int i, j, r; 1350 1351 r = tgsi_split_constant(ctx, r600_src); 1352 if (r) 1353 return r; 1354 /* do it in 2 step as op3 doesn't support writemask */ 1355 for (i = 0; i < 4; i++) { 1356 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1357 alu.inst = ctx->inst_info->r600_opcode; 1358 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1359 alu.src[j] = r600_src[j]; 1360 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1361 } 1362 alu.dst.sel = ctx->temp_reg; 1363 alu.dst.chan = i; 1364 alu.dst.write = 1; 1365 alu.is_op3 = 1; 1366 if (i == 3) { 1367 alu.last = 1; 1368 } 1369 r = r600_bc_add_alu(ctx->bc, &alu); 1370 if (r) 1371 return r; 1372 } 1373 return tgsi_helper_copy(ctx, inst); 1374} 1375 1376static int tgsi_dp(struct r600_shader_ctx *ctx) 1377{ 1378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1379 struct r600_bc_alu_src r600_src[3]; 1380 struct r600_bc_alu alu; 1381 int i, j, r; 1382 1383 r = tgsi_split_constant(ctx, r600_src); 1384 if (r) 1385 return r; 1386 for (i = 0; i < 4; i++) { 1387 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1388 alu.inst = ctx->inst_info->r600_opcode; 1389 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1390 alu.src[j] = r600_src[j]; 1391 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1392 } 1393 alu.dst.sel = ctx->temp_reg; 1394 alu.dst.chan = i; 1395 alu.dst.write = 1; 1396 /* handle some special cases */ 1397 switch (ctx->inst_info->tgsi_opcode) { 1398 case TGSI_OPCODE_DP2: 1399 if (i > 1) { 1400 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1401 alu.src[0].chan = alu.src[1].chan = 0; 1402 } 1403 break; 1404 case TGSI_OPCODE_DP3: 1405 if (i > 2) { 1406 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1407 alu.src[0].chan = alu.src[1].chan = 0; 1408 } 1409 break; 1410 case TGSI_OPCODE_DPH: 1411 if (i == 3) { 1412 alu.src[0].sel = V_SQ_ALU_SRC_1; 1413 alu.src[0].chan = 0; 1414 alu.src[0].neg = 0; 1415 } 1416 break; 1417 default: 1418 break; 1419 } 1420 if (i == 3) { 1421 alu.last = 1; 1422 } 1423 r = r600_bc_add_alu(ctx->bc, &alu); 1424 if (r) 1425 return r; 1426 } 1427 return tgsi_helper_copy(ctx, inst); 1428} 1429 1430static int tgsi_tex(struct r600_shader_ctx *ctx) 1431{ 1432 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1433 struct r600_bc_tex tex; 1434 struct r600_bc_alu alu; 1435 unsigned src_gpr; 1436 int r, i; 1437 int opcode; 1438 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1439 uint32_t lit_vals[4]; 1440 1441 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1442 1443 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1444 /* Add perspective divide */ 1445 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1446 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1447 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1448 if (r) 1449 return r; 1450 1451 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1452 alu.dst.sel = ctx->temp_reg; 1453 alu.dst.chan = 3; 1454 alu.last = 1; 1455 alu.dst.write = 1; 1456 r = r600_bc_add_alu(ctx->bc, &alu); 1457 if (r) 1458 return r; 1459 1460 for (i = 0; i < 3; i++) { 1461 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1462 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1463 alu.src[0].sel = ctx->temp_reg; 1464 alu.src[0].chan = 3; 1465 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1466 if (r) 1467 return r; 1468 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1469 alu.dst.sel = ctx->temp_reg; 1470 alu.dst.chan = i; 1471 alu.dst.write = 1; 1472 r = r600_bc_add_alu(ctx->bc, &alu); 1473 if (r) 1474 return r; 1475 } 1476 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1477 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1478 alu.src[0].sel = V_SQ_ALU_SRC_1; 1479 alu.src[0].chan = 0; 1480 alu.dst.sel = ctx->temp_reg; 1481 alu.dst.chan = 3; 1482 alu.last = 1; 1483 alu.dst.write = 1; 1484 r = r600_bc_add_alu(ctx->bc, &alu); 1485 if (r) 1486 return r; 1487 src_not_temp = false; 1488 src_gpr = ctx->temp_reg; 1489 } 1490 1491 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1492 int src_chan, src2_chan; 1493 1494 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1495 for (i = 0; i < 4; i++) { 1496 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1497 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1498 switch (i) { 1499 case 0: 1500 src_chan = 2; 1501 src2_chan = 1; 1502 break; 1503 case 1: 1504 src_chan = 2; 1505 src2_chan = 0; 1506 break; 1507 case 2: 1508 src_chan = 0; 1509 src2_chan = 2; 1510 break; 1511 case 3: 1512 src_chan = 1; 1513 src2_chan = 2; 1514 break; 1515 } 1516 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1517 if (r) 1518 return r; 1519 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1520 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1521 if (r) 1522 return r; 1523 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1524 alu.dst.sel = ctx->temp_reg; 1525 alu.dst.chan = i; 1526 if (i == 3) 1527 alu.last = 1; 1528 alu.dst.write = 1; 1529 r = r600_bc_add_alu(ctx->bc, &alu); 1530 if (r) 1531 return r; 1532 } 1533 1534 /* tmp1.z = RCP_e(|tmp1.z|) */ 1535 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1537 alu.src[0].sel = ctx->temp_reg; 1538 alu.src[0].chan = 2; 1539 alu.src[0].abs = 1; 1540 alu.dst.sel = ctx->temp_reg; 1541 alu.dst.chan = 2; 1542 alu.dst.write = 1; 1543 alu.last = 1; 1544 r = r600_bc_add_alu(ctx->bc, &alu); 1545 if (r) 1546 return r; 1547 1548 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1549 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1550 * muladd has no writemask, have to use another temp 1551 */ 1552 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1554 alu.is_op3 = 1; 1555 1556 alu.src[0].sel = ctx->temp_reg; 1557 alu.src[0].chan = 0; 1558 alu.src[1].sel = ctx->temp_reg; 1559 alu.src[1].chan = 2; 1560 1561 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1562 alu.src[2].chan = 0; 1563 1564 alu.dst.sel = ctx->temp_reg; 1565 alu.dst.chan = 0; 1566 alu.dst.write = 1; 1567 1568 r = r600_bc_add_alu(ctx->bc, &alu); 1569 if (r) 1570 return r; 1571 1572 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1573 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1574 alu.is_op3 = 1; 1575 1576 alu.src[0].sel = ctx->temp_reg; 1577 alu.src[0].chan = 1; 1578 alu.src[1].sel = ctx->temp_reg; 1579 alu.src[1].chan = 2; 1580 1581 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1582 alu.src[2].chan = 0; 1583 1584 alu.dst.sel = ctx->temp_reg; 1585 alu.dst.chan = 1; 1586 alu.dst.write = 1; 1587 1588 alu.last = 1; 1589 r = r600_bc_add_alu(ctx->bc, &alu); 1590 if (r) 1591 return r; 1592 1593 lit_vals[0] = fui(1.5f); 1594 1595 r = r600_bc_add_literal(ctx->bc, lit_vals); 1596 if (r) 1597 return r; 1598 src_not_temp = false; 1599 src_gpr = ctx->temp_reg; 1600 } 1601 1602 if (src_not_temp) { 1603 for (i = 0; i < 4; i++) { 1604 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1605 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1606 alu.src[0].sel = src_gpr; 1607 alu.src[0].chan = i; 1608 alu.dst.sel = ctx->temp_reg; 1609 alu.dst.chan = i; 1610 if (i == 3) 1611 alu.last = 1; 1612 alu.dst.write = 1; 1613 r = r600_bc_add_alu(ctx->bc, &alu); 1614 if (r) 1615 return r; 1616 } 1617 src_gpr = ctx->temp_reg; 1618 } 1619 1620 opcode = ctx->inst_info->r600_opcode; 1621 if (opcode == SQ_TEX_INST_SAMPLE && 1622 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1623 opcode = SQ_TEX_INST_SAMPLE_C; 1624 1625 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1626 tex.inst = opcode; 1627 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1628 tex.sampler_id = tex.resource_id; 1629 tex.src_gpr = src_gpr; 1630 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1631 tex.dst_sel_x = 0; 1632 tex.dst_sel_y = 1; 1633 tex.dst_sel_z = 2; 1634 tex.dst_sel_w = 3; 1635 tex.src_sel_x = 0; 1636 tex.src_sel_y = 1; 1637 tex.src_sel_z = 2; 1638 tex.src_sel_w = 3; 1639 1640 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1641 tex.src_sel_x = 1; 1642 tex.src_sel_y = 0; 1643 tex.src_sel_z = 3; 1644 tex.src_sel_w = 1; 1645 } 1646 1647 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1648 tex.coord_type_x = 1; 1649 tex.coord_type_y = 1; 1650 tex.coord_type_z = 1; 1651 tex.coord_type_w = 1; 1652 } 1653 1654 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1655 tex.src_sel_w = 2; 1656 1657 r = r600_bc_add_tex(ctx->bc, &tex); 1658 if (r) 1659 return r; 1660 1661 /* add shadow ambient support - gallium doesn't do it yet */ 1662 return 0; 1663 1664} 1665 1666static int tgsi_lrp(struct r600_shader_ctx *ctx) 1667{ 1668 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1669 struct r600_bc_alu_src r600_src[3]; 1670 struct r600_bc_alu alu; 1671 unsigned i; 1672 int r; 1673 1674 r = tgsi_split_constant(ctx, r600_src); 1675 if (r) 1676 return r; 1677 /* 1 - src0 */ 1678 for (i = 0; i < 4; i++) { 1679 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1680 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1681 alu.src[0].sel = V_SQ_ALU_SRC_1; 1682 alu.src[0].chan = 0; 1683 alu.src[1] = r600_src[0]; 1684 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1685 alu.src[1].neg = 1; 1686 alu.dst.sel = ctx->temp_reg; 1687 alu.dst.chan = i; 1688 if (i == 3) { 1689 alu.last = 1; 1690 } 1691 alu.dst.write = 1; 1692 r = r600_bc_add_alu(ctx->bc, &alu); 1693 if (r) 1694 return r; 1695 } 1696 r = r600_bc_add_literal(ctx->bc, ctx->value); 1697 if (r) 1698 return r; 1699 1700 /* (1 - src0) * src2 */ 1701 for (i = 0; i < 4; i++) { 1702 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1703 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1704 alu.src[0].sel = ctx->temp_reg; 1705 alu.src[0].chan = i; 1706 alu.src[1] = r600_src[2]; 1707 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1708 alu.dst.sel = ctx->temp_reg; 1709 alu.dst.chan = i; 1710 if (i == 3) { 1711 alu.last = 1; 1712 } 1713 alu.dst.write = 1; 1714 r = r600_bc_add_alu(ctx->bc, &alu); 1715 if (r) 1716 return r; 1717 } 1718 r = r600_bc_add_literal(ctx->bc, ctx->value); 1719 if (r) 1720 return r; 1721 1722 /* src0 * src1 + (1 - src0) * src2 */ 1723 for (i = 0; i < 4; i++) { 1724 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1725 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1726 alu.is_op3 = 1; 1727 alu.src[0] = r600_src[0]; 1728 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1729 alu.src[1] = r600_src[1]; 1730 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1731 alu.src[2].sel = ctx->temp_reg; 1732 alu.src[2].chan = i; 1733 alu.dst.sel = ctx->temp_reg; 1734 alu.dst.chan = i; 1735 if (i == 3) { 1736 alu.last = 1; 1737 } 1738 r = r600_bc_add_alu(ctx->bc, &alu); 1739 if (r) 1740 return r; 1741 } 1742 return tgsi_helper_copy(ctx, inst); 1743} 1744 1745static int tgsi_cmp(struct r600_shader_ctx *ctx) 1746{ 1747 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1748 struct r600_bc_alu_src r600_src[3]; 1749 struct r600_bc_alu alu; 1750 int use_temp = 0; 1751 int i, r; 1752 1753 r = tgsi_split_constant(ctx, r600_src); 1754 if (r) 1755 return r; 1756 1757 if (inst->Dst[0].Register.WriteMask != 0xf) 1758 use_temp = 1; 1759 1760 for (i = 0; i < 4; i++) { 1761 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1762 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1763 alu.src[0] = r600_src[0]; 1764 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1765 1766 alu.src[1] = r600_src[2]; 1767 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1768 1769 alu.src[2] = r600_src[1]; 1770 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 1771 1772 if (use_temp) 1773 alu.dst.sel = ctx->temp_reg; 1774 else { 1775 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1776 if (r) 1777 return r; 1778 } 1779 alu.dst.chan = i; 1780 alu.dst.write = 1; 1781 alu.is_op3 = 1; 1782 if (i == 3) 1783 alu.last = 1; 1784 r = r600_bc_add_alu(ctx->bc, &alu); 1785 if (r) 1786 return r; 1787 } 1788 if (use_temp) 1789 return tgsi_helper_copy(ctx, inst); 1790 return 0; 1791} 1792 1793static int tgsi_xpd(struct r600_shader_ctx *ctx) 1794{ 1795 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1796 struct r600_bc_alu_src r600_src[3]; 1797 struct r600_bc_alu alu; 1798 uint32_t use_temp = 0; 1799 int i, r; 1800 1801 if (inst->Dst[0].Register.WriteMask != 0xf) 1802 use_temp = 1; 1803 1804 r = tgsi_split_constant(ctx, r600_src); 1805 if (r) 1806 return r; 1807 1808 for (i = 0; i < 4; i++) { 1809 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1810 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1811 1812 alu.src[0] = r600_src[0]; 1813 switch (i) { 1814 case 0: 1815 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1816 break; 1817 case 1: 1818 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1819 break; 1820 case 2: 1821 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1822 break; 1823 case 3: 1824 alu.src[0].sel = V_SQ_ALU_SRC_0; 1825 alu.src[0].chan = i; 1826 } 1827 1828 alu.src[1] = r600_src[1]; 1829 switch (i) { 1830 case 0: 1831 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1832 break; 1833 case 1: 1834 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1835 break; 1836 case 2: 1837 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1838 break; 1839 case 3: 1840 alu.src[1].sel = V_SQ_ALU_SRC_0; 1841 alu.src[1].chan = i; 1842 } 1843 1844 alu.dst.sel = ctx->temp_reg; 1845 alu.dst.chan = i; 1846 alu.dst.write = 1; 1847 1848 if (i == 3) 1849 alu.last = 1; 1850 r = r600_bc_add_alu(ctx->bc, &alu); 1851 if (r) 1852 return r; 1853 } 1854 1855 for (i = 0; i < 4; i++) { 1856 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1858 1859 alu.src[0] = r600_src[0]; 1860 switch (i) { 1861 case 0: 1862 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1863 break; 1864 case 1: 1865 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1866 break; 1867 case 2: 1868 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1869 break; 1870 case 3: 1871 alu.src[0].sel = V_SQ_ALU_SRC_0; 1872 alu.src[0].chan = i; 1873 } 1874 1875 alu.src[1] = r600_src[1]; 1876 switch (i) { 1877 case 0: 1878 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1879 break; 1880 case 1: 1881 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1882 break; 1883 case 2: 1884 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1885 break; 1886 case 3: 1887 alu.src[1].sel = V_SQ_ALU_SRC_0; 1888 alu.src[1].chan = i; 1889 } 1890 1891 alu.src[2].sel = ctx->temp_reg; 1892 alu.src[2].neg = 1; 1893 alu.src[2].chan = i; 1894 1895 if (use_temp) 1896 alu.dst.sel = ctx->temp_reg; 1897 else { 1898 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1899 if (r) 1900 return r; 1901 } 1902 alu.dst.chan = i; 1903 alu.dst.write = 1; 1904 alu.is_op3 = 1; 1905 if (i == 3) 1906 alu.last = 1; 1907 r = r600_bc_add_alu(ctx->bc, &alu); 1908 if (r) 1909 return r; 1910 } 1911 if (use_temp) 1912 return tgsi_helper_copy(ctx, inst); 1913 return 0; 1914} 1915 1916static int tgsi_exp(struct r600_shader_ctx *ctx) 1917{ 1918 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1919 struct r600_bc_alu_src r600_src[3]; 1920 struct r600_bc_alu alu; 1921 int r; 1922 1923 /* result.x = 2^floor(src); */ 1924 if (inst->Dst[0].Register.WriteMask & 1) { 1925 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1926 1927 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 1928 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1929 if (r) 1930 return r; 1931 1932 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1933 1934 alu.dst.sel = ctx->temp_reg; 1935 alu.dst.chan = 0; 1936 alu.dst.write = 1; 1937 alu.last = 1; 1938 r = r600_bc_add_alu(ctx->bc, &alu); 1939 if (r) 1940 return r; 1941 1942 r = r600_bc_add_literal(ctx->bc, ctx->value); 1943 if (r) 1944 return r; 1945 1946 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1947 alu.src[0].sel = ctx->temp_reg; 1948 alu.src[0].chan = 0; 1949 1950 alu.dst.sel = ctx->temp_reg; 1951 alu.dst.chan = 0; 1952 alu.dst.write = 1; 1953 alu.last = 1; 1954 r = r600_bc_add_alu(ctx->bc, &alu); 1955 if (r) 1956 return r; 1957 1958 r = r600_bc_add_literal(ctx->bc, ctx->value); 1959 if (r) 1960 return r; 1961 } 1962 1963 /* result.y = tmp - floor(tmp); */ 1964 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 1965 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1966 1967 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1968 alu.src[0] = r600_src[0]; 1969 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1970 if (r) 1971 return r; 1972 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1973 1974 alu.dst.sel = ctx->temp_reg; 1975// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1976// if (r) 1977// return r; 1978 alu.dst.write = 1; 1979 alu.dst.chan = 1; 1980 1981 alu.last = 1; 1982 1983 r = r600_bc_add_alu(ctx->bc, &alu); 1984 if (r) 1985 return r; 1986 r = r600_bc_add_literal(ctx->bc, ctx->value); 1987 if (r) 1988 return r; 1989 } 1990 1991 /* result.z = RoughApprox2ToX(tmp);*/ 1992 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 1993 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1995 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1996 if (r) 1997 return r; 1998 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1999 2000 alu.dst.sel = ctx->temp_reg; 2001 alu.dst.write = 1; 2002 alu.dst.chan = 2; 2003 2004 alu.last = 1; 2005 2006 r = r600_bc_add_alu(ctx->bc, &alu); 2007 if (r) 2008 return r; 2009 r = r600_bc_add_literal(ctx->bc, ctx->value); 2010 if (r) 2011 return r; 2012 } 2013 2014 /* result.w = 1.0;*/ 2015 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2016 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2017 2018 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2019 alu.src[0].sel = V_SQ_ALU_SRC_1; 2020 alu.src[0].chan = 0; 2021 2022 alu.dst.sel = ctx->temp_reg; 2023 alu.dst.chan = 3; 2024 alu.dst.write = 1; 2025 alu.last = 1; 2026 r = r600_bc_add_alu(ctx->bc, &alu); 2027 if (r) 2028 return r; 2029 r = r600_bc_add_literal(ctx->bc, ctx->value); 2030 if (r) 2031 return r; 2032 } 2033 return tgsi_helper_copy(ctx, inst); 2034} 2035 2036static int tgsi_log(struct r600_shader_ctx *ctx) 2037{ 2038 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2039 struct r600_bc_alu_src r600_src[3]; 2040 struct r600_bc_alu alu; 2041 int r; 2042 2043 /* result.x = floor(log2(src)); */ 2044 if (inst->Dst[0].Register.WriteMask & 1) { 2045 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2046 2047 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2048 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2049 if (r) 2050 return r; 2051 2052 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2053 2054 alu.dst.sel = ctx->temp_reg; 2055 alu.dst.chan = 0; 2056 alu.dst.write = 1; 2057 alu.last = 1; 2058 r = r600_bc_add_alu(ctx->bc, &alu); 2059 if (r) 2060 return r; 2061 2062 r = r600_bc_add_literal(ctx->bc, ctx->value); 2063 if (r) 2064 return r; 2065 2066 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2067 alu.src[0].sel = ctx->temp_reg; 2068 alu.src[0].chan = 0; 2069 2070 alu.dst.sel = ctx->temp_reg; 2071 alu.dst.chan = 0; 2072 alu.dst.write = 1; 2073 alu.last = 1; 2074 2075 r = r600_bc_add_alu(ctx->bc, &alu); 2076 if (r) 2077 return r; 2078 2079 r = r600_bc_add_literal(ctx->bc, ctx->value); 2080 if (r) 2081 return r; 2082 } 2083 2084 /* result.y = FIXME; */ 2085 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2086 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2087 2088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2089 alu.src[0].sel = V_SQ_ALU_SRC_1; 2090 alu.src[0].chan = 0; 2091 2092 alu.dst.sel = ctx->temp_reg; 2093 alu.dst.chan = 1; 2094 alu.dst.write = 1; 2095 alu.last = 1; 2096 2097 r = r600_bc_add_alu(ctx->bc, &alu); 2098 if (r) 2099 return r; 2100 2101 r = r600_bc_add_literal(ctx->bc, ctx->value); 2102 if (r) 2103 return r; 2104 } 2105 2106 /* result.z = log2(src);*/ 2107 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2108 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2109 2110 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2111 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2112 if (r) 2113 return r; 2114 2115 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2116 2117 alu.dst.sel = ctx->temp_reg; 2118 alu.dst.write = 1; 2119 alu.dst.chan = 2; 2120 alu.last = 1; 2121 2122 r = r600_bc_add_alu(ctx->bc, &alu); 2123 if (r) 2124 return r; 2125 2126 r = r600_bc_add_literal(ctx->bc, ctx->value); 2127 if (r) 2128 return r; 2129 } 2130 2131 /* result.w = 1.0; */ 2132 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2133 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2134 2135 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2136 alu.src[0].sel = V_SQ_ALU_SRC_1; 2137 alu.src[0].chan = 0; 2138 2139 alu.dst.sel = ctx->temp_reg; 2140 alu.dst.chan = 3; 2141 alu.dst.write = 1; 2142 alu.last = 1; 2143 2144 r = r600_bc_add_alu(ctx->bc, &alu); 2145 if (r) 2146 return r; 2147 2148 r = r600_bc_add_literal(ctx->bc, ctx->value); 2149 if (r) 2150 return r; 2151 } 2152 2153 return tgsi_helper_copy(ctx, inst); 2154} 2155 2156static int tgsi_arl(struct r600_shader_ctx *ctx) 2157{ 2158 /* TODO from r600c, ar values don't persist between clauses */ 2159 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2160 struct r600_bc_alu alu; 2161 int r; 2162 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2163 2164 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2165 2166 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2167 if (r) 2168 return r; 2169 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2170 2171 alu.last = 1; 2172 2173 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2174 if (r) 2175 return r; 2176 return 0; 2177} 2178 2179static int tgsi_opdst(struct r600_shader_ctx *ctx) 2180{ 2181 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2182 struct r600_bc_alu alu; 2183 int i, r = 0; 2184 2185 for (i = 0; i < 4; i++) { 2186 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2187 2188 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2189 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2190 if (r) 2191 return r; 2192 2193 if (i == 0 || i == 3) { 2194 alu.src[0].sel = V_SQ_ALU_SRC_1; 2195 } else { 2196 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2197 if (r) 2198 return r; 2199 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2200 } 2201 2202 if (i == 0 || i == 2) { 2203 alu.src[1].sel = V_SQ_ALU_SRC_1; 2204 } else { 2205 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2206 if (r) 2207 return r; 2208 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2209 } 2210 if (i == 3) 2211 alu.last = 1; 2212 r = r600_bc_add_alu(ctx->bc, &alu); 2213 if (r) 2214 return r; 2215 } 2216 return 0; 2217} 2218 2219static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2220{ 2221 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2222 struct r600_bc_alu alu; 2223 int r; 2224 2225 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2226 alu.inst = opcode; 2227 alu.predicate = 1; 2228 2229 alu.dst.sel = ctx->temp_reg; 2230 alu.dst.write = 1; 2231 alu.dst.chan = 0; 2232 2233 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2234 if (r) 2235 return r; 2236 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2237 alu.src[1].sel = V_SQ_ALU_SRC_0; 2238 alu.src[1].chan = 0; 2239 2240 alu.last = 1; 2241 2242 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2243 if (r) 2244 return r; 2245 return 0; 2246} 2247 2248static int pops(struct r600_shader_ctx *ctx, int pops) 2249{ 2250 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2251 ctx->bc->cf_last->pop_count = pops; 2252 return 0; 2253} 2254 2255static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2256{ 2257 switch(reason) { 2258 case FC_PUSH_VPM: 2259 ctx->bc->callstack[ctx->bc->call_sp].current--; 2260 break; 2261 case FC_PUSH_WQM: 2262 case FC_LOOP: 2263 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2264 break; 2265 case FC_REP: 2266 /* TOODO : for 16 vp asic should -= 2; */ 2267 ctx->bc->callstack[ctx->bc->call_sp].current --; 2268 break; 2269 } 2270} 2271 2272static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2273{ 2274 if (check_max_only) { 2275 int diff; 2276 switch (reason) { 2277 case FC_PUSH_VPM: 2278 diff = 1; 2279 break; 2280 case FC_PUSH_WQM: 2281 diff = 4; 2282 break; 2283 } 2284 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2285 ctx->bc->callstack[ctx->bc->call_sp].max) { 2286 ctx->bc->callstack[ctx->bc->call_sp].max = 2287 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2288 } 2289 return; 2290 } 2291 switch (reason) { 2292 case FC_PUSH_VPM: 2293 ctx->bc->callstack[ctx->bc->call_sp].current++; 2294 break; 2295 case FC_PUSH_WQM: 2296 case FC_LOOP: 2297 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2298 break; 2299 case FC_REP: 2300 ctx->bc->callstack[ctx->bc->call_sp].current++; 2301 break; 2302 } 2303 2304 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2305 ctx->bc->callstack[ctx->bc->call_sp].max) { 2306 ctx->bc->callstack[ctx->bc->call_sp].max = 2307 ctx->bc->callstack[ctx->bc->call_sp].current; 2308 } 2309} 2310 2311static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2312{ 2313 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2314 2315 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2316 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2317 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2318 sp->num_mid++; 2319} 2320 2321static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2322{ 2323 ctx->bc->fc_sp++; 2324 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2325 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2326} 2327 2328static void fc_poplevel(struct r600_shader_ctx *ctx) 2329{ 2330 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2331 if (sp->mid) { 2332 free(sp->mid); 2333 sp->mid = NULL; 2334 } 2335 sp->num_mid = 0; 2336 sp->start = NULL; 2337 sp->type = 0; 2338 ctx->bc->fc_sp--; 2339} 2340 2341#if 0 2342static int emit_return(struct r600_shader_ctx *ctx) 2343{ 2344 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2345 return 0; 2346} 2347 2348static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2349{ 2350 2351 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2352 ctx->bc->cf_last->pop_count = pops; 2353 /* TODO work out offset */ 2354 return 0; 2355} 2356 2357static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2358{ 2359 return 0; 2360} 2361 2362static void emit_testflag(struct r600_shader_ctx *ctx) 2363{ 2364 2365} 2366 2367static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2368{ 2369 emit_testflag(ctx); 2370 emit_jump_to_offset(ctx, 1, 4); 2371 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2372 pops(ctx, ifidx + 1); 2373 emit_return(ctx); 2374} 2375 2376static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2377{ 2378 emit_testflag(ctx); 2379 2380 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2381 ctx->bc->cf_last->pop_count = 1; 2382 2383 fc_set_mid(ctx, fc_sp); 2384 2385 pops(ctx, 1); 2386} 2387#endif 2388 2389static int tgsi_if(struct r600_shader_ctx *ctx) 2390{ 2391 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2392 2393 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2394 2395 fc_pushlevel(ctx, FC_IF); 2396 2397 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2398 return 0; 2399} 2400 2401static int tgsi_else(struct r600_shader_ctx *ctx) 2402{ 2403 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2404 ctx->bc->cf_last->pop_count = 1; 2405 2406 fc_set_mid(ctx, ctx->bc->fc_sp); 2407 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2408 return 0; 2409} 2410 2411static int tgsi_endif(struct r600_shader_ctx *ctx) 2412{ 2413 pops(ctx, 1); 2414 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2415 R600_ERR("if/endif unbalanced in shader\n"); 2416 return -1; 2417 } 2418 2419 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2420 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2421 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2422 } else { 2423 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2424 } 2425 fc_poplevel(ctx); 2426 2427 callstack_decrease_current(ctx, FC_PUSH_VPM); 2428 return 0; 2429} 2430 2431static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2432{ 2433 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2434 2435 fc_pushlevel(ctx, FC_LOOP); 2436 2437 /* check stack depth */ 2438 callstack_check_depth(ctx, FC_LOOP, 0); 2439 return 0; 2440} 2441 2442static int tgsi_endloop(struct r600_shader_ctx *ctx) 2443{ 2444 int i; 2445 2446 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2447 2448 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2449 R600_ERR("loop/endloop in shader code are not paired.\n"); 2450 return -EINVAL; 2451 } 2452 2453 /* fixup loop pointers - from r600isa 2454 LOOP END points to CF after LOOP START, 2455 LOOP START point to CF after LOOP END 2456 BRK/CONT point to LOOP END CF 2457 */ 2458 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2459 2460 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2461 2462 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2463 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2464 } 2465 /* TODO add LOOPRET support */ 2466 fc_poplevel(ctx); 2467 callstack_decrease_current(ctx, FC_LOOP); 2468 return 0; 2469} 2470 2471static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2472{ 2473 unsigned int fscp; 2474 2475 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2476 { 2477 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2478 break; 2479 } 2480 2481 if (fscp == 0) { 2482 R600_ERR("Break not inside loop/endloop pair\n"); 2483 return -EINVAL; 2484 } 2485 2486 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2487 ctx->bc->cf_last->pop_count = 1; 2488 2489 fc_set_mid(ctx, fscp); 2490 2491 pops(ctx, 1); 2492 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2493 return 0; 2494} 2495 2496static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2497 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2498 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2499 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2500 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2501 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2502 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2503 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2504 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2505 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2506 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2507 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2508 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2509 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2510 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2511 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2512 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2513 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2514 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2515 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2516 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2517 /* gap */ 2518 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2519 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2520 /* gap */ 2521 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2522 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2523 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2524 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2525 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2526 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2527 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2528 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2529 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2530 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2531 /* gap */ 2532 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2533 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2534 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2535 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2536 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2537 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2538 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2539 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2540 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2541 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2542 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2543 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2544 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2545 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2546 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2547 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2548 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2549 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2550 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2551 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2552 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2553 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2554 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2555 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2556 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2557 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2558 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2559 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2560 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2561 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2562 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2563 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2564 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2565 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2566 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2567 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2568 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2569 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2570 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2571 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2572 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2573 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2574 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2575 /* gap */ 2576 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2577 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2578 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2579 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2580 /* gap */ 2581 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2582 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2583 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2584 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2585 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2586 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2587 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2588 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2589 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2590 /* gap */ 2591 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2592 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2593 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2594 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2595 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2596 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2597 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2598 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2599 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2600 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2601 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2602 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2603 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2604 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2605 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2606 /* gap */ 2607 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2608 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2609 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2610 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2611 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2612 /* gap */ 2613 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2614 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2615 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2616 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2617 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2618 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2619 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2620 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2621 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2622 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2623 /* gap */ 2624 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2625 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2626 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2627 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2628 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2629 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2630 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2631 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2632 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2633 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2634 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2635 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2636 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2637 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2638 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2639 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2640 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2641 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2642 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2643 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2644 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2645 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2646 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2647 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2648 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2649 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2650 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2651 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2652}; 2653