r600_shader.c revision 5d66a8606d68caf0fb4754c144c5fb7d87fbf7df
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36 37 38struct r600_shader_tgsi_instruction; 39 40struct r600_shader_ctx { 41 struct tgsi_shader_info info; 42 struct tgsi_parse_context parse; 43 const struct tgsi_token *tokens; 44 unsigned type; 45 unsigned file_offset[TGSI_FILE_COUNT]; 46 unsigned temp_reg; 47 struct r600_shader_tgsi_instruction *inst_info; 48 struct r600_bc *bc; 49 struct r600_shader *shader; 50 u32 value[4]; 51 u32 *literals; 52 u32 nliterals; 53 u32 max_driver_temp_used; 54}; 55 56struct r600_shader_tgsi_instruction { 57 unsigned tgsi_opcode; 58 unsigned is_op3; 59 unsigned r600_opcode; 60 int (*process)(struct r600_shader_ctx *ctx); 61}; 62 63static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; 64static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 65 66static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 67{ 68 struct r600_context *rctx = r600_context(ctx); 69 const struct util_format_description *desc; 70 enum pipe_format resource_format[160]; 71 unsigned i, nresources = 0; 72 struct r600_bc *bc = &shader->bc; 73 struct r600_bc_cf *cf; 74 struct r600_bc_vtx *vtx; 75 76 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 77 return 0; 78 for (i = 0; i < rctx->vertex_elements->count; i++) { 79 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 80 } 81 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 82 switch (cf->inst) { 83 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 85 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 86 desc = util_format_description(resource_format[vtx->buffer_id]); 87 if (desc == NULL) { 88 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 89 return -EINVAL; 90 } 91 vtx->dst_sel_x = desc->swizzle[0]; 92 vtx->dst_sel_y = desc->swizzle[1]; 93 vtx->dst_sel_z = desc->swizzle[2]; 94 vtx->dst_sel_w = desc->swizzle[3]; 95 } 96 break; 97 default: 98 break; 99 } 100 } 101 return r600_bc_build(&shader->bc); 102} 103 104int r600_pipe_shader_create(struct pipe_context *ctx, 105 struct r600_context_state *rpshader, 106 const struct tgsi_token *tokens) 107{ 108 struct r600_screen *rscreen = r600_screen(ctx->screen); 109 int r; 110 111//fprintf(stderr, "--------------------------------------------------------------\n"); 112//tgsi_dump(tokens, 0); 113 if (rpshader == NULL) 114 return -ENOMEM; 115 rpshader->shader.family = radeon_get_family(rscreen->rw); 116 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 117 if (r) { 118 R600_ERR("translation from TGSI failed !\n"); 119 return r; 120 } 121 r = r600_bc_build(&rpshader->shader.bc); 122 if (r) { 123 R600_ERR("building bytecode failed !\n"); 124 return r; 125 } 126//fprintf(stderr, "______________________________________________________________\n"); 127 return 0; 128} 129 130static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 131{ 132 struct r600_screen *rscreen = r600_screen(ctx->screen); 133 struct r600_shader *rshader = &rpshader->shader; 134 struct radeon_state *state; 135 unsigned i, tmp; 136 137 rpshader->rstate = radeon_state_decref(rpshader->rstate); 138 state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); 139 if (state == NULL) 140 return -ENOMEM; 141 for (i = 0; i < 10; i++) { 142 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; 143 } 144 /* so far never got proper semantic id from tgsi */ 145 for (i = 0; i < 32; i++) { 146 tmp = i << ((i & 3) * 8); 147 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; 148 } 149 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); 150 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | 151 S_028868_STACK_SIZE(rshader->bc.nstack); 152 rpshader->rstate = state; 153 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 154 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); 155 rpshader->rstate->nbo = 2; 156 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 157 rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; 158 return radeon_state_pm4(state); 159} 160 161static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 162{ 163 const struct pipe_rasterizer_state *rasterizer; 164 struct r600_screen *rscreen = r600_screen(ctx->screen); 165 struct r600_shader *rshader = &rpshader->shader; 166 struct r600_context *rctx = r600_context(ctx); 167 struct radeon_state *state; 168 unsigned i, tmp, exports_ps, num_cout; 169 boolean have_pos = FALSE; 170 171 rasterizer = &rctx->rasterizer->state.rasterizer; 172 rpshader->rstate = radeon_state_decref(rpshader->rstate); 173 state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); 174 if (state == NULL) 175 return -ENOMEM; 176 for (i = 0; i < rshader->ninput; i++) { 177 tmp = S_028644_SEMANTIC(i); 178 tmp |= S_028644_SEL_CENTROID(1); 179 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 180 have_pos = TRUE; 181 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 182 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { 183 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 184 } 185 if (rasterizer->sprite_coord_enable & (1 << i)) { 186 tmp |= S_028644_PT_SPRITE_TEX(1); 187 } 188 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; 189 } 190 191 exports_ps = 0; 192 num_cout = 0; 193 for (i = 0; i < rshader->noutput; i++) { 194 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 195 exports_ps |= 1; 196 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 197 exports_ps |= (1 << (num_cout+1)); 198 num_cout++; 199 } 200 } 201 if (!exports_ps) { 202 /* always at least export 1 component per pixel */ 203 exports_ps = 2; 204 } 205 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | 206 S_0286CC_PERSP_GRADIENT_ENA(1); 207 if (have_pos) { 208 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1); 209 S_0286CC_BARYC_SAMPLE_CNTL(1); 210 } 211 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; 212 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | 213 S_028868_STACK_SIZE(rshader->bc.nstack); 214 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; 215 rpshader->rstate = state; 216 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 217 rpshader->rstate->nbo = 1; 218 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 219 return radeon_state_pm4(state); 220} 221 222static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 223{ 224 struct r600_screen *rscreen = r600_screen(ctx->screen); 225 struct r600_context *rctx = r600_context(ctx); 226 struct r600_shader *rshader = &rpshader->shader; 227 int r; 228 229 /* copy new shader */ 230 radeon_bo_decref(rscreen->rw, rpshader->bo); 231 rpshader->bo = NULL; 232 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 233 4096, NULL); 234 if (rpshader->bo == NULL) { 235 return -ENOMEM; 236 } 237 radeon_bo_map(rscreen->rw, rpshader->bo); 238 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); 239 radeon_bo_unmap(rscreen->rw, rpshader->bo); 240 /* build state */ 241 rshader->flat_shade = rctx->flat_shade; 242 switch (rshader->processor_type) { 243 case TGSI_PROCESSOR_VERTEX: 244 r = r600_pipe_shader_vs(ctx, rpshader); 245 break; 246 case TGSI_PROCESSOR_FRAGMENT: 247 r = r600_pipe_shader_ps(ctx, rpshader); 248 break; 249 default: 250 r = -EINVAL; 251 break; 252 } 253 return r; 254} 255 256int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 257{ 258 struct r600_context *rctx = r600_context(ctx); 259 int r; 260 261 if (rpshader == NULL) 262 return -EINVAL; 263 /* there should be enough input */ 264 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 265 R600_ERR("%d resources provided, expecting %d\n", 266 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 267 return -EINVAL; 268 } 269 r = r600_shader_update(ctx, &rpshader->shader); 270 if (r) 271 return r; 272 return r600_pipe_shader(ctx, rpshader); 273} 274 275static int tgsi_is_supported(struct r600_shader_ctx *ctx) 276{ 277 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 278 int j; 279 280 if (i->Instruction.NumDstRegs > 1) { 281 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 282 return -EINVAL; 283 } 284 if (i->Instruction.Predicate) { 285 R600_ERR("predicate unsupported\n"); 286 return -EINVAL; 287 } 288#if 0 289 if (i->Instruction.Label) { 290 R600_ERR("label unsupported\n"); 291 return -EINVAL; 292 } 293#endif 294 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 295 if (i->Src[j].Register.Dimension || 296 i->Src[j].Register.Absolute) { 297 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 298 i->Src[j].Register.Dimension, 299 i->Src[j].Register.Absolute); 300 return -EINVAL; 301 } 302 } 303 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 304 if (i->Dst[j].Register.Dimension) { 305 R600_ERR("unsupported dst (dimension)\n"); 306 return -EINVAL; 307 } 308 } 309 return 0; 310} 311 312static int tgsi_declaration(struct r600_shader_ctx *ctx) 313{ 314 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 315 struct r600_bc_vtx vtx; 316 unsigned i; 317 int r; 318 319 switch (d->Declaration.File) { 320 case TGSI_FILE_INPUT: 321 i = ctx->shader->ninput++; 322 ctx->shader->input[i].name = d->Semantic.Name; 323 ctx->shader->input[i].sid = d->Semantic.Index; 324 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 325 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 326 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 327 /* turn input into fetch */ 328 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 329 vtx.inst = 0; 330 vtx.fetch_type = 0; 331 vtx.buffer_id = i; 332 /* register containing the index into the buffer */ 333 vtx.src_gpr = 0; 334 vtx.src_sel_x = 0; 335 vtx.mega_fetch_count = 0x1F; 336 vtx.dst_gpr = ctx->shader->input[i].gpr; 337 vtx.dst_sel_x = 0; 338 vtx.dst_sel_y = 1; 339 vtx.dst_sel_z = 2; 340 vtx.dst_sel_w = 3; 341 r = r600_bc_add_vtx(ctx->bc, &vtx); 342 if (r) 343 return r; 344 } 345 break; 346 case TGSI_FILE_OUTPUT: 347 i = ctx->shader->noutput++; 348 ctx->shader->output[i].name = d->Semantic.Name; 349 ctx->shader->output[i].sid = d->Semantic.Index; 350 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 351 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 352 break; 353 case TGSI_FILE_CONSTANT: 354 case TGSI_FILE_TEMPORARY: 355 case TGSI_FILE_SAMPLER: 356 case TGSI_FILE_ADDRESS: 357 break; 358 default: 359 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 360 return -EINVAL; 361 } 362 return 0; 363} 364 365static int r600_get_temp(struct r600_shader_ctx *ctx) 366{ 367 return ctx->temp_reg + ctx->max_driver_temp_used++; 368} 369 370int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 371{ 372 struct tgsi_full_immediate *immediate; 373 struct r600_shader_ctx ctx; 374 struct r600_bc_output output[32]; 375 unsigned output_done, noutput; 376 unsigned opcode; 377 int i, r = 0, pos0; 378 379 ctx.bc = &shader->bc; 380 ctx.shader = shader; 381 r = r600_bc_init(ctx.bc, shader->family); 382 if (r) 383 return r; 384 ctx.tokens = tokens; 385 tgsi_scan_shader(tokens, &ctx.info); 386 tgsi_parse_init(&ctx.parse, tokens); 387 ctx.type = ctx.parse.FullHeader.Processor.Processor; 388 shader->processor_type = ctx.type; 389 390 /* register allocations */ 391 /* Values [0,127] correspond to GPR[0..127]. 392 * Values [128,159] correspond to constant buffer bank 0 393 * Values [160,191] correspond to constant buffer bank 1 394 * Values [256,511] correspond to cfile constants c[0..255]. 395 * Other special values are shown in the list below. 396 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 397 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 398 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 399 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 400 * 248 SQ_ALU_SRC_0: special constant 0.0. 401 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 402 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 403 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 404 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 405 * 253 SQ_ALU_SRC_LITERAL: literal constant. 406 * 254 SQ_ALU_SRC_PV: previous vector result. 407 * 255 SQ_ALU_SRC_PS: previous scalar result. 408 */ 409 for (i = 0; i < TGSI_FILE_COUNT; i++) { 410 ctx.file_offset[i] = 0; 411 } 412 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 413 ctx.file_offset[TGSI_FILE_INPUT] = 1; 414 } 415 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 416 ctx.info.file_count[TGSI_FILE_INPUT]; 417 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 418 ctx.info.file_count[TGSI_FILE_OUTPUT]; 419 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 420 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 421 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 422 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 423 424 ctx.nliterals = 0; 425 ctx.literals = NULL; 426 427 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 428 tgsi_parse_token(&ctx.parse); 429 switch (ctx.parse.FullToken.Token.Type) { 430 case TGSI_TOKEN_TYPE_IMMEDIATE: 431 immediate = &ctx.parse.FullToken.FullImmediate; 432 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 433 if(ctx.literals == NULL) { 434 r = -ENOMEM; 435 goto out_err; 436 } 437 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 438 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 439 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 440 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 441 ctx.nliterals++; 442 break; 443 case TGSI_TOKEN_TYPE_DECLARATION: 444 r = tgsi_declaration(&ctx); 445 if (r) 446 goto out_err; 447 break; 448 case TGSI_TOKEN_TYPE_INSTRUCTION: 449 r = tgsi_is_supported(&ctx); 450 if (r) 451 goto out_err; 452 ctx.max_driver_temp_used = 0; 453 /* reserve first tmp for everyone */ 454 r600_get_temp(&ctx); 455 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 456 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 457 r = ctx.inst_info->process(&ctx); 458 if (r) 459 goto out_err; 460 r = r600_bc_add_literal(ctx.bc, ctx.value); 461 if (r) 462 goto out_err; 463 break; 464 default: 465 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 466 r = -EINVAL; 467 goto out_err; 468 } 469 } 470 /* export output */ 471 noutput = shader->noutput; 472 for (i = 0, pos0 = 0; i < noutput; i++) { 473 memset(&output[i], 0, sizeof(struct r600_bc_output)); 474 output[i].gpr = shader->output[i].gpr; 475 output[i].elem_size = 3; 476 output[i].swizzle_x = 0; 477 output[i].swizzle_y = 1; 478 output[i].swizzle_z = 2; 479 output[i].swizzle_w = 3; 480 output[i].barrier = 1; 481 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 482 output[i].array_base = i - pos0; 483 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 484 switch (ctx.type) { 485 case TGSI_PROCESSOR_VERTEX: 486 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 487 output[i].array_base = 60; 488 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 489 /* position doesn't count in array_base */ 490 pos0++; 491 } 492 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 493 output[i].array_base = 61; 494 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 495 /* position doesn't count in array_base */ 496 pos0++; 497 } 498 break; 499 case TGSI_PROCESSOR_FRAGMENT: 500 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 501 output[i].array_base = shader->output[i].sid; 502 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 503 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 504 output[i].array_base = 61; 505 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 506 } else { 507 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 508 r = -EINVAL; 509 goto out_err; 510 } 511 break; 512 default: 513 R600_ERR("unsupported processor type %d\n", ctx.type); 514 r = -EINVAL; 515 goto out_err; 516 } 517 } 518 /* add fake param output for vertex shader if no param is exported */ 519 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 520 for (i = 0, pos0 = 0; i < noutput; i++) { 521 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 522 pos0 = 1; 523 break; 524 } 525 } 526 if (!pos0) { 527 memset(&output[i], 0, sizeof(struct r600_bc_output)); 528 output[i].gpr = 0; 529 output[i].elem_size = 3; 530 output[i].swizzle_x = 0; 531 output[i].swizzle_y = 1; 532 output[i].swizzle_z = 2; 533 output[i].swizzle_w = 3; 534 output[i].barrier = 1; 535 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 536 output[i].array_base = 0; 537 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 538 noutput++; 539 } 540 } 541 /* add fake pixel export */ 542 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 543 memset(&output[0], 0, sizeof(struct r600_bc_output)); 544 output[0].gpr = 0; 545 output[0].elem_size = 3; 546 output[0].swizzle_x = 7; 547 output[0].swizzle_y = 7; 548 output[0].swizzle_z = 7; 549 output[0].swizzle_w = 7; 550 output[0].barrier = 1; 551 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 552 output[0].array_base = 0; 553 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 554 noutput++; 555 } 556 /* set export done on last export of each type */ 557 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 558 if (i == (noutput - 1)) { 559 output[i].end_of_program = 1; 560 } 561 if (!(output_done & (1 << output[i].type))) { 562 output_done |= (1 << output[i].type); 563 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; 564 } 565 } 566 /* add output to bytecode */ 567 for (i = 0; i < noutput; i++) { 568 r = r600_bc_add_output(ctx.bc, &output[i]); 569 if (r) 570 goto out_err; 571 } 572 free(ctx.literals); 573 tgsi_parse_free(&ctx.parse); 574 return 0; 575out_err: 576 free(ctx.literals); 577 tgsi_parse_free(&ctx.parse); 578 return r; 579} 580 581static int tgsi_unsupported(struct r600_shader_ctx *ctx) 582{ 583 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 584 return -EINVAL; 585} 586 587static int tgsi_end(struct r600_shader_ctx *ctx) 588{ 589 return 0; 590} 591 592static int tgsi_src(struct r600_shader_ctx *ctx, 593 const struct tgsi_full_src_register *tgsi_src, 594 struct r600_bc_alu_src *r600_src) 595{ 596 int index; 597 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 598 r600_src->sel = tgsi_src->Register.Index; 599 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 600 r600_src->sel = 0; 601 index = tgsi_src->Register.Index; 602 ctx->value[0] = ctx->literals[index * 4 + 0]; 603 ctx->value[1] = ctx->literals[index * 4 + 1]; 604 ctx->value[2] = ctx->literals[index * 4 + 2]; 605 ctx->value[3] = ctx->literals[index * 4 + 3]; 606 } 607 if (tgsi_src->Register.Indirect) 608 r600_src->rel = V_SQ_REL_RELATIVE; 609 r600_src->neg = tgsi_src->Register.Negate; 610 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 611 return 0; 612} 613 614static int tgsi_dst(struct r600_shader_ctx *ctx, 615 const struct tgsi_full_dst_register *tgsi_dst, 616 unsigned swizzle, 617 struct r600_bc_alu_dst *r600_dst) 618{ 619 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 620 621 r600_dst->sel = tgsi_dst->Register.Index; 622 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 623 r600_dst->chan = swizzle; 624 r600_dst->write = 1; 625 if (tgsi_dst->Register.Indirect) 626 r600_dst->rel = V_SQ_REL_RELATIVE; 627 if (inst->Instruction.Saturate) { 628 r600_dst->clamp = 1; 629 } 630 return 0; 631} 632 633static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 634{ 635 switch (swizzle) { 636 case 0: 637 return tgsi_src->Register.SwizzleX; 638 case 1: 639 return tgsi_src->Register.SwizzleY; 640 case 2: 641 return tgsi_src->Register.SwizzleZ; 642 case 3: 643 return tgsi_src->Register.SwizzleW; 644 default: 645 return 0; 646 } 647} 648 649static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 650{ 651 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 652 struct r600_bc_alu alu; 653 int i, j, k, nconst, r; 654 655 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 656 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 657 nconst++; 658 } 659 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 660 if (r) { 661 return r; 662 } 663 } 664 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 665 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { 666 int treg = r600_get_temp(ctx); 667 for (k = 0; k < 4; k++) { 668 memset(&alu, 0, sizeof(struct r600_bc_alu)); 669 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 670 alu.src[0].sel = r600_src[j].sel; 671 alu.src[0].chan = k; 672 alu.dst.sel = treg; 673 alu.dst.chan = k; 674 alu.dst.write = 1; 675 if (k == 3) 676 alu.last = 1; 677 r = r600_bc_add_alu(ctx->bc, &alu); 678 if (r) 679 return r; 680 } 681 r600_src[j].sel = treg; 682 j--; 683 } 684 } 685 return 0; 686} 687 688/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 689static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 690{ 691 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 692 struct r600_bc_alu alu; 693 int i, j, k, nliteral, r; 694 695 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 696 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 697 nliteral++; 698 } 699 } 700 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { 701 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { 702 int treg = r600_get_temp(ctx); 703 for (k = 0; k < 4; k++) { 704 memset(&alu, 0, sizeof(struct r600_bc_alu)); 705 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 706 alu.src[0].sel = r600_src[j].sel; 707 alu.src[0].chan = k; 708 alu.dst.sel = treg; 709 alu.dst.chan = k; 710 alu.dst.write = 1; 711 if (k == 3) 712 alu.last = 1; 713 r = r600_bc_add_alu(ctx->bc, &alu); 714 if (r) 715 return r; 716 } 717 r = r600_bc_add_literal(ctx->bc, ctx->value); 718 if (r) 719 return r; 720 r600_src[j].sel = treg; 721 j++; 722 } 723 } 724 return 0; 725} 726 727static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 728{ 729 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 730 struct r600_bc_alu_src r600_src[3]; 731 struct r600_bc_alu alu; 732 int i, j, r; 733 int lasti = 0; 734 735 for (i = 0; i < 4; i++) { 736 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 737 lasti = i; 738 } 739 } 740 741 r = tgsi_split_constant(ctx, r600_src); 742 if (r) 743 return r; 744 for (i = 0; i < lasti + 1; i++) { 745 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 746 continue; 747 748 memset(&alu, 0, sizeof(struct r600_bc_alu)); 749 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 750 if (r) 751 return r; 752 753 alu.inst = ctx->inst_info->r600_opcode; 754 if (!swap) { 755 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 756 alu.src[j] = r600_src[j]; 757 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 758 } 759 } else { 760 alu.src[0] = r600_src[1]; 761 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 762 763 alu.src[1] = r600_src[0]; 764 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 765 } 766 /* handle some special cases */ 767 switch (ctx->inst_info->tgsi_opcode) { 768 case TGSI_OPCODE_SUB: 769 alu.src[1].neg = 1; 770 break; 771 case TGSI_OPCODE_ABS: 772 alu.src[0].abs = 1; 773 break; 774 default: 775 break; 776 } 777 if (i == lasti) { 778 alu.last = 1; 779 } 780 r = r600_bc_add_alu(ctx->bc, &alu); 781 if (r) 782 return r; 783 } 784 return 0; 785} 786 787static int tgsi_op2(struct r600_shader_ctx *ctx) 788{ 789 return tgsi_op2_s(ctx, 0); 790} 791 792static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 793{ 794 return tgsi_op2_s(ctx, 1); 795} 796 797/* 798 * r600 - trunc to -PI..PI range 799 * r700 - normalize by dividing by 2PI 800 * see fdo bug 27901 801 */ 802static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 803 struct r600_bc_alu_src r600_src[3]) 804{ 805 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 806 int r; 807 uint32_t lit_vals[4]; 808 struct r600_bc_alu alu; 809 810 memset(lit_vals, 0, 4*4); 811 r = tgsi_split_constant(ctx, r600_src); 812 if (r) 813 return r; 814 815 r = tgsi_split_literal_constant(ctx, r600_src); 816 if (r) 817 return r; 818 819 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 820 lit_vals[1] = fui(0.5f); 821 822 memset(&alu, 0, sizeof(struct r600_bc_alu)); 823 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 824 alu.is_op3 = 1; 825 826 alu.dst.chan = 0; 827 alu.dst.sel = ctx->temp_reg; 828 alu.dst.write = 1; 829 830 alu.src[0] = r600_src[0]; 831 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 832 833 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 834 alu.src[1].chan = 0; 835 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 836 alu.src[2].chan = 1; 837 alu.last = 1; 838 r = r600_bc_add_alu(ctx->bc, &alu); 839 if (r) 840 return r; 841 r = r600_bc_add_literal(ctx->bc, lit_vals); 842 if (r) 843 return r; 844 845 memset(&alu, 0, sizeof(struct r600_bc_alu)); 846 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT; 847 848 alu.dst.chan = 0; 849 alu.dst.sel = ctx->temp_reg; 850 alu.dst.write = 1; 851 852 alu.src[0].sel = ctx->temp_reg; 853 alu.src[0].chan = 0; 854 alu.last = 1; 855 r = r600_bc_add_alu(ctx->bc, &alu); 856 if (r) 857 return r; 858 859 if (ctx->bc->chiprev == 0) { 860 lit_vals[0] = fui(3.1415926535897f * 2.0f); 861 lit_vals[1] = fui(-3.1415926535897f); 862 } else { 863 lit_vals[0] = fui(1.0f); 864 lit_vals[1] = fui(-0.5f); 865 } 866 867 memset(&alu, 0, sizeof(struct r600_bc_alu)); 868 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 869 alu.is_op3 = 1; 870 871 alu.dst.chan = 0; 872 alu.dst.sel = ctx->temp_reg; 873 alu.dst.write = 1; 874 875 alu.src[0].sel = ctx->temp_reg; 876 alu.src[0].chan = 0; 877 878 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 879 alu.src[1].chan = 0; 880 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 881 alu.src[2].chan = 1; 882 alu.last = 1; 883 r = r600_bc_add_alu(ctx->bc, &alu); 884 if (r) 885 return r; 886 r = r600_bc_add_literal(ctx->bc, lit_vals); 887 if (r) 888 return r; 889 return 0; 890} 891 892static int tgsi_trig(struct r600_shader_ctx *ctx) 893{ 894 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 895 struct r600_bc_alu_src r600_src[3]; 896 struct r600_bc_alu alu; 897 int i, r; 898 int lasti = 0; 899 900 r = tgsi_setup_trig(ctx, r600_src); 901 if (r) 902 return r; 903 904 memset(&alu, 0, sizeof(struct r600_bc_alu)); 905 alu.inst = ctx->inst_info->r600_opcode; 906 alu.dst.chan = 0; 907 alu.dst.sel = ctx->temp_reg; 908 alu.dst.write = 1; 909 910 alu.src[0].sel = ctx->temp_reg; 911 alu.src[0].chan = 0; 912 alu.last = 1; 913 r = r600_bc_add_alu(ctx->bc, &alu); 914 if (r) 915 return r; 916 917 /* replicate result */ 918 for (i = 0; i < 4; i++) { 919 if (inst->Dst[0].Register.WriteMask & (1 << i)) 920 lasti = i; 921 } 922 for (i = 0; i < lasti + 1; i++) { 923 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 924 continue; 925 926 memset(&alu, 0, sizeof(struct r600_bc_alu)); 927 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 928 929 alu.src[0].sel = ctx->temp_reg; 930 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 931 if (r) 932 return r; 933 if (i == lasti) 934 alu.last = 1; 935 r = r600_bc_add_alu(ctx->bc, &alu); 936 if (r) 937 return r; 938 } 939 return 0; 940} 941 942static int tgsi_scs(struct r600_shader_ctx *ctx) 943{ 944 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 945 struct r600_bc_alu_src r600_src[3]; 946 struct r600_bc_alu alu; 947 int r; 948 949 r = tgsi_setup_trig(ctx, r600_src); 950 if (r) 951 return r; 952 953 954 /* dst.x = COS */ 955 memset(&alu, 0, sizeof(struct r600_bc_alu)); 956 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS; 957 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 958 if (r) 959 return r; 960 961 alu.src[0].sel = ctx->temp_reg; 962 alu.src[0].chan = 0; 963 alu.last = 1; 964 r = r600_bc_add_alu(ctx->bc, &alu); 965 if (r) 966 return r; 967 968 /* dst.y = SIN */ 969 memset(&alu, 0, sizeof(struct r600_bc_alu)); 970 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN; 971 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 972 if (r) 973 return r; 974 975 alu.src[0].sel = ctx->temp_reg; 976 alu.src[0].chan = 0; 977 alu.last = 1; 978 r = r600_bc_add_alu(ctx->bc, &alu); 979 if (r) 980 return r; 981 return 0; 982} 983 984static int tgsi_kill(struct r600_shader_ctx *ctx) 985{ 986 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 987 struct r600_bc_alu alu; 988 int i, r; 989 990 for (i = 0; i < 4; i++) { 991 memset(&alu, 0, sizeof(struct r600_bc_alu)); 992 alu.inst = ctx->inst_info->r600_opcode; 993 994 alu.dst.chan = i; 995 996 alu.src[0].sel = V_SQ_ALU_SRC_0; 997 998 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 999 alu.src[1].sel = V_SQ_ALU_SRC_1; 1000 alu.src[1].neg = 1; 1001 } else { 1002 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1003 if (r) 1004 return r; 1005 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1006 } 1007 if (i == 3) { 1008 alu.last = 1; 1009 } 1010 r = r600_bc_add_alu(ctx->bc, &alu); 1011 if (r) 1012 return r; 1013 } 1014 r = r600_bc_add_literal(ctx->bc, ctx->value); 1015 if (r) 1016 return r; 1017 1018 /* kill must be last in ALU */ 1019 ctx->bc->force_add_cf = 1; 1020 ctx->shader->uses_kill = TRUE; 1021 return 0; 1022} 1023 1024static int tgsi_lit(struct r600_shader_ctx *ctx) 1025{ 1026 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1027 struct r600_bc_alu alu; 1028 struct r600_bc_alu_src r600_src[3]; 1029 int r; 1030 1031 r = tgsi_split_constant(ctx, r600_src); 1032 if (r) 1033 return r; 1034 r = tgsi_split_literal_constant(ctx, r600_src); 1035 if (r) 1036 return r; 1037 1038 /* dst.x, <- 1.0 */ 1039 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1040 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1041 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1042 alu.src[0].chan = 0; 1043 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1044 if (r) 1045 return r; 1046 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1047 r = r600_bc_add_alu(ctx->bc, &alu); 1048 if (r) 1049 return r; 1050 1051 /* dst.y = max(src.x, 0.0) */ 1052 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1053 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; 1054 alu.src[0] = r600_src[0]; 1055 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1056 alu.src[1].chan = 0; 1057 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1058 if (r) 1059 return r; 1060 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1061 r = r600_bc_add_alu(ctx->bc, &alu); 1062 if (r) 1063 return r; 1064 1065 /* dst.w, <- 1.0 */ 1066 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1067 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1068 alu.src[0].sel = V_SQ_ALU_SRC_1; 1069 alu.src[0].chan = 0; 1070 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1071 if (r) 1072 return r; 1073 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1074 alu.last = 1; 1075 r = r600_bc_add_alu(ctx->bc, &alu); 1076 if (r) 1077 return r; 1078 1079 r = r600_bc_add_literal(ctx->bc, ctx->value); 1080 if (r) 1081 return r; 1082 1083 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1084 { 1085 int chan; 1086 int sel; 1087 1088 /* dst.z = log(src.y) */ 1089 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1090 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; 1091 alu.src[0] = r600_src[0]; 1092 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1093 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1094 if (r) 1095 return r; 1096 alu.last = 1; 1097 r = r600_bc_add_alu(ctx->bc, &alu); 1098 if (r) 1099 return r; 1100 1101 r = r600_bc_add_literal(ctx->bc, ctx->value); 1102 if (r) 1103 return r; 1104 1105 chan = alu.dst.chan; 1106 sel = alu.dst.sel; 1107 1108 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1109 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1110 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 1111 alu.src[0] = r600_src[0]; 1112 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1113 alu.src[1].sel = sel; 1114 alu.src[1].chan = chan; 1115 1116 alu.src[2] = r600_src[0]; 1117 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1118 alu.dst.sel = ctx->temp_reg; 1119 alu.dst.chan = 0; 1120 alu.dst.write = 1; 1121 alu.is_op3 = 1; 1122 alu.last = 1; 1123 r = r600_bc_add_alu(ctx->bc, &alu); 1124 if (r) 1125 return r; 1126 1127 r = r600_bc_add_literal(ctx->bc, ctx->value); 1128 if (r) 1129 return r; 1130 /* dst.z = exp(tmp.x) */ 1131 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1132 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 1133 alu.src[0].sel = ctx->temp_reg; 1134 alu.src[0].chan = 0; 1135 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1136 if (r) 1137 return r; 1138 alu.last = 1; 1139 r = r600_bc_add_alu(ctx->bc, &alu); 1140 if (r) 1141 return r; 1142 } 1143 return 0; 1144} 1145 1146static int tgsi_trans(struct r600_shader_ctx *ctx) 1147{ 1148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1149 struct r600_bc_alu alu; 1150 int i, j, r; 1151 1152 for (i = 0; i < 4; i++) { 1153 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1154 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1155 alu.inst = ctx->inst_info->r600_opcode; 1156 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1157 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); 1158 if (r) 1159 return r; 1160 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1161 } 1162 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1163 if (r) 1164 return r; 1165 alu.last = 1; 1166 r = r600_bc_add_alu(ctx->bc, &alu); 1167 if (r) 1168 return r; 1169 } 1170 } 1171 return 0; 1172} 1173 1174static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1175{ 1176 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1177 struct r600_bc_alu alu; 1178 int i, r; 1179 1180 for (i = 0; i < 4; i++) { 1181 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1182 alu.src[0].sel = ctx->temp_reg; 1183 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1184 alu.dst.chan = i; 1185 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1186 if (r) 1187 return r; 1188 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1189 if (i == 3) 1190 alu.last = 1; 1191 r = r600_bc_add_alu(ctx->bc, &alu); 1192 if (r) 1193 return r; 1194 } 1195 return 0; 1196} 1197 1198static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1199{ 1200 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1201 struct r600_bc_alu alu; 1202 int i, r; 1203 1204 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1205 alu.inst = ctx->inst_info->r600_opcode; 1206 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1207 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1208 if (r) 1209 return r; 1210 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1211 } 1212 alu.dst.sel = ctx->temp_reg; 1213 alu.dst.write = 1; 1214 alu.last = 1; 1215 r = r600_bc_add_alu(ctx->bc, &alu); 1216 if (r) 1217 return r; 1218 r = r600_bc_add_literal(ctx->bc, ctx->value); 1219 if (r) 1220 return r; 1221 /* replicate result */ 1222 return tgsi_helper_tempx_replicate(ctx); 1223} 1224 1225static int tgsi_pow(struct r600_shader_ctx *ctx) 1226{ 1227 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1228 struct r600_bc_alu alu; 1229 int r; 1230 1231 /* LOG2(a) */ 1232 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1233 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE; 1234 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1235 if (r) 1236 return r; 1237 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1238 alu.dst.sel = ctx->temp_reg; 1239 alu.dst.write = 1; 1240 alu.last = 1; 1241 r = r600_bc_add_alu(ctx->bc, &alu); 1242 if (r) 1243 return r; 1244 r = r600_bc_add_literal(ctx->bc,ctx->value); 1245 if (r) 1246 return r; 1247 /* b * LOG2(a) */ 1248 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1249 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE; 1250 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1251 if (r) 1252 return r; 1253 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1254 alu.src[1].sel = ctx->temp_reg; 1255 alu.dst.sel = ctx->temp_reg; 1256 alu.dst.write = 1; 1257 alu.last = 1; 1258 r = r600_bc_add_alu(ctx->bc, &alu); 1259 if (r) 1260 return r; 1261 r = r600_bc_add_literal(ctx->bc,ctx->value); 1262 if (r) 1263 return r; 1264 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1265 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1266 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 1267 alu.src[0].sel = ctx->temp_reg; 1268 alu.dst.sel = ctx->temp_reg; 1269 alu.dst.write = 1; 1270 alu.last = 1; 1271 r = r600_bc_add_alu(ctx->bc, &alu); 1272 if (r) 1273 return r; 1274 r = r600_bc_add_literal(ctx->bc,ctx->value); 1275 if (r) 1276 return r; 1277 return tgsi_helper_tempx_replicate(ctx); 1278} 1279 1280static int tgsi_ssg(struct r600_shader_ctx *ctx) 1281{ 1282 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1283 struct r600_bc_alu alu; 1284 struct r600_bc_alu_src r600_src[3]; 1285 int i, r; 1286 1287 r = tgsi_split_constant(ctx, r600_src); 1288 if (r) 1289 return r; 1290 1291 /* tmp = (src > 0 ? 1 : src) */ 1292 for (i = 0; i < 4; i++) { 1293 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1294 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; 1295 alu.is_op3 = 1; 1296 1297 alu.dst.sel = ctx->temp_reg; 1298 alu.dst.chan = i; 1299 1300 alu.src[0] = r600_src[0]; 1301 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1302 1303 alu.src[1].sel = V_SQ_ALU_SRC_1; 1304 1305 alu.src[2] = r600_src[0]; 1306 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1307 if (i == 3) 1308 alu.last = 1; 1309 r = r600_bc_add_alu(ctx->bc, &alu); 1310 if (r) 1311 return r; 1312 } 1313 r = r600_bc_add_literal(ctx->bc, ctx->value); 1314 if (r) 1315 return r; 1316 1317 /* dst = (-tmp > 0 ? -1 : tmp) */ 1318 for (i = 0; i < 4; i++) { 1319 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1320 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; 1321 alu.is_op3 = 1; 1322 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1323 if (r) 1324 return r; 1325 1326 alu.src[0].sel = ctx->temp_reg; 1327 alu.src[0].chan = i; 1328 alu.src[0].neg = 1; 1329 1330 alu.src[1].sel = V_SQ_ALU_SRC_1; 1331 alu.src[1].neg = 1; 1332 1333 alu.src[2].sel = ctx->temp_reg; 1334 alu.src[2].chan = i; 1335 1336 if (i == 3) 1337 alu.last = 1; 1338 r = r600_bc_add_alu(ctx->bc, &alu); 1339 if (r) 1340 return r; 1341 } 1342 return 0; 1343} 1344 1345static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1346{ 1347 struct r600_bc_alu alu; 1348 int i, r; 1349 1350 r = r600_bc_add_literal(ctx->bc, ctx->value); 1351 if (r) 1352 return r; 1353 for (i = 0; i < 4; i++) { 1354 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1355 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1356 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 1357 alu.dst.chan = i; 1358 } else { 1359 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1360 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1361 if (r) 1362 return r; 1363 alu.src[0].sel = ctx->temp_reg; 1364 alu.src[0].chan = i; 1365 } 1366 if (i == 3) { 1367 alu.last = 1; 1368 } 1369 r = r600_bc_add_alu(ctx->bc, &alu); 1370 if (r) 1371 return r; 1372 } 1373 return 0; 1374} 1375 1376static int tgsi_op3(struct r600_shader_ctx *ctx) 1377{ 1378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1379 struct r600_bc_alu_src r600_src[3]; 1380 struct r600_bc_alu alu; 1381 int i, j, r; 1382 1383 r = tgsi_split_constant(ctx, r600_src); 1384 if (r) 1385 return r; 1386 /* do it in 2 step as op3 doesn't support writemask */ 1387 for (i = 0; i < 4; i++) { 1388 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1389 alu.inst = ctx->inst_info->r600_opcode; 1390 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1391 alu.src[j] = r600_src[j]; 1392 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1393 } 1394 alu.dst.sel = ctx->temp_reg; 1395 alu.dst.chan = i; 1396 alu.dst.write = 1; 1397 alu.is_op3 = 1; 1398 if (i == 3) { 1399 alu.last = 1; 1400 } 1401 r = r600_bc_add_alu(ctx->bc, &alu); 1402 if (r) 1403 return r; 1404 } 1405 return tgsi_helper_copy(ctx, inst); 1406} 1407 1408static int tgsi_dp(struct r600_shader_ctx *ctx) 1409{ 1410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1411 struct r600_bc_alu_src r600_src[3]; 1412 struct r600_bc_alu alu; 1413 int i, j, r; 1414 1415 r = tgsi_split_constant(ctx, r600_src); 1416 if (r) 1417 return r; 1418 for (i = 0; i < 4; i++) { 1419 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1420 alu.inst = ctx->inst_info->r600_opcode; 1421 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1422 alu.src[j] = r600_src[j]; 1423 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1424 } 1425 alu.dst.sel = ctx->temp_reg; 1426 alu.dst.chan = i; 1427 alu.dst.write = 1; 1428 /* handle some special cases */ 1429 switch (ctx->inst_info->tgsi_opcode) { 1430 case TGSI_OPCODE_DP2: 1431 if (i > 1) { 1432 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1433 alu.src[0].chan = alu.src[1].chan = 0; 1434 } 1435 break; 1436 case TGSI_OPCODE_DP3: 1437 if (i > 2) { 1438 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1439 alu.src[0].chan = alu.src[1].chan = 0; 1440 } 1441 break; 1442 case TGSI_OPCODE_DPH: 1443 if (i == 3) { 1444 alu.src[0].sel = V_SQ_ALU_SRC_1; 1445 alu.src[0].chan = 0; 1446 alu.src[0].neg = 0; 1447 } 1448 break; 1449 default: 1450 break; 1451 } 1452 if (i == 3) { 1453 alu.last = 1; 1454 } 1455 r = r600_bc_add_alu(ctx->bc, &alu); 1456 if (r) 1457 return r; 1458 } 1459 return tgsi_helper_copy(ctx, inst); 1460} 1461 1462static int tgsi_tex(struct r600_shader_ctx *ctx) 1463{ 1464 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1465 struct r600_bc_tex tex; 1466 struct r600_bc_alu alu; 1467 unsigned src_gpr; 1468 int r, i; 1469 1470 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1471 1472 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1473 /* Add perspective divide */ 1474 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1475 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; 1476 alu.src[0].sel = src_gpr; 1477 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1478 alu.dst.sel = ctx->temp_reg; 1479 alu.dst.chan = 3; 1480 alu.last = 1; 1481 alu.dst.write = 1; 1482 r = r600_bc_add_alu(ctx->bc, &alu); 1483 if (r) 1484 return r; 1485 1486 for (i = 0; i < 3; i++) { 1487 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1488 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1489 alu.src[0].sel = ctx->temp_reg; 1490 alu.src[0].chan = 3; 1491 alu.src[1].sel = src_gpr; 1492 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1493 alu.dst.sel = ctx->temp_reg; 1494 alu.dst.chan = i; 1495 alu.dst.write = 1; 1496 r = r600_bc_add_alu(ctx->bc, &alu); 1497 if (r) 1498 return r; 1499 } 1500 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1501 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1502 alu.src[0].sel = V_SQ_ALU_SRC_1; 1503 alu.src[0].chan = 0; 1504 alu.dst.sel = ctx->temp_reg; 1505 alu.dst.chan = 3; 1506 alu.last = 1; 1507 alu.dst.write = 1; 1508 r = r600_bc_add_alu(ctx->bc, &alu); 1509 if (r) 1510 return r; 1511 src_gpr = ctx->temp_reg; 1512 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) { 1513 for (i = 0; i < 4; i++) { 1514 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1515 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1516 alu.src[0].sel = src_gpr; 1517 alu.src[0].chan = i; 1518 alu.dst.sel = ctx->temp_reg; 1519 alu.dst.chan = i; 1520 if (i == 3) 1521 alu.last = 1; 1522 alu.dst.write = 1; 1523 r = r600_bc_add_alu(ctx->bc, &alu); 1524 if (r) 1525 return r; 1526 } 1527 src_gpr = ctx->temp_reg; 1528 } 1529 1530 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1531 tex.inst = ctx->inst_info->r600_opcode; 1532 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1533 tex.sampler_id = tex.resource_id; 1534 tex.src_gpr = src_gpr; 1535 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1536 tex.dst_sel_x = 0; 1537 tex.dst_sel_y = 1; 1538 tex.dst_sel_z = 2; 1539 tex.dst_sel_w = 3; 1540 tex.src_sel_x = 0; 1541 tex.src_sel_y = 1; 1542 tex.src_sel_z = 2; 1543 tex.src_sel_w = 3; 1544 1545 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1546 tex.coord_type_x = 1; 1547 tex.coord_type_y = 1; 1548 tex.coord_type_z = 1; 1549 tex.coord_type_w = 1; 1550 } 1551 return r600_bc_add_tex(ctx->bc, &tex); 1552} 1553 1554static int tgsi_lrp(struct r600_shader_ctx *ctx) 1555{ 1556 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1557 struct r600_bc_alu_src r600_src[3]; 1558 struct r600_bc_alu alu; 1559 unsigned i; 1560 int r; 1561 1562 r = tgsi_split_constant(ctx, r600_src); 1563 if (r) 1564 return r; 1565 /* 1 - src0 */ 1566 for (i = 0; i < 4; i++) { 1567 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1568 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; 1569 alu.src[0].sel = V_SQ_ALU_SRC_1; 1570 alu.src[0].chan = 0; 1571 alu.src[1] = r600_src[0]; 1572 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1573 alu.src[1].neg = 1; 1574 alu.dst.sel = ctx->temp_reg; 1575 alu.dst.chan = i; 1576 if (i == 3) { 1577 alu.last = 1; 1578 } 1579 alu.dst.write = 1; 1580 r = r600_bc_add_alu(ctx->bc, &alu); 1581 if (r) 1582 return r; 1583 } 1584 r = r600_bc_add_literal(ctx->bc, ctx->value); 1585 if (r) 1586 return r; 1587 1588 /* (1 - src0) * src2 */ 1589 for (i = 0; i < 4; i++) { 1590 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1591 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1592 alu.src[0].sel = ctx->temp_reg; 1593 alu.src[0].chan = i; 1594 alu.src[1] = r600_src[2]; 1595 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1596 alu.dst.sel = ctx->temp_reg; 1597 alu.dst.chan = i; 1598 if (i == 3) { 1599 alu.last = 1; 1600 } 1601 alu.dst.write = 1; 1602 r = r600_bc_add_alu(ctx->bc, &alu); 1603 if (r) 1604 return r; 1605 } 1606 r = r600_bc_add_literal(ctx->bc, ctx->value); 1607 if (r) 1608 return r; 1609 1610 /* src0 * src1 + (1 - src0) * src2 */ 1611 for (i = 0; i < 4; i++) { 1612 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1613 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 1614 alu.is_op3 = 1; 1615 alu.src[0] = r600_src[0]; 1616 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1617 alu.src[1] = r600_src[1]; 1618 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1619 alu.src[2].sel = ctx->temp_reg; 1620 alu.src[2].chan = i; 1621 alu.dst.sel = ctx->temp_reg; 1622 alu.dst.chan = i; 1623 if (i == 3) { 1624 alu.last = 1; 1625 } 1626 r = r600_bc_add_alu(ctx->bc, &alu); 1627 if (r) 1628 return r; 1629 } 1630 return tgsi_helper_copy(ctx, inst); 1631} 1632 1633static int tgsi_cmp(struct r600_shader_ctx *ctx) 1634{ 1635 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1636 struct r600_bc_alu_src r600_src[3]; 1637 struct r600_bc_alu alu; 1638 int use_temp = 0; 1639 int i, r; 1640 1641 r = tgsi_split_constant(ctx, r600_src); 1642 if (r) 1643 return r; 1644 1645 if (inst->Dst[0].Register.WriteMask != 0xf) 1646 use_temp = 1; 1647 1648 for (i = 0; i < 4; i++) { 1649 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1650 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE; 1651 alu.src[0] = r600_src[0]; 1652 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1653 1654 alu.src[1] = r600_src[2]; 1655 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1656 1657 alu.src[2] = r600_src[1]; 1658 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 1659 1660 if (use_temp) 1661 alu.dst.sel = ctx->temp_reg; 1662 else { 1663 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1664 if (r) 1665 return r; 1666 } 1667 alu.dst.chan = i; 1668 alu.dst.write = 1; 1669 alu.is_op3 = 1; 1670 if (i == 3) 1671 alu.last = 1; 1672 r = r600_bc_add_alu(ctx->bc, &alu); 1673 if (r) 1674 return r; 1675 } 1676 if (use_temp) 1677 return tgsi_helper_copy(ctx, inst); 1678 return 0; 1679} 1680 1681static int tgsi_xpd(struct r600_shader_ctx *ctx) 1682{ 1683 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1684 struct r600_bc_alu_src r600_src[3]; 1685 struct r600_bc_alu alu; 1686 uint32_t use_temp = 0; 1687 int i, r; 1688 1689 if (inst->Dst[0].Register.WriteMask != 0xf) 1690 use_temp = 1; 1691 1692 r = tgsi_split_constant(ctx, r600_src); 1693 if (r) 1694 return r; 1695 1696 for (i = 0; i < 4; i++) { 1697 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1698 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1699 1700 alu.src[0] = r600_src[0]; 1701 switch (i) { 1702 case 0: 1703 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1704 break; 1705 case 1: 1706 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1707 break; 1708 case 2: 1709 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1710 break; 1711 case 3: 1712 alu.src[0].sel = V_SQ_ALU_SRC_0; 1713 alu.src[0].chan = i; 1714 } 1715 1716 alu.src[1] = r600_src[1]; 1717 switch (i) { 1718 case 0: 1719 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1720 break; 1721 case 1: 1722 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1723 break; 1724 case 2: 1725 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1726 break; 1727 case 3: 1728 alu.src[1].sel = V_SQ_ALU_SRC_0; 1729 alu.src[1].chan = i; 1730 } 1731 1732 alu.dst.sel = ctx->temp_reg; 1733 alu.dst.chan = i; 1734 alu.dst.write = 1; 1735 1736 if (i == 3) 1737 alu.last = 1; 1738 r = r600_bc_add_alu(ctx->bc, &alu); 1739 if (r) 1740 return r; 1741 } 1742 1743 for (i = 0; i < 4; i++) { 1744 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1745 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 1746 1747 alu.src[0] = r600_src[0]; 1748 switch (i) { 1749 case 0: 1750 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1751 break; 1752 case 1: 1753 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1754 break; 1755 case 2: 1756 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1757 break; 1758 case 3: 1759 alu.src[0].sel = V_SQ_ALU_SRC_0; 1760 alu.src[0].chan = i; 1761 } 1762 1763 alu.src[1] = r600_src[1]; 1764 switch (i) { 1765 case 0: 1766 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1767 break; 1768 case 1: 1769 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1770 break; 1771 case 2: 1772 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1773 break; 1774 case 3: 1775 alu.src[1].sel = V_SQ_ALU_SRC_0; 1776 alu.src[1].chan = i; 1777 } 1778 1779 alu.src[2].sel = ctx->temp_reg; 1780 alu.src[2].neg = 1; 1781 alu.src[2].chan = i; 1782 1783 if (use_temp) 1784 alu.dst.sel = ctx->temp_reg; 1785 else { 1786 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1787 if (r) 1788 return r; 1789 } 1790 alu.dst.chan = i; 1791 alu.dst.write = 1; 1792 alu.is_op3 = 1; 1793 if (i == 3) 1794 alu.last = 1; 1795 r = r600_bc_add_alu(ctx->bc, &alu); 1796 if (r) 1797 return r; 1798 } 1799 if (use_temp) 1800 return tgsi_helper_copy(ctx, inst); 1801 return 0; 1802} 1803 1804static int tgsi_exp(struct r600_shader_ctx *ctx) 1805{ 1806 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1807 struct r600_bc_alu_src r600_src[3]; 1808 struct r600_bc_alu alu; 1809 int r; 1810 1811 /* result.x = 2^floor(src); */ 1812 if (inst->Dst[0].Register.WriteMask & 1) { 1813 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1814 1815 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 1816 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1817 if (r) 1818 return r; 1819 1820 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1821 1822 alu.dst.sel = ctx->temp_reg; 1823 alu.dst.chan = 0; 1824 alu.dst.write = 1; 1825 alu.last = 1; 1826 r = r600_bc_add_alu(ctx->bc, &alu); 1827 if (r) 1828 return r; 1829 1830 r = r600_bc_add_literal(ctx->bc, ctx->value); 1831 if (r) 1832 return r; 1833 1834 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 1835 alu.src[0].sel = ctx->temp_reg; 1836 alu.src[0].chan = 0; 1837 1838 alu.dst.sel = ctx->temp_reg; 1839 alu.dst.chan = 0; 1840 alu.dst.write = 1; 1841 alu.last = 1; 1842 r = r600_bc_add_alu(ctx->bc, &alu); 1843 if (r) 1844 return r; 1845 1846 r = r600_bc_add_literal(ctx->bc, ctx->value); 1847 if (r) 1848 return r; 1849 } 1850 1851 /* result.y = tmp - floor(tmp); */ 1852 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 1853 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1854 1855 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT; 1856 alu.src[0] = r600_src[0]; 1857 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1858 if (r) 1859 return r; 1860 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1861 1862 alu.dst.sel = ctx->temp_reg; 1863// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1864// if (r) 1865// return r; 1866 alu.dst.write = 1; 1867 alu.dst.chan = 1; 1868 1869 alu.last = 1; 1870 1871 r = r600_bc_add_alu(ctx->bc, &alu); 1872 if (r) 1873 return r; 1874 r = r600_bc_add_literal(ctx->bc, ctx->value); 1875 if (r) 1876 return r; 1877 } 1878 1879 /* result.z = RoughApprox2ToX(tmp);*/ 1880 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 1881 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1882 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 1883 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1884 if (r) 1885 return r; 1886 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1887 1888 alu.dst.sel = ctx->temp_reg; 1889 alu.dst.write = 1; 1890 alu.dst.chan = 2; 1891 1892 alu.last = 1; 1893 1894 r = r600_bc_add_alu(ctx->bc, &alu); 1895 if (r) 1896 return r; 1897 r = r600_bc_add_literal(ctx->bc, ctx->value); 1898 if (r) 1899 return r; 1900 } 1901 1902 /* result.w = 1.0;*/ 1903 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 1904 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1905 1906 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1907 alu.src[0].sel = V_SQ_ALU_SRC_1; 1908 alu.src[0].chan = 0; 1909 1910 alu.dst.sel = ctx->temp_reg; 1911 alu.dst.chan = 3; 1912 alu.dst.write = 1; 1913 alu.last = 1; 1914 r = r600_bc_add_alu(ctx->bc, &alu); 1915 if (r) 1916 return r; 1917 r = r600_bc_add_literal(ctx->bc, ctx->value); 1918 if (r) 1919 return r; 1920 } 1921 return tgsi_helper_copy(ctx, inst); 1922} 1923 1924static int tgsi_arl(struct r600_shader_ctx *ctx) 1925{ 1926 /* TODO from r600c, ar values don't persist between clauses */ 1927 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1928 struct r600_bc_alu alu; 1929 int r; 1930 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1931 1932 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 1933 1934 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1935 if (r) 1936 return r; 1937 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1938 1939 alu.last = 1; 1940 1941 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU); 1942 if (r) 1943 return r; 1944 return 0; 1945} 1946 1947static int tgsi_opdst(struct r600_shader_ctx *ctx) 1948{ 1949 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1950 struct r600_bc_alu alu; 1951 int i, r = 0; 1952 1953 for (i = 0; i < 4; i++) { 1954 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1955 1956 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1957 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1958 if (r) 1959 return r; 1960 1961 if (i == 0 || i == 3) { 1962 alu.src[0].sel = V_SQ_ALU_SRC_1; 1963 } else { 1964 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1965 if (r) 1966 return r; 1967 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1968 } 1969 1970 if (i == 0 || i == 2) { 1971 alu.src[1].sel = V_SQ_ALU_SRC_1; 1972 } else { 1973 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 1974 if (r) 1975 return r; 1976 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1977 } 1978 if (i == 3) 1979 alu.last = 1; 1980 r = r600_bc_add_alu(ctx->bc, &alu); 1981 if (r) 1982 return r; 1983 } 1984 return 0; 1985} 1986 1987static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 1988{ 1989 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1990 struct r600_bc_alu alu; 1991 int r; 1992 1993 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1994 alu.inst = opcode; 1995 alu.predicate = 1; 1996 1997 alu.dst.sel = ctx->temp_reg; 1998 alu.dst.write = 1; 1999 alu.dst.chan = 0; 2000 2001 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2002 if (r) 2003 return r; 2004 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2005 alu.src[1].sel = V_SQ_ALU_SRC_0; 2006 alu.src[1].chan = 0; 2007 2008 alu.last = 1; 2009 2010 r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE); 2011 if (r) 2012 return r; 2013 return 0; 2014} 2015 2016static int pops(struct r600_shader_ctx *ctx, int pops) 2017{ 2018 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP); 2019 ctx->bc->cf_last->pop_count = pops; 2020 return 0; 2021} 2022 2023static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2024{ 2025 switch(reason) { 2026 case FC_PUSH_VPM: 2027 ctx->bc->callstack[ctx->bc->call_sp].current--; 2028 break; 2029 case FC_PUSH_WQM: 2030 case FC_LOOP: 2031 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2032 break; 2033 case FC_REP: 2034 /* TOODO : for 16 vp asic should -= 2; */ 2035 ctx->bc->callstack[ctx->bc->call_sp].current --; 2036 break; 2037 } 2038} 2039 2040static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2041{ 2042 if (check_max_only) { 2043 int diff; 2044 switch (reason) { 2045 case FC_PUSH_VPM: 2046 diff = 1; 2047 break; 2048 case FC_PUSH_WQM: 2049 diff = 4; 2050 break; 2051 } 2052 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2053 ctx->bc->callstack[ctx->bc->call_sp].max) { 2054 ctx->bc->callstack[ctx->bc->call_sp].max = 2055 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2056 } 2057 return; 2058 } 2059 switch (reason) { 2060 case FC_PUSH_VPM: 2061 ctx->bc->callstack[ctx->bc->call_sp].current++; 2062 break; 2063 case FC_PUSH_WQM: 2064 case FC_LOOP: 2065 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2066 break; 2067 case FC_REP: 2068 ctx->bc->callstack[ctx->bc->call_sp].current++; 2069 break; 2070 } 2071 2072 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2073 ctx->bc->callstack[ctx->bc->call_sp].max) { 2074 ctx->bc->callstack[ctx->bc->call_sp].max = 2075 ctx->bc->callstack[ctx->bc->call_sp].current; 2076 } 2077} 2078 2079static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2080{ 2081 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2082 2083 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2084 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2085 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2086 sp->num_mid++; 2087} 2088 2089static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2090{ 2091 ctx->bc->fc_sp++; 2092 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2093 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2094} 2095 2096static void fc_poplevel(struct r600_shader_ctx *ctx) 2097{ 2098 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2099 if (sp->mid) { 2100 free(sp->mid); 2101 sp->mid = NULL; 2102 } 2103 sp->num_mid = 0; 2104 sp->start = NULL; 2105 sp->type = 0; 2106 ctx->bc->fc_sp--; 2107} 2108 2109#if 0 2110static int emit_return(struct r600_shader_ctx *ctx) 2111{ 2112 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2113 return 0; 2114} 2115 2116static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2117{ 2118 2119 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2120 ctx->bc->cf_last->pop_count = pops; 2121 /* TODO work out offset */ 2122 return 0; 2123} 2124 2125static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2126{ 2127 return 0; 2128} 2129 2130static void emit_testflag(struct r600_shader_ctx *ctx) 2131{ 2132 2133} 2134 2135static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2136{ 2137 emit_testflag(ctx); 2138 emit_jump_to_offset(ctx, 1, 4); 2139 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2140 pops(ctx, ifidx + 1); 2141 emit_return(ctx); 2142} 2143 2144static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2145{ 2146 emit_testflag(ctx); 2147 2148 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2149 ctx->bc->cf_last->pop_count = 1; 2150 2151 fc_set_mid(ctx, fc_sp); 2152 2153 pops(ctx, 1); 2154} 2155#endif 2156 2157static int tgsi_if(struct r600_shader_ctx *ctx) 2158{ 2159 emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE); 2160 2161 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2162 2163 fc_pushlevel(ctx, FC_IF); 2164 2165 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2166 return 0; 2167} 2168 2169static int tgsi_else(struct r600_shader_ctx *ctx) 2170{ 2171 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE); 2172 ctx->bc->cf_last->pop_count = 1; 2173 2174 fc_set_mid(ctx, ctx->bc->fc_sp); 2175 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2176 return 0; 2177} 2178 2179static int tgsi_endif(struct r600_shader_ctx *ctx) 2180{ 2181 pops(ctx, 1); 2182 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2183 R600_ERR("if/endif unbalanced in shader\n"); 2184 return -1; 2185 } 2186 2187 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2188 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2189 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2190 } else { 2191 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2192 } 2193 fc_poplevel(ctx); 2194 2195 callstack_decrease_current(ctx, FC_PUSH_VPM); 2196 return 0; 2197} 2198 2199static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2200{ 2201 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL); 2202 2203 fc_pushlevel(ctx, FC_LOOP); 2204 2205 /* check stack depth */ 2206 callstack_check_depth(ctx, FC_LOOP, 0); 2207 return 0; 2208} 2209 2210static int tgsi_endloop(struct r600_shader_ctx *ctx) 2211{ 2212 int i; 2213 2214 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END); 2215 2216 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2217 R600_ERR("loop/endloop in shader code are not paired.\n"); 2218 return -EINVAL; 2219 } 2220 2221 /* fixup loop pointers - from r600isa 2222 LOOP END points to CF after LOOP START, 2223 LOOP START point to CF after LOOP END 2224 BRK/CONT point to LOOP END CF 2225 */ 2226 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2227 2228 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2229 2230 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2231 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2232 } 2233 /* TODO add LOOPRET support */ 2234 fc_poplevel(ctx); 2235 callstack_decrease_current(ctx, FC_LOOP); 2236 return 0; 2237} 2238 2239static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2240{ 2241 unsigned int fscp; 2242 2243 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2244 { 2245 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2246 break; 2247 } 2248 2249 if (fscp == 0) { 2250 R600_ERR("Break not inside loop/endloop pair\n"); 2251 return -EINVAL; 2252 } 2253 2254 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2255 ctx->bc->cf_last->pop_count = 1; 2256 2257 fc_set_mid(ctx, fscp); 2258 2259 pops(ctx, 1); 2260 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2261 return 0; 2262} 2263 2264static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2265 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2266 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2267 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2268 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2269 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2270 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2271 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2272 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2273 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2274 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2275 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2276 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2277 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2278 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2279 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2280 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2281 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2282 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2283 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2284 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2285 /* gap */ 2286 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2287 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2288 /* gap */ 2289 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2290 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2291 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2292 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2293 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2294 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2295 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2296 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2297 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2298 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2299 /* gap */ 2300 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2301 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2302 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2303 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2304 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2305 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2306 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2307 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2308 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2309 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2310 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2311 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2312 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2313 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2314 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2315 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2316 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2317 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2318 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2319 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2320 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2321 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2322 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2323 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2324 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2325 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2326 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2327 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2328 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2329 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2330 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2331 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2332 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2333 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2334 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2335 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2336 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2337 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2338 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2339 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2340 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2341 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2342 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2343 /* gap */ 2344 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2345 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2346 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2347 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2348 /* gap */ 2349 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2350 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2351 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2352 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2353 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2354 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2355 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2356 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2357 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2358 /* gap */ 2359 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2360 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2361 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2362 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2363 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2364 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2365 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2366 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2367 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2368 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2369 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2370 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2371 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2372 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2373 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2374 /* gap */ 2375 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2376 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2377 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2378 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2379 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2380 /* gap */ 2381 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2382 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2383 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2384 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2385 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2386 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2387 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2388 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2389 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2390 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2391 /* gap */ 2392 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2393 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2394 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2395 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2396 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2397 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2398 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2399 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2400 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2401 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2402 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2403 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2404 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2405 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2406 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2407 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2408 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2409 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2410 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2411 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2412 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2413 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2414 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2415 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2416 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2417 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2418 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2419 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2420}; 2421