r600_shader.c revision 0e6a02d29915db2ca460206656ab517ddaf0b455
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36 37 38struct r600_shader_tgsi_instruction; 39 40struct r600_shader_ctx { 41 struct tgsi_shader_info info; 42 struct tgsi_parse_context parse; 43 const struct tgsi_token *tokens; 44 unsigned type; 45 unsigned file_offset[TGSI_FILE_COUNT]; 46 unsigned temp_reg; 47 struct r600_shader_tgsi_instruction *inst_info; 48 struct r600_bc *bc; 49 struct r600_shader *shader; 50 u32 value[4]; 51}; 52 53struct r600_shader_tgsi_instruction { 54 unsigned tgsi_opcode; 55 unsigned is_op3; 56 unsigned r600_opcode; 57 int (*process)(struct r600_shader_ctx *ctx); 58}; 59 60static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; 61static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 62 63static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 64{ 65 struct r600_context *rctx = r600_context(ctx); 66 const struct util_format_description *desc; 67 enum pipe_format resource_format[160]; 68 unsigned i, nresources = 0; 69 struct r600_bc *bc = &shader->bc; 70 struct r600_bc_cf *cf; 71 struct r600_bc_vtx *vtx; 72 73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 74 return 0; 75 for (i = 0; i < rctx->vertex_elements->count; i++) { 76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 77 } 78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 79 switch (cf->inst) { 80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 83 desc = util_format_description(resource_format[vtx->buffer_id]); 84 if (desc == NULL) { 85 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 86 return -EINVAL; 87 } 88 vtx->dst_sel_x = desc->swizzle[0]; 89 vtx->dst_sel_y = desc->swizzle[1]; 90 vtx->dst_sel_z = desc->swizzle[2]; 91 vtx->dst_sel_w = desc->swizzle[3]; 92 } 93 break; 94 default: 95 break; 96 } 97 } 98 return r600_bc_build(&shader->bc); 99} 100 101int r600_pipe_shader_create(struct pipe_context *ctx, 102 struct r600_context_state *rpshader, 103 const struct tgsi_token *tokens) 104{ 105 struct r600_screen *rscreen = r600_screen(ctx->screen); 106 int r; 107 108//fprintf(stderr, "--------------------------------------------------------------\n"); 109//tgsi_dump(tokens, 0); 110 if (rpshader == NULL) 111 return -ENOMEM; 112 rpshader->shader.family = radeon_get_family(rscreen->rw); 113 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 114 if (r) { 115 R600_ERR("translation from TGSI failed !\n"); 116 return r; 117 } 118 r = r600_bc_build(&rpshader->shader.bc); 119 if (r) { 120 R600_ERR("building bytecode failed !\n"); 121 return r; 122 } 123//fprintf(stderr, "______________________________________________________________\n"); 124 return 0; 125} 126 127static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 128{ 129 struct r600_screen *rscreen = r600_screen(ctx->screen); 130 struct r600_shader *rshader = &rpshader->shader; 131 struct radeon_state *state; 132 unsigned i, tmp; 133 134 rpshader->rstate = radeon_state_decref(rpshader->rstate); 135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); 136 if (state == NULL) 137 return -ENOMEM; 138 for (i = 0; i < 10; i++) { 139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; 140 } 141 /* so far never got proper semantic id from tgsi */ 142 for (i = 0; i < 32; i++) { 143 tmp = i << ((i & 3) * 8); 144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; 145 } 146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); 147 state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); 148 rpshader->rstate = state; 149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); 151 rpshader->rstate->nbo = 2; 152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 153 rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; 154 return radeon_state_pm4(state); 155} 156 157static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 158{ 159 const struct pipe_rasterizer_state *rasterizer; 160 struct r600_screen *rscreen = r600_screen(ctx->screen); 161 struct r600_shader *rshader = &rpshader->shader; 162 struct r600_context *rctx = r600_context(ctx); 163 struct radeon_state *state; 164 unsigned i, tmp, exports_ps, num_cout; 165 166 rasterizer = &rctx->rasterizer->state.rasterizer; 167 rpshader->rstate = radeon_state_decref(rpshader->rstate); 168 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); 169 if (state == NULL) 170 return -ENOMEM; 171 for (i = 0; i < rshader->ninput; i++) { 172 tmp = S_028644_SEMANTIC(i); 173 tmp |= S_028644_SEL_CENTROID(1); 174 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 175 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { 176 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 177 } 178 if (rasterizer->sprite_coord_enable & (1 << i)) { 179 tmp |= S_028644_PT_SPRITE_TEX(1); 180 } 181 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; 182 } 183 184 exports_ps = 0; 185 num_cout = 0; 186 for (i = 0; i < rshader->noutput; i++) { 187 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 188 exports_ps |= 1; 189 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 190 exports_ps |= (1 << (num_cout+1)); 191 num_cout++; 192 } 193 } 194 if (!exports_ps) { 195 /* always at least export 1 component per pixel */ 196 exports_ps = 2; 197 } 198 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | 199 S_0286CC_PERSP_GRADIENT_ENA(1); 200 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; 201 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); 202 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; 203 rpshader->rstate = state; 204 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 205 rpshader->rstate->nbo = 1; 206 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 207 return radeon_state_pm4(state); 208} 209 210static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 211{ 212 struct r600_screen *rscreen = r600_screen(ctx->screen); 213 struct r600_context *rctx = r600_context(ctx); 214 struct r600_shader *rshader = &rpshader->shader; 215 int r; 216 217 /* copy new shader */ 218 radeon_bo_decref(rscreen->rw, rpshader->bo); 219 rpshader->bo = NULL; 220 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 221 4096, NULL); 222 if (rpshader->bo == NULL) { 223 return -ENOMEM; 224 } 225 radeon_bo_map(rscreen->rw, rpshader->bo); 226 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); 227 radeon_bo_unmap(rscreen->rw, rpshader->bo); 228 /* build state */ 229 rshader->flat_shade = rctx->flat_shade; 230 switch (rshader->processor_type) { 231 case TGSI_PROCESSOR_VERTEX: 232 r = r600_pipe_shader_vs(ctx, rpshader); 233 break; 234 case TGSI_PROCESSOR_FRAGMENT: 235 r = r600_pipe_shader_ps(ctx, rpshader); 236 break; 237 default: 238 r = -EINVAL; 239 break; 240 } 241 return r; 242} 243 244int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 245{ 246 struct r600_context *rctx = r600_context(ctx); 247 int r; 248 249 if (rpshader == NULL) 250 return -EINVAL; 251 /* there should be enough input */ 252 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 253 R600_ERR("%d resources provided, expecting %d\n", 254 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 255 return -EINVAL; 256 } 257 r = r600_shader_update(ctx, &rpshader->shader); 258 if (r) 259 return r; 260 return r600_pipe_shader(ctx, rpshader); 261} 262 263static int tgsi_is_supported(struct r600_shader_ctx *ctx) 264{ 265 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 266 int j; 267 268 if (i->Instruction.NumDstRegs > 1) { 269 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 270 return -EINVAL; 271 } 272 if (i->Instruction.Predicate) { 273 R600_ERR("predicate unsupported\n"); 274 return -EINVAL; 275 } 276 if (i->Instruction.Label) { 277 R600_ERR("label unsupported\n"); 278 return -EINVAL; 279 } 280 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 281 if (i->Src[j].Register.Indirect || 282 i->Src[j].Register.Dimension || 283 i->Src[j].Register.Absolute) { 284 R600_ERR("unsupported src (indirect|dimension|absolute)\n"); 285 return -EINVAL; 286 } 287 } 288 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 289 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { 290 R600_ERR("unsupported dst (indirect|dimension)\n"); 291 return -EINVAL; 292 } 293 } 294 return 0; 295} 296 297static int tgsi_declaration(struct r600_shader_ctx *ctx) 298{ 299 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 300 struct r600_bc_vtx vtx; 301 unsigned i; 302 int r; 303 304 switch (d->Declaration.File) { 305 case TGSI_FILE_INPUT: 306 i = ctx->shader->ninput++; 307 ctx->shader->input[i].name = d->Semantic.Name; 308 ctx->shader->input[i].sid = d->Semantic.Index; 309 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 310 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 311 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 312 /* turn input into fetch */ 313 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 314 vtx.inst = 0; 315 vtx.fetch_type = 0; 316 vtx.buffer_id = i; 317 /* register containing the index into the buffer */ 318 vtx.src_gpr = 0; 319 vtx.src_sel_x = 0; 320 vtx.mega_fetch_count = 0x1F; 321 vtx.dst_gpr = ctx->shader->input[i].gpr; 322 vtx.dst_sel_x = 0; 323 vtx.dst_sel_y = 1; 324 vtx.dst_sel_z = 2; 325 vtx.dst_sel_w = 3; 326 r = r600_bc_add_vtx(ctx->bc, &vtx); 327 if (r) 328 return r; 329 } 330 break; 331 case TGSI_FILE_OUTPUT: 332 i = ctx->shader->noutput++; 333 ctx->shader->output[i].name = d->Semantic.Name; 334 ctx->shader->output[i].sid = d->Semantic.Index; 335 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 336 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 337 break; 338 case TGSI_FILE_CONSTANT: 339 case TGSI_FILE_TEMPORARY: 340 case TGSI_FILE_SAMPLER: 341 break; 342 default: 343 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 344 return -EINVAL; 345 } 346 return 0; 347} 348 349int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 350{ 351 struct tgsi_full_immediate *immediate; 352 struct r600_shader_ctx ctx; 353 struct r600_bc_output output[32]; 354 unsigned output_done, noutput; 355 unsigned opcode; 356 int i, r = 0, pos0; 357 358 ctx.bc = &shader->bc; 359 ctx.shader = shader; 360 r = r600_bc_init(ctx.bc, shader->family); 361 if (r) 362 return r; 363 ctx.tokens = tokens; 364 tgsi_scan_shader(tokens, &ctx.info); 365 tgsi_parse_init(&ctx.parse, tokens); 366 ctx.type = ctx.parse.FullHeader.Processor.Processor; 367 shader->processor_type = ctx.type; 368 369 /* register allocations */ 370 /* Values [0,127] correspond to GPR[0..127]. 371 * Values [128,159] correspond to constant buffer bank 0 372 * Values [160,191] correspond to constant buffer bank 1 373 * Values [256,511] correspond to cfile constants c[0..255]. 374 * Other special values are shown in the list below. 375 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 376 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 377 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 378 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 379 * 248 SQ_ALU_SRC_0: special constant 0.0. 380 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 381 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 382 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 383 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 384 * 253 SQ_ALU_SRC_LITERAL: literal constant. 385 * 254 SQ_ALU_SRC_PV: previous vector result. 386 * 255 SQ_ALU_SRC_PS: previous scalar result. 387 */ 388 for (i = 0; i < TGSI_FILE_COUNT; i++) { 389 ctx.file_offset[i] = 0; 390 } 391 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 392 ctx.file_offset[TGSI_FILE_INPUT] = 1; 393 } 394 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 395 ctx.info.file_count[TGSI_FILE_INPUT]; 396 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 397 ctx.info.file_count[TGSI_FILE_OUTPUT]; 398 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 399 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 400 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 401 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 402 403 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 404 tgsi_parse_token(&ctx.parse); 405 switch (ctx.parse.FullToken.Token.Type) { 406 case TGSI_TOKEN_TYPE_IMMEDIATE: 407 immediate = &ctx.parse.FullToken.FullImmediate; 408 ctx.value[0] = immediate->u[0].Uint; 409 ctx.value[1] = immediate->u[1].Uint; 410 ctx.value[2] = immediate->u[2].Uint; 411 ctx.value[3] = immediate->u[3].Uint; 412 break; 413 case TGSI_TOKEN_TYPE_DECLARATION: 414 r = tgsi_declaration(&ctx); 415 if (r) 416 goto out_err; 417 break; 418 case TGSI_TOKEN_TYPE_INSTRUCTION: 419 r = tgsi_is_supported(&ctx); 420 if (r) 421 goto out_err; 422 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 423 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 424 r = ctx.inst_info->process(&ctx); 425 if (r) 426 goto out_err; 427 r = r600_bc_add_literal(ctx.bc, ctx.value); 428 if (r) 429 goto out_err; 430 break; 431 default: 432 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 433 r = -EINVAL; 434 goto out_err; 435 } 436 } 437 /* export output */ 438 noutput = shader->noutput; 439 for (i = 0, pos0 = 0; i < noutput; i++) { 440 memset(&output[i], 0, sizeof(struct r600_bc_output)); 441 output[i].gpr = shader->output[i].gpr; 442 output[i].elem_size = 3; 443 output[i].swizzle_x = 0; 444 output[i].swizzle_y = 1; 445 output[i].swizzle_z = 2; 446 output[i].swizzle_w = 3; 447 output[i].barrier = 1; 448 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 449 output[i].array_base = i - pos0; 450 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 451 switch (ctx.type) { 452 case TGSI_PROCESSOR_VERTEX: 453 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 454 output[i].array_base = 60; 455 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 456 /* position doesn't count in array_base */ 457 pos0++; 458 } 459 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 460 output[i].array_base = 61; 461 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 462 /* position doesn't count in array_base */ 463 pos0++; 464 } 465 break; 466 case TGSI_PROCESSOR_FRAGMENT: 467 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 468 output[i].array_base = shader->output[i].sid; 469 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 470 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 471 output[i].array_base = 61; 472 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 473 } else { 474 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 475 r = -EINVAL; 476 goto out_err; 477 } 478 break; 479 default: 480 R600_ERR("unsupported processor type %d\n", ctx.type); 481 r = -EINVAL; 482 goto out_err; 483 } 484 } 485 /* add fake param output for vertex shader if no param is exported */ 486 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 487 for (i = 0, pos0 = 0; i < noutput; i++) { 488 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 489 pos0 = 1; 490 break; 491 } 492 } 493 if (!pos0) { 494 memset(&output[i], 0, sizeof(struct r600_bc_output)); 495 output[i].gpr = 0; 496 output[i].elem_size = 3; 497 output[i].swizzle_x = 0; 498 output[i].swizzle_y = 1; 499 output[i].swizzle_z = 2; 500 output[i].swizzle_w = 3; 501 output[i].barrier = 1; 502 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 503 output[i].array_base = 0; 504 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 505 noutput++; 506 } 507 } 508 /* add fake pixel export */ 509 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 510 memset(&output[0], 0, sizeof(struct r600_bc_output)); 511 output[0].gpr = 0; 512 output[0].elem_size = 3; 513 output[0].swizzle_x = 7; 514 output[0].swizzle_y = 7; 515 output[0].swizzle_z = 7; 516 output[0].swizzle_w = 7; 517 output[0].barrier = 1; 518 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 519 output[0].array_base = 0; 520 output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; 521 noutput++; 522 } 523 /* set export done on last export of each type */ 524 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 525 if (i == (noutput - 1)) { 526 output[i].end_of_program = 1; 527 } 528 if (!(output_done & (1 << output[i].type))) { 529 output_done |= (1 << output[i].type); 530 output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; 531 } 532 } 533 /* add output to bytecode */ 534 for (i = 0; i < noutput; i++) { 535 r = r600_bc_add_output(ctx.bc, &output[i]); 536 if (r) 537 goto out_err; 538 } 539 tgsi_parse_free(&ctx.parse); 540 return 0; 541out_err: 542 tgsi_parse_free(&ctx.parse); 543 return r; 544} 545 546static int tgsi_unsupported(struct r600_shader_ctx *ctx) 547{ 548 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 549 return -EINVAL; 550} 551 552static int tgsi_end(struct r600_shader_ctx *ctx) 553{ 554 return 0; 555} 556 557static int tgsi_src(struct r600_shader_ctx *ctx, 558 const struct tgsi_full_src_register *tgsi_src, 559 struct r600_bc_alu_src *r600_src) 560{ 561 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 562 r600_src->sel = tgsi_src->Register.Index; 563 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 564 r600_src->sel = 0; 565 } 566 r600_src->neg = tgsi_src->Register.Negate; 567 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 568 return 0; 569} 570 571static int tgsi_dst(struct r600_shader_ctx *ctx, 572 const struct tgsi_full_dst_register *tgsi_dst, 573 unsigned swizzle, 574 struct r600_bc_alu_dst *r600_dst) 575{ 576 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 577 578 r600_dst->sel = tgsi_dst->Register.Index; 579 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 580 r600_dst->chan = swizzle; 581 r600_dst->write = 1; 582 if (inst->Instruction.Saturate) { 583 r600_dst->clamp = 1; 584 } 585 return 0; 586} 587 588static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 589{ 590 switch (swizzle) { 591 case 0: 592 return tgsi_src->Register.SwizzleX; 593 case 1: 594 return tgsi_src->Register.SwizzleY; 595 case 2: 596 return tgsi_src->Register.SwizzleZ; 597 case 3: 598 return tgsi_src->Register.SwizzleW; 599 default: 600 return 0; 601 } 602} 603 604static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 605{ 606 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 607 struct r600_bc_alu alu; 608 int i, j, k, nconst, r; 609 610 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 611 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 612 nconst++; 613 } 614 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 615 if (r) { 616 return r; 617 } 618 } 619 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 620 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { 621 for (k = 0; k < 4; k++) { 622 memset(&alu, 0, sizeof(struct r600_bc_alu)); 623 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 624 alu.src[0].sel = r600_src[0].sel; 625 alu.src[0].chan = k; 626 alu.dst.sel = ctx->temp_reg + j; 627 alu.dst.chan = k; 628 alu.dst.write = 1; 629 if (k == 3) 630 alu.last = 1; 631 r = r600_bc_add_alu(ctx->bc, &alu); 632 if (r) 633 return r; 634 } 635 r600_src[0].sel = ctx->temp_reg + j; 636 j--; 637 } 638 } 639 return 0; 640} 641 642static int tgsi_op2(struct r600_shader_ctx *ctx) 643{ 644 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 645 struct r600_bc_alu_src r600_src[3]; 646 struct r600_bc_alu alu; 647 int i, j, r; 648 649 r = tgsi_split_constant(ctx, r600_src); 650 if (r) 651 return r; 652 for (i = 0; i < 4; i++) { 653 memset(&alu, 0, sizeof(struct r600_bc_alu)); 654 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 655 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 656 alu.dst.chan = i; 657 } else { 658 alu.inst = ctx->inst_info->r600_opcode; 659 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 660 alu.src[j] = r600_src[j]; 661 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 662 } 663 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 664 if (r) 665 return r; 666 } 667 /* handle some special cases */ 668 switch (ctx->inst_info->tgsi_opcode) { 669 case TGSI_OPCODE_SUB: 670 alu.src[1].neg = 1; 671 break; 672 case TGSI_OPCODE_ABS: 673 alu.src[0].abs = 1; 674 break; 675 default: 676 break; 677 } 678 if (i == 3) { 679 alu.last = 1; 680 } 681 r = r600_bc_add_alu(ctx->bc, &alu); 682 if (r) 683 return r; 684 } 685 return 0; 686} 687 688/* 689 * r600 - trunc to -PI..PI range 690 * r700 - normalize by dividing by 2PI 691 * see fdo bug 27901 692 */ 693static int tgsi_trig(struct r600_shader_ctx *ctx) 694{ 695 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 696 struct r600_bc_alu_src r600_src[3]; 697 struct r600_bc_alu alu; 698 int i, r; 699 uint32_t lit_vals[4]; 700 701 memset(lit_vals, 0, 4*4); 702 r = tgsi_split_constant(ctx, r600_src); 703 if (r) 704 return r; 705 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 706 lit_vals[1] = fui(0.5f); 707 708 memset(&alu, 0, sizeof(struct r600_bc_alu)); 709 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 710 alu.is_op3 = 1; 711 712 alu.dst.chan = 0; 713 alu.dst.sel = ctx->temp_reg; 714 alu.dst.write = 1; 715 716 alu.src[0] = r600_src[0]; 717 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 718 719 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 720 alu.src[1].chan = 0; 721 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 722 alu.src[2].chan = 1; 723 alu.last = 1; 724 r = r600_bc_add_alu(ctx->bc, &alu); 725 if (r) 726 return r; 727 r = r600_bc_add_literal(ctx->bc, lit_vals); 728 if (r) 729 return r; 730 731 memset(&alu, 0, sizeof(struct r600_bc_alu)); 732 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT; 733 734 alu.dst.chan = 0; 735 alu.dst.sel = ctx->temp_reg; 736 alu.dst.write = 1; 737 738 alu.src[0].sel = ctx->temp_reg; 739 alu.src[0].chan = 0; 740 alu.last = 1; 741 r = r600_bc_add_alu(ctx->bc, &alu); 742 if (r) 743 return r; 744 745 if (ctx->bc->chiprev == 0) { 746 lit_vals[0] = fui(3.1415926535897f * 2.0f); 747 lit_vals[1] = fui(-3.1415926535897f); 748 } else { 749 lit_vals[0] = fui(1.0f); 750 lit_vals[1] = fui(-0.5f); 751 } 752 753 memset(&alu, 0, sizeof(struct r600_bc_alu)); 754 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 755 alu.is_op3 = 1; 756 757 alu.dst.chan = 0; 758 alu.dst.sel = ctx->temp_reg; 759 alu.dst.write = 1; 760 761 alu.src[0].sel = ctx->temp_reg; 762 alu.src[0].chan = 0; 763 764 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 765 alu.src[1].chan = 0; 766 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 767 alu.src[2].chan = 1; 768 alu.last = 1; 769 r = r600_bc_add_alu(ctx->bc, &alu); 770 if (r) 771 return r; 772 r = r600_bc_add_literal(ctx->bc, lit_vals); 773 if (r) 774 return r; 775 776 memset(&alu, 0, sizeof(struct r600_bc_alu)); 777 alu.inst = ctx->inst_info->r600_opcode; 778 alu.dst.chan = 0; 779 alu.dst.sel = ctx->temp_reg; 780 alu.dst.write = 1; 781 782 alu.src[0].sel = ctx->temp_reg; 783 alu.src[0].chan = 0; 784 alu.last = 1; 785 r = r600_bc_add_alu(ctx->bc, &alu); 786 if (r) 787 return r; 788 789 /* replicate result */ 790 for (i = 0; i < 4; i++) { 791 memset(&alu, 0, sizeof(struct r600_bc_alu)); 792 alu.src[0].sel = ctx->temp_reg; 793 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 794 alu.dst.chan = i; 795 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 796 if (r) 797 return r; 798 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 799 if (i == 3) 800 alu.last = 1; 801 r = r600_bc_add_alu(ctx->bc, &alu); 802 if (r) 803 return r; 804 } 805 return 0; 806} 807 808static int tgsi_kill(struct r600_shader_ctx *ctx) 809{ 810 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 811 struct r600_bc_alu alu; 812 int i, r; 813 814 for (i = 0; i < 4; i++) { 815 memset(&alu, 0, sizeof(struct r600_bc_alu)); 816 alu.inst = ctx->inst_info->r600_opcode; 817 alu.dst.chan = i; 818 alu.src[0].sel = V_SQ_ALU_SRC_0; 819 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 820 if (r) 821 return r; 822 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 823 if (i == 3) { 824 alu.last = 1; 825 } 826 r = r600_bc_add_alu(ctx->bc, &alu); 827 if (r) 828 return r; 829 } 830 return 0; 831} 832 833static int tgsi_slt(struct r600_shader_ctx *ctx) 834{ 835 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 836 struct r600_bc_alu_src r600_src[3]; 837 struct r600_bc_alu alu; 838 int i, r; 839 840 r = tgsi_split_constant(ctx, r600_src); 841 if (r) 842 return r; 843 for (i = 0; i < 4; i++) { 844 memset(&alu, 0, sizeof(struct r600_bc_alu)); 845 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 846 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 847 alu.dst.chan = i; 848 } else { 849 alu.inst = ctx->inst_info->r600_opcode; 850 alu.src[1] = r600_src[0]; 851 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 852 alu.src[0] = r600_src[1]; 853 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 854 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 855 if (r) 856 return r; 857 } 858 if (i == 3) { 859 alu.last = 1; 860 } 861 r = r600_bc_add_alu(ctx->bc, &alu); 862 if (r) 863 return r; 864 } 865 return 0; 866} 867 868static int tgsi_lit(struct r600_shader_ctx *ctx) 869{ 870 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 871 struct r600_bc_alu alu; 872 int r; 873 874 /* dst.x, <- 1.0 */ 875 memset(&alu, 0, sizeof(struct r600_bc_alu)); 876 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 877 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 878 alu.src[0].chan = 0; 879 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 880 if (r) 881 return r; 882 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 883 r = r600_bc_add_alu(ctx->bc, &alu); 884 if (r) 885 return r; 886 887 /* dst.y = max(src.x, 0.0) */ 888 memset(&alu, 0, sizeof(struct r600_bc_alu)); 889 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; 890 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 891 if (r) 892 return r; 893 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 894 alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); 895 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 896 if (r) 897 return r; 898 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 899 r = r600_bc_add_alu(ctx->bc, &alu); 900 if (r) 901 return r; 902 903 /* dst.z = NOP - fill Z slot */ 904 memset(&alu, 0, sizeof(struct r600_bc_alu)); 905 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 906 alu.dst.chan = 2; 907 r = r600_bc_add_alu(ctx->bc, &alu); 908 if (r) 909 return r; 910 911 /* dst.w, <- 1.0 */ 912 memset(&alu, 0, sizeof(struct r600_bc_alu)); 913 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 914 alu.src[0].sel = V_SQ_ALU_SRC_1; 915 alu.src[0].chan = 0; 916 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 917 if (r) 918 return r; 919 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 920 alu.last = 1; 921 r = r600_bc_add_alu(ctx->bc, &alu); 922 if (r) 923 return r; 924 925 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 926 { 927 int chan; 928 int sel; 929 930 /* dst.z = log(src.y) */ 931 memset(&alu, 0, sizeof(struct r600_bc_alu)); 932 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; 933 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 934 if (r) 935 return r; 936 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 937 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 938 if (r) 939 return r; 940 alu.last = 1; 941 r = r600_bc_add_alu(ctx->bc, &alu); 942 if (r) 943 return r; 944 945 chan = alu.dst.chan; 946 sel = alu.dst.sel; 947 948 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 949 memset(&alu, 0, sizeof(struct r600_bc_alu)); 950 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 951 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 952 if (r) 953 return r; 954 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 955 alu.src[1].sel = sel; 956 alu.src[1].chan = chan; 957 r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]); 958 if (r) 959 return r; 960 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 961 alu.dst.sel = ctx->temp_reg; 962 alu.dst.chan = 0; 963 alu.dst.write = 1; 964 alu.is_op3 = 1; 965 alu.last = 1; 966 r = r600_bc_add_alu(ctx->bc, &alu); 967 if (r) 968 return r; 969 970 /* dst.z = exp(tmp.x) */ 971 memset(&alu, 0, sizeof(struct r600_bc_alu)); 972 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 973 alu.src[0].sel = ctx->temp_reg; 974 alu.src[0].chan = 0; 975 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 976 if (r) 977 return r; 978 alu.last = 1; 979 r = r600_bc_add_alu(ctx->bc, &alu); 980 if (r) 981 return r; 982 } 983 return 0; 984} 985 986static int tgsi_trans(struct r600_shader_ctx *ctx) 987{ 988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 989 struct r600_bc_alu alu; 990 int i, j, r; 991 992 for (i = 0; i < 4; i++) { 993 memset(&alu, 0, sizeof(struct r600_bc_alu)); 994 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 995 alu.inst = ctx->inst_info->r600_opcode; 996 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 997 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); 998 if (r) 999 return r; 1000 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1001 } 1002 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1003 if (r) 1004 return r; 1005 alu.last = 1; 1006 r = r600_bc_add_alu(ctx->bc, &alu); 1007 if (r) 1008 return r; 1009 } 1010 } 1011 return 0; 1012} 1013 1014static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1015{ 1016 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1017 struct r600_bc_alu alu; 1018 int i, r; 1019 1020 for (i = 0; i < 4; i++) { 1021 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1022 alu.src[0].sel = ctx->temp_reg; 1023 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1024 alu.dst.chan = i; 1025 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1026 if (r) 1027 return r; 1028 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1029 if (i == 3) 1030 alu.last = 1; 1031 r = r600_bc_add_alu(ctx->bc, &alu); 1032 if (r) 1033 return r; 1034 } 1035 return 0; 1036} 1037 1038static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1039{ 1040 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1041 struct r600_bc_alu alu; 1042 int i, r; 1043 1044 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1045 alu.inst = ctx->inst_info->r600_opcode; 1046 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1047 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1048 if (r) 1049 return r; 1050 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1051 } 1052 alu.dst.sel = ctx->temp_reg; 1053 alu.dst.write = 1; 1054 alu.last = 1; 1055 r = r600_bc_add_alu(ctx->bc, &alu); 1056 if (r) 1057 return r; 1058 /* replicate result */ 1059 return tgsi_helper_tempx_replicate(ctx); 1060} 1061 1062static int tgsi_pow(struct r600_shader_ctx *ctx) 1063{ 1064 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1065 struct r600_bc_alu alu; 1066 int r; 1067 1068 /* LOG2(a) */ 1069 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1070 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE; 1071 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1072 if (r) 1073 return r; 1074 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1075 alu.dst.sel = ctx->temp_reg; 1076 alu.dst.write = 1; 1077 alu.last = 1; 1078 r = r600_bc_add_alu(ctx->bc, &alu); 1079 if (r) 1080 return r; 1081 /* b * LOG2(a) */ 1082 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1083 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE; 1084 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1085 if (r) 1086 return r; 1087 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1088 alu.src[1].sel = ctx->temp_reg; 1089 alu.dst.sel = ctx->temp_reg; 1090 alu.dst.write = 1; 1091 alu.last = 1; 1092 r = r600_bc_add_alu(ctx->bc, &alu); 1093 if (r) 1094 return r; 1095 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1096 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1097 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 1098 alu.src[0].sel = ctx->temp_reg; 1099 alu.dst.sel = ctx->temp_reg; 1100 alu.dst.write = 1; 1101 alu.last = 1; 1102 r = r600_bc_add_alu(ctx->bc, &alu); 1103 if (r) 1104 return r; 1105 return tgsi_helper_tempx_replicate(ctx); 1106} 1107 1108static int tgsi_ssg(struct r600_shader_ctx *ctx) 1109{ 1110 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1111 struct r600_bc_alu alu; 1112 struct r600_bc_alu_src r600_src[3]; 1113 int i, r; 1114 1115 r = tgsi_split_constant(ctx, r600_src); 1116 if (r) 1117 return r; 1118 1119 /* tmp = (src > 0 ? 1 : src) */ 1120 for (i = 0; i < 4; i++) { 1121 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1122 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; 1123 alu.is_op3 = 1; 1124 alu.dst.sel = ctx->temp_reg; 1125 alu.dst.write = 1; 1126 1127 alu.src[0] = r600_src[0]; 1128 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1129 1130 alu.src[1].sel = V_SQ_ALU_SRC_1; 1131 1132 alu.src[2] = r600_src[0]; 1133 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1134 if (i == 3) 1135 alu.last = 1; 1136 r = r600_bc_add_alu(ctx->bc, &alu); 1137 if (r) 1138 return r; 1139 } 1140 1141 /* dst = (-tmp > 0 ? -1 : tmp) */ 1142 for (i = 0; i < 4; i++) { 1143 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1144 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; 1145 alu.is_op3 = 1; 1146 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1147 if (r) 1148 return r; 1149 1150 alu.src[0].sel = ctx->temp_reg; 1151 alu.src[0].neg = 1; 1152 1153 alu.src[1].sel = V_SQ_ALU_SRC_1; 1154 alu.src[1].neg = 1; 1155 1156 alu.src[2].sel = ctx->temp_reg; 1157 1158 alu.dst.write = 1; 1159 if (i == 3) 1160 alu.last = 1; 1161 r = r600_bc_add_alu(ctx->bc, &alu); 1162 if (r) 1163 return r; 1164 } 1165 return 0; 1166} 1167 1168static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1169{ 1170 struct r600_bc_alu alu; 1171 int i, r; 1172 1173 r = r600_bc_add_literal(ctx->bc, ctx->value); 1174 if (r) 1175 return r; 1176 for (i = 0; i < 4; i++) { 1177 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1178 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1179 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 1180 alu.dst.chan = i; 1181 } else { 1182 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1183 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1184 if (r) 1185 return r; 1186 alu.src[0].sel = ctx->temp_reg; 1187 alu.src[0].chan = i; 1188 } 1189 if (i == 3) { 1190 alu.last = 1; 1191 } 1192 r = r600_bc_add_alu(ctx->bc, &alu); 1193 if (r) 1194 return r; 1195 } 1196 return 0; 1197} 1198 1199static int tgsi_op3(struct r600_shader_ctx *ctx) 1200{ 1201 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1202 struct r600_bc_alu_src r600_src[3]; 1203 struct r600_bc_alu alu; 1204 int i, j, r; 1205 1206 r = tgsi_split_constant(ctx, r600_src); 1207 if (r) 1208 return r; 1209 /* do it in 2 step as op3 doesn't support writemask */ 1210 for (i = 0; i < 4; i++) { 1211 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1212 alu.inst = ctx->inst_info->r600_opcode; 1213 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1214 alu.src[j] = r600_src[j]; 1215 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1216 } 1217 alu.dst.sel = ctx->temp_reg; 1218 alu.dst.chan = i; 1219 alu.dst.write = 1; 1220 alu.is_op3 = 1; 1221 if (i == 3) { 1222 alu.last = 1; 1223 } 1224 r = r600_bc_add_alu(ctx->bc, &alu); 1225 if (r) 1226 return r; 1227 } 1228 return tgsi_helper_copy(ctx, inst); 1229} 1230 1231static int tgsi_dp(struct r600_shader_ctx *ctx) 1232{ 1233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1234 struct r600_bc_alu_src r600_src[3]; 1235 struct r600_bc_alu alu; 1236 int i, j, r; 1237 1238 r = tgsi_split_constant(ctx, r600_src); 1239 if (r) 1240 return r; 1241 for (i = 0; i < 4; i++) { 1242 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1243 alu.inst = ctx->inst_info->r600_opcode; 1244 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1245 alu.src[j] = r600_src[j]; 1246 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1247 } 1248 alu.dst.sel = ctx->temp_reg; 1249 alu.dst.chan = i; 1250 alu.dst.write = 1; 1251 /* handle some special cases */ 1252 switch (ctx->inst_info->tgsi_opcode) { 1253 case TGSI_OPCODE_DP2: 1254 if (i > 1) { 1255 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1256 alu.src[0].chan = alu.src[1].chan = 0; 1257 } 1258 break; 1259 case TGSI_OPCODE_DP3: 1260 if (i > 2) { 1261 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1262 alu.src[0].chan = alu.src[1].chan = 0; 1263 } 1264 break; 1265 default: 1266 break; 1267 } 1268 if (i == 3) { 1269 alu.last = 1; 1270 } 1271 r = r600_bc_add_alu(ctx->bc, &alu); 1272 if (r) 1273 return r; 1274 } 1275 return tgsi_helper_copy(ctx, inst); 1276} 1277 1278static int tgsi_tex(struct r600_shader_ctx *ctx) 1279{ 1280 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1281 struct r600_bc_tex tex; 1282 struct r600_bc_alu alu; 1283 unsigned src_gpr; 1284 int r, i; 1285 1286 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1287 1288 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1289 /* Add perspective divide */ 1290 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1291 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; 1292 alu.src[0].sel = src_gpr; 1293 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1294 alu.dst.sel = ctx->temp_reg; 1295 alu.dst.chan = 3; 1296 alu.last = 1; 1297 alu.dst.write = 1; 1298 r = r600_bc_add_alu(ctx->bc, &alu); 1299 if (r) 1300 return r; 1301 1302 for (i = 0; i < 3; i++) { 1303 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1304 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1305 alu.src[0].sel = ctx->temp_reg; 1306 alu.src[0].chan = 3; 1307 alu.src[1].sel = src_gpr; 1308 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1309 alu.dst.sel = ctx->temp_reg; 1310 alu.dst.chan = i; 1311 alu.dst.write = 1; 1312 r = r600_bc_add_alu(ctx->bc, &alu); 1313 if (r) 1314 return r; 1315 } 1316 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1317 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1318 alu.src[0].sel = V_SQ_ALU_SRC_1; 1319 alu.src[0].chan = 0; 1320 alu.dst.sel = ctx->temp_reg; 1321 alu.dst.chan = 3; 1322 alu.last = 1; 1323 alu.dst.write = 1; 1324 r = r600_bc_add_alu(ctx->bc, &alu); 1325 if (r) 1326 return r; 1327 src_gpr = ctx->temp_reg; 1328 } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) { 1329 for (i = 0; i < 4; i++) { 1330 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1331 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 1332 alu.src[0].sel = src_gpr; 1333 alu.src[0].chan = i; 1334 alu.dst.sel = ctx->temp_reg; 1335 alu.dst.chan = i; 1336 if (i == 3) 1337 alu.last = 1; 1338 alu.dst.write = 1; 1339 r = r600_bc_add_alu(ctx->bc, &alu); 1340 if (r) 1341 return r; 1342 } 1343 src_gpr = ctx->temp_reg; 1344 } 1345 1346 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1347 tex.inst = ctx->inst_info->r600_opcode; 1348 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1349 tex.sampler_id = tex.resource_id; 1350 tex.src_gpr = src_gpr; 1351 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1352 tex.dst_sel_x = 0; 1353 tex.dst_sel_y = 1; 1354 tex.dst_sel_z = 2; 1355 tex.dst_sel_w = 3; 1356 tex.src_sel_x = 0; 1357 tex.src_sel_y = 1; 1358 tex.src_sel_z = 2; 1359 tex.src_sel_w = 3; 1360 1361 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1362 tex.coord_type_x = 1; 1363 tex.coord_type_y = 1; 1364 tex.coord_type_z = 1; 1365 tex.coord_type_w = 1; 1366 } 1367 return r600_bc_add_tex(ctx->bc, &tex); 1368} 1369 1370static int tgsi_lrp(struct r600_shader_ctx *ctx) 1371{ 1372 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1373 struct r600_bc_alu_src r600_src[3]; 1374 struct r600_bc_alu alu; 1375 unsigned i; 1376 int r; 1377 1378 r = tgsi_split_constant(ctx, r600_src); 1379 if (r) 1380 return r; 1381 /* 1 - src0 */ 1382 for (i = 0; i < 4; i++) { 1383 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1384 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; 1385 alu.src[0].sel = V_SQ_ALU_SRC_1; 1386 alu.src[0].chan = 0; 1387 alu.src[1] = r600_src[0]; 1388 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1389 alu.src[1].neg = 1; 1390 alu.dst.sel = ctx->temp_reg; 1391 alu.dst.chan = i; 1392 if (i == 3) { 1393 alu.last = 1; 1394 } 1395 alu.dst.write = 1; 1396 r = r600_bc_add_alu(ctx->bc, &alu); 1397 if (r) 1398 return r; 1399 } 1400 r = r600_bc_add_literal(ctx->bc, ctx->value); 1401 if (r) 1402 return r; 1403 1404 /* (1 - src0) * src2 */ 1405 for (i = 0; i < 4; i++) { 1406 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1407 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1408 alu.src[0].sel = ctx->temp_reg; 1409 alu.src[0].chan = i; 1410 alu.src[1] = r600_src[2]; 1411 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1412 alu.dst.sel = ctx->temp_reg; 1413 alu.dst.chan = i; 1414 if (i == 3) { 1415 alu.last = 1; 1416 } 1417 alu.dst.write = 1; 1418 r = r600_bc_add_alu(ctx->bc, &alu); 1419 if (r) 1420 return r; 1421 } 1422 r = r600_bc_add_literal(ctx->bc, ctx->value); 1423 if (r) 1424 return r; 1425 1426 /* src0 * src1 + (1 - src0) * src2 */ 1427 for (i = 0; i < 4; i++) { 1428 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1429 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 1430 alu.is_op3 = 1; 1431 alu.src[0] = r600_src[0]; 1432 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1433 alu.src[1] = r600_src[1]; 1434 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1435 alu.src[2].sel = ctx->temp_reg; 1436 alu.src[2].chan = i; 1437 alu.dst.sel = ctx->temp_reg; 1438 alu.dst.chan = i; 1439 if (i == 3) { 1440 alu.last = 1; 1441 } 1442 r = r600_bc_add_alu(ctx->bc, &alu); 1443 if (r) 1444 return r; 1445 } 1446 return tgsi_helper_copy(ctx, inst); 1447} 1448 1449static int tgsi_cmp(struct r600_shader_ctx *ctx) 1450{ 1451 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1452 struct r600_bc_alu_src r600_src[3]; 1453 struct r600_bc_alu alu; 1454 int use_temp = 0; 1455 int i, r; 1456 1457 r = tgsi_split_constant(ctx, r600_src); 1458 if (r) 1459 return r; 1460 1461 if (inst->Dst[0].Register.WriteMask != 0xf) 1462 use_temp = 1; 1463 1464 for (i = 0; i < 4; i++) { 1465 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1466 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE; 1467 alu.src[0] = r600_src[0]; 1468 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1469 1470 alu.src[1] = r600_src[2]; 1471 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1472 1473 alu.src[2] = r600_src[1]; 1474 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 1475 1476 if (use_temp) 1477 alu.dst.sel = ctx->temp_reg; 1478 else { 1479 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1480 if (r) 1481 return r; 1482 } 1483 alu.dst.chan = i; 1484 alu.dst.write = 1; 1485 alu.is_op3 = 1; 1486 if (i == 3) 1487 alu.last = 1; 1488 r = r600_bc_add_alu(ctx->bc, &alu); 1489 if (r) 1490 return r; 1491 } 1492 if (use_temp) 1493 return tgsi_helper_copy(ctx, inst); 1494 return 0; 1495} 1496 1497static int tgsi_xpd(struct r600_shader_ctx *ctx) 1498{ 1499 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1500 struct r600_bc_alu_src r600_src[3]; 1501 struct r600_bc_alu alu; 1502 uint32_t use_temp = 0; 1503 int i, r; 1504 1505 if (inst->Dst[0].Register.WriteMask != 0xf) 1506 use_temp = 1; 1507 1508 r = tgsi_split_constant(ctx, r600_src); 1509 if (r) 1510 return r; 1511 1512 for (i = 0; i < 4; i++) { 1513 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1514 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 1515 1516 alu.src[0] = r600_src[0]; 1517 switch (i) { 1518 case 0: 1519 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1520 break; 1521 case 1: 1522 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1523 break; 1524 case 2: 1525 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1526 break; 1527 case 3: 1528 alu.src[0].sel = V_SQ_ALU_SRC_0; 1529 alu.src[0].chan = i; 1530 } 1531 1532 alu.src[1] = r600_src[1]; 1533 switch (i) { 1534 case 0: 1535 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1536 break; 1537 case 1: 1538 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1539 break; 1540 case 2: 1541 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1542 break; 1543 case 3: 1544 alu.src[1].sel = V_SQ_ALU_SRC_0; 1545 alu.src[1].chan = i; 1546 } 1547 1548 alu.dst.sel = ctx->temp_reg; 1549 alu.dst.chan = i; 1550 alu.dst.write = 1; 1551 1552 if (i == 3) 1553 alu.last = 1; 1554 r = r600_bc_add_alu(ctx->bc, &alu); 1555 if (r) 1556 return r; 1557 } 1558 1559 for (i = 0; i < 4; i++) { 1560 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1561 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 1562 1563 alu.src[0] = r600_src[0]; 1564 switch (i) { 1565 case 0: 1566 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1567 break; 1568 case 1: 1569 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1570 break; 1571 case 2: 1572 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1573 break; 1574 case 3: 1575 alu.src[0].sel = V_SQ_ALU_SRC_0; 1576 alu.src[0].chan = i; 1577 } 1578 1579 alu.src[1] = r600_src[1]; 1580 switch (i) { 1581 case 0: 1582 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1583 break; 1584 case 1: 1585 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1586 break; 1587 case 2: 1588 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1589 break; 1590 case 3: 1591 alu.src[1].sel = V_SQ_ALU_SRC_0; 1592 alu.src[1].chan = i; 1593 } 1594 1595 alu.src[2].sel = ctx->temp_reg; 1596 alu.src[2].neg = 1; 1597 alu.src[2].chan = i; 1598 1599 if (use_temp) 1600 alu.dst.sel = ctx->temp_reg; 1601 else { 1602 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1603 if (r) 1604 return r; 1605 } 1606 alu.dst.chan = i; 1607 alu.dst.write = 1; 1608 alu.is_op3 = 1; 1609 if (i == 3) 1610 alu.last = 1; 1611 r = r600_bc_add_alu(ctx->bc, &alu); 1612 if (r) 1613 return r; 1614 } 1615 if (use_temp) 1616 return tgsi_helper_copy(ctx, inst); 1617 return 0; 1618} 1619 1620 1621static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 1622 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1623 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 1624 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 1625 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 1626 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 1627 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1628 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1629 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 1630 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 1631 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1632 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1633 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1634 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 1635 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 1636 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, 1637 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 1638 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 1639 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 1640 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 1641 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1642 /* gap */ 1643 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1644 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1645 /* gap */ 1646 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1647 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1648 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 1649 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1650 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 1651 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1652 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 1653 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 1654 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 1655 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 1656 /* gap */ 1657 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1658 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 1659 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1660 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1661 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 1662 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 1663 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 1664 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ 1665 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1666 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1667 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1668 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1669 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1670 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 1671 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1672 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 1673 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 1674 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt}, 1675 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 1676 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1677 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 1678 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1679 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 1680 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1681 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1682 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1683 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1684 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1685 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1686 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1687 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1688 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1689 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1690 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 1691 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 1692 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1693 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 1694 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1695 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1696 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1697 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1698 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1699 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1700 /* gap */ 1701 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1702 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1703 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1704 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1705 /* gap */ 1706 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1707 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1708 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1709 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1710 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1711 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1712 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1713 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 1714 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1715 /* gap */ 1716 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1717 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1718 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1719 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1720 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1721 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1722 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1723 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1724 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1725 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1726 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1727 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1728 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1729 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1730 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1731 /* gap */ 1732 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1733 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1734 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1735 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1736 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1737 /* gap */ 1738 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1739 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1740 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1741 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1742 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1743 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1744 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1745 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1746 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 1747 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 1748 /* gap */ 1749 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1750 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1751 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1752 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1753 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1754 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1755 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1756 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1757 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1758 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1759 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1760 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1761 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1762 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1763 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1764 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1765 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1766 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1767 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1768 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1769 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1770 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1771 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1772 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1773 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1774 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1775 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1776 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1777}; 1778