/* r600_shader.c revision f031817450fe75d3224f767d79938813287ac445 */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36 37 38struct r600_shader_tgsi_instruction; 39 40struct r600_shader_ctx { 41 struct tgsi_shader_info info; 42 struct tgsi_parse_context parse; 43 const struct tgsi_token *tokens; 44 unsigned type; 45 unsigned file_offset[TGSI_FILE_COUNT]; 46 unsigned temp_reg; 47 struct r600_shader_tgsi_instruction *inst_info; 48 struct r600_bc *bc; 49 struct r600_shader *shader; 50 u32 value[4]; 51}; 52 53struct r600_shader_tgsi_instruction { 54 unsigned tgsi_opcode; 55 unsigned is_op3; 56 unsigned r600_opcode; 57 int (*process)(struct r600_shader_ctx *ctx); 58}; 59 60static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]; 61static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 62 63static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 64{ 65 struct r600_context *rctx = r600_context(ctx); 66 const struct util_format_description *desc; 67 enum pipe_format resource_format[160]; 68 unsigned i, nresources = 0; 69 struct r600_bc *bc = &shader->bc; 70 struct r600_bc_cf *cf; 71 struct r600_bc_vtx *vtx; 72 73 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 74 return 0; 75 for (i = 0; i < rctx->vertex_elements->count; i++) { 76 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 77 } 78 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 79 switch (cf->inst) { 80 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 81 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 82 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 83 desc = util_format_description(resource_format[vtx->buffer_id]); 84 if (desc == NULL) { 85 R600_ERR("unknown format %d\n", 
resource_format[vtx->buffer_id]); 86 return -EINVAL; 87 } 88 vtx->dst_sel_x = desc->swizzle[0]; 89 vtx->dst_sel_y = desc->swizzle[1]; 90 vtx->dst_sel_z = desc->swizzle[2]; 91 vtx->dst_sel_w = desc->swizzle[3]; 92 } 93 break; 94 default: 95 break; 96 } 97 } 98 return r600_bc_build(&shader->bc); 99} 100 101int r600_pipe_shader_create(struct pipe_context *ctx, 102 struct r600_context_state *rpshader, 103 const struct tgsi_token *tokens) 104{ 105 struct r600_screen *rscreen = r600_screen(ctx->screen); 106 int r; 107 108fprintf(stderr, "--------------------------------------------------------------\n"); 109tgsi_dump(tokens, 0); 110 if (rpshader == NULL) 111 return -ENOMEM; 112 rpshader->shader.family = radeon_get_family(rscreen->rw); 113 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 114 if (r) { 115 R600_ERR("translation from TGSI failed !\n"); 116 return r; 117 } 118 r = r600_bc_build(&rpshader->shader.bc); 119 if (r) { 120 R600_ERR("building bytecode failed !\n"); 121 return r; 122 } 123fprintf(stderr, "______________________________________________________________\n"); 124 return 0; 125} 126 127static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 128{ 129 struct r600_screen *rscreen = r600_screen(ctx->screen); 130 struct r600_shader *rshader = &rpshader->shader; 131 struct radeon_state *state; 132 unsigned i, tmp; 133 134 rpshader->rstate = radeon_state_decref(rpshader->rstate); 135 state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); 136 if (state == NULL) 137 return -ENOMEM; 138 for (i = 0; i < 10; i++) { 139 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; 140 } 141 /* so far never got proper semantic id from tgsi */ 142 for (i = 0; i < 32; i++) { 143 tmp = i << ((i & 3) * 8); 144 state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; 145 } 146 state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); 147 
state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); 148 rpshader->rstate = state; 149 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 150 rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); 151 rpshader->rstate->nbo = 2; 152 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 153 return radeon_state_pm4(state); 154} 155 156static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 157{ 158 struct r600_screen *rscreen = r600_screen(ctx->screen); 159 struct r600_shader *rshader = &rpshader->shader; 160 struct radeon_state *state; 161 unsigned i, tmp; 162 163 rpshader->rstate = radeon_state_decref(rpshader->rstate); 164 state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); 165 if (state == NULL) 166 return -ENOMEM; 167 for (i = 0; i < rshader->ninput; i++) { 168 tmp = S_028644_SEMANTIC(i); 169 tmp |= S_028644_SEL_CENTROID(1); 170 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 171 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { 172 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 173 } 174 state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp; 175 } 176 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | 177 S_0286CC_PERSP_GRADIENT_ENA(1); 178 state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; 179 state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); 180 state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; 181 rpshader->rstate = state; 182 rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); 183 rpshader->rstate->nbo = 1; 184 rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; 185 return radeon_state_pm4(state); 186} 187 188static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 189{ 190 struct r600_screen *rscreen = r600_screen(ctx->screen); 191 struct r600_context 
*rctx = r600_context(ctx); 192 struct r600_shader *rshader = &rpshader->shader; 193 int r; 194 195 /* copy new shader */ 196 radeon_bo_decref(rscreen->rw, rpshader->bo); 197 rpshader->bo = NULL; 198 rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4, 199 4096, NULL); 200 if (rpshader->bo == NULL) { 201 return -ENOMEM; 202 } 203 radeon_bo_map(rscreen->rw, rpshader->bo); 204 memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4); 205 radeon_bo_unmap(rscreen->rw, rpshader->bo); 206 /* build state */ 207 rshader->flat_shade = rctx->flat_shade; 208 switch (rshader->processor_type) { 209 case TGSI_PROCESSOR_VERTEX: 210 r = r600_pipe_shader_vs(ctx, rpshader); 211 break; 212 case TGSI_PROCESSOR_FRAGMENT: 213 r = r600_pipe_shader_ps(ctx, rpshader); 214 break; 215 default: 216 r = -EINVAL; 217 break; 218 } 219 return r; 220} 221 222int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 223{ 224 struct r600_context *rctx = r600_context(ctx); 225 int r; 226 227 if (rpshader == NULL) 228 return -EINVAL; 229 /* there should be enough input */ 230 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 231 R600_ERR("%d resources provided, expecting %d\n", 232 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 233 return -EINVAL; 234 } 235 r = r600_shader_update(ctx, &rpshader->shader); 236 if (r) 237 return r; 238 return r600_pipe_shader(ctx, rpshader); 239} 240 241static int tgsi_is_supported(struct r600_shader_ctx *ctx) 242{ 243 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 244 int j; 245 246 if (i->Instruction.NumDstRegs > 1) { 247 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 248 return -EINVAL; 249 } 250 if (i->Instruction.Predicate) { 251 R600_ERR("predicate unsupported\n"); 252 return -EINVAL; 253 } 254 if (i->Instruction.Label) { 255 R600_ERR("label unsupported\n"); 256 return -EINVAL; 257 } 258 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 259 
if (i->Src[j].Register.Indirect || 260 i->Src[j].Register.Dimension || 261 i->Src[j].Register.Absolute) { 262 R600_ERR("unsupported src (indirect|dimension|absolute)\n"); 263 return -EINVAL; 264 } 265 } 266 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 267 if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { 268 R600_ERR("unsupported dst (indirect|dimension)\n"); 269 return -EINVAL; 270 } 271 } 272 return 0; 273} 274 275static int tgsi_declaration(struct r600_shader_ctx *ctx) 276{ 277 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 278 struct r600_bc_vtx vtx; 279 unsigned i; 280 int r; 281 282 switch (d->Declaration.File) { 283 case TGSI_FILE_INPUT: 284 i = ctx->shader->ninput++; 285 ctx->shader->input[i].name = d->Semantic.Name; 286 ctx->shader->input[i].sid = d->Semantic.Index; 287 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 288 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 289 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 290 /* turn input into fetch */ 291 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 292 vtx.inst = 0; 293 vtx.fetch_type = 0; 294 vtx.buffer_id = i; 295 /* register containing the index into the buffer */ 296 vtx.src_gpr = 0; 297 vtx.src_sel_x = 0; 298 vtx.mega_fetch_count = 0x1F; 299 vtx.dst_gpr = ctx->shader->input[i].gpr; 300 vtx.dst_sel_x = 0; 301 vtx.dst_sel_y = 1; 302 vtx.dst_sel_z = 2; 303 vtx.dst_sel_w = 3; 304 r = r600_bc_add_vtx(ctx->bc, &vtx); 305 if (r) 306 return r; 307 } 308 break; 309 case TGSI_FILE_OUTPUT: 310 i = ctx->shader->noutput++; 311 ctx->shader->output[i].name = d->Semantic.Name; 312 ctx->shader->output[i].sid = d->Semantic.Index; 313 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 314 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 315 break; 316 case TGSI_FILE_CONSTANT: 317 case TGSI_FILE_TEMPORARY: 318 case TGSI_FILE_SAMPLER: 319 break; 320 default: 321 R600_ERR("unsupported file %d declaration\n", 
d->Declaration.File); 322 return -EINVAL; 323 } 324 return 0; 325} 326 327int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 328{ 329 struct tgsi_full_immediate *immediate; 330 struct r600_shader_ctx ctx; 331 struct r600_bc_output output; 332 unsigned opcode; 333 int i, r = 0, pos0; 334 335 ctx.bc = &shader->bc; 336 ctx.shader = shader; 337 r = r600_bc_init(ctx.bc, shader->family); 338 if (r) 339 return r; 340 ctx.tokens = tokens; 341 tgsi_scan_shader(tokens, &ctx.info); 342 tgsi_parse_init(&ctx.parse, tokens); 343 ctx.type = ctx.parse.FullHeader.Processor.Processor; 344 shader->processor_type = ctx.type; 345 346 /* register allocations */ 347 /* Values [0,127] correspond to GPR[0..127]. 348 * Values [256,511] correspond to cfile constants c[0..255]. 349 * Other special values are shown in the list below. 350 * 248 SQ_ALU_SRC_0: special constant 0.0. 351 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 352 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 353 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 354 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 355 * 253 SQ_ALU_SRC_LITERAL: literal constant. 356 * 254 SQ_ALU_SRC_PV: previous vector result. 357 * 255 SQ_ALU_SRC_PS: previous scalar result. 
358 */ 359 for (i = 0; i < TGSI_FILE_COUNT; i++) { 360 ctx.file_offset[i] = 0; 361 } 362 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 363 ctx.file_offset[TGSI_FILE_INPUT] = 1; 364 } 365 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 366 ctx.info.file_count[TGSI_FILE_INPUT]; 367 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 368 ctx.info.file_count[TGSI_FILE_OUTPUT]; 369 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 370 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 371 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 372 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 373 374 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 375 tgsi_parse_token(&ctx.parse); 376 switch (ctx.parse.FullToken.Token.Type) { 377 case TGSI_TOKEN_TYPE_IMMEDIATE: 378 immediate = &ctx.parse.FullToken.FullImmediate; 379 ctx.value[0] = immediate->u[0].Uint; 380 ctx.value[1] = immediate->u[1].Uint; 381 ctx.value[2] = immediate->u[2].Uint; 382 ctx.value[3] = immediate->u[3].Uint; 383 break; 384 case TGSI_TOKEN_TYPE_DECLARATION: 385 r = tgsi_declaration(&ctx); 386 if (r) 387 goto out_err; 388 break; 389 case TGSI_TOKEN_TYPE_INSTRUCTION: 390 r = tgsi_is_supported(&ctx); 391 if (r) 392 goto out_err; 393 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 394 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 395 r = ctx.inst_info->process(&ctx); 396 if (r) 397 goto out_err; 398 r = r600_bc_add_literal(ctx.bc, ctx.value); 399 if (r) 400 goto out_err; 401 break; 402 default: 403 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 404 r = -EINVAL; 405 goto out_err; 406 } 407 } 408 /* export output */ 409 for (i = 0, pos0 = 0; i < shader->noutput; i++) { 410 memset(&output, 0, sizeof(struct r600_bc_output)); 411 output.gpr = shader->output[i].gpr; 412 output.elem_size = 3; 413 output.swizzle_x = 0; 414 output.swizzle_y = 1; 415 output.swizzle_z = 2; 416 output.swizzle_w = 3; 417 output.barrier = 1; 418 output.type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 419 output.array_base = i - pos0; 420 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE; 421 switch (ctx.type == TGSI_PROCESSOR_VERTEX) { 422 case TGSI_PROCESSOR_VERTEX: 423 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 424 output.array_base = 60; 425 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 426 /* position doesn't count in array_base */ 427 pos0 = 1; 428 } 429 break; 430 case TGSI_PROCESSOR_FRAGMENT: 431 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 432 output.array_base = 0; 433 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 434 } else { 435 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 436 r = -EINVAL; 437 goto out_err; 438 } 439 break; 440 default: 441 R600_ERR("unsupported processor type %d\n", ctx.type); 442 r = -EINVAL; 443 goto out_err; 444 } 445 if (i == (shader->noutput - 1)) { 446 output.end_of_program = 1; 447 } 448 r = r600_bc_add_output(ctx.bc, &output); 449 if (r) 450 goto out_err; 451 } 452 tgsi_parse_free(&ctx.parse); 453 return 0; 454out_err: 455 tgsi_parse_free(&ctx.parse); 456 return r; 457} 458 459static int tgsi_unsupported(struct r600_shader_ctx *ctx) 460{ 461 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 462 return -EINVAL; 463} 464 465static int tgsi_end(struct r600_shader_ctx *ctx) 466{ 467 return 0; 468} 469 470static int tgsi_src(struct r600_shader_ctx *ctx, 471 const struct tgsi_full_src_register *tgsi_src, 472 unsigned swizzle, 473 struct r600_bc_alu_src *r600_src) 474{ 475 r600_src->sel = tgsi_src->Register.Index; 476 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 477 r600_src->sel = 0; 478 } 479 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 480 switch (swizzle) { 481 case 0: 482 r600_src->chan = tgsi_src->Register.SwizzleX; 483 break; 484 case 1: 485 r600_src->chan = tgsi_src->Register.SwizzleY; 486 break; 487 case 2: 488 r600_src->chan = 
tgsi_src->Register.SwizzleZ; 489 break; 490 case 3: 491 r600_src->chan = tgsi_src->Register.SwizzleW; 492 break; 493 default: 494 return -EINVAL; 495 } 496 return 0; 497} 498 499static int tgsi_dst(struct r600_shader_ctx *ctx, 500 const struct tgsi_full_dst_register *tgsi_dst, 501 unsigned swizzle, 502 struct r600_bc_alu_dst *r600_dst) 503{ 504 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 505 506 r600_dst->sel = tgsi_dst->Register.Index; 507 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 508 r600_dst->chan = swizzle; 509 r600_dst->write = 1; 510 if (inst->Instruction.Saturate) { 511 r600_dst->clamp = 1; 512 } 513 return 0; 514} 515 516static int tgsi_op2(struct r600_shader_ctx *ctx) 517{ 518 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 519 struct r600_bc_alu alu; 520 int i, j, r; 521 522 for (i = 0; i < 4; i++) { 523 memset(&alu, 0, sizeof(struct r600_bc_alu)); 524 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 525 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 526 alu.dst.chan = i; 527 } else { 528 alu.inst = ctx->inst_info->r600_opcode; 529 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 530 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); 531 if (r) 532 return r; 533 } 534 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 535 if (r) 536 return r; 537 } 538 /* handle some special cases */ 539 switch (ctx->inst_info->tgsi_opcode) { 540 case TGSI_OPCODE_SUB: 541 alu.src[1].neg = 1; 542 break; 543 case TGSI_OPCODE_ABS: 544 alu.src[0].abs = 1; 545 break; 546 default: 547 break; 548 } 549 if (i == 3) { 550 alu.last = 1; 551 } 552 r = r600_bc_add_alu(ctx->bc, &alu); 553 if (r) 554 return r; 555 } 556 return 0; 557} 558 559static int tgsi_kill(struct r600_shader_ctx *ctx) 560{ 561 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 562 struct r600_bc_alu alu; 563 int i, r; 564 565 for (i = 0; i < 4; i++) { 566 memset(&alu, 0, sizeof(struct r600_bc_alu)); 567 
alu.inst = ctx->inst_info->r600_opcode; 568 alu.dst.chan = i; 569 alu.src[0].sel = 248; 570 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); 571 if (r) 572 return r; 573 if (i == 3) { 574 alu.last = 1; 575 } 576 r = r600_bc_add_alu(ctx->bc, &alu); 577 if (r) 578 return r; 579 } 580 return 0; 581} 582 583static int tgsi_slt(struct r600_shader_ctx *ctx) 584{ 585 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 586 struct r600_bc_alu alu; 587 int i, r; 588 589 for (i = 0; i < 4; i++) { 590 memset(&alu, 0, sizeof(struct r600_bc_alu)); 591 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 592 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 593 alu.dst.chan = i; 594 } else { 595 alu.inst = ctx->inst_info->r600_opcode; 596 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); 597 if (r) 598 return r; 599 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]); 600 if (r) 601 return r; 602 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 603 if (r) 604 return r; 605 } 606 if (i == 3) { 607 alu.last = 1; 608 } 609 r = r600_bc_add_alu(ctx->bc, &alu); 610 if (r) 611 return r; 612 } 613 return 0; 614} 615 616static int tgsi_lit(struct r600_shader_ctx *ctx) 617{ 618 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 619 struct r600_bc_alu alu; 620 int r; 621 622 if (inst->Dst[0].Register.WriteMask & (1 << 0)) 623 { 624 /* dst.x, <- 1.0 */ 625 memset(&alu, 0, sizeof(struct r600_bc_alu)); 626 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 627 alu.src[0].sel = 249; /*1.0*/ 628 alu.src[0].chan = 0; 629 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 630 if (r) 631 return r; 632 if ((inst->Dst[0].Register.WriteMask & 0xe) == 0) 633 alu.last = 1; 634 r = r600_bc_add_alu(ctx->bc, &alu); 635 if (r) 636 return r; 637 } 638 639 640 if (inst->Dst[0].Register.WriteMask & (1 << 1)) 641 { 642 /* dst.y = max(src.x, 0.0) */ 643 memset(&alu, 0, sizeof(struct r600_bc_alu)); 644 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; 645 r = tgsi_src(ctx, 
&inst->Src[0], 0, &alu.src[0]); 646 if (r) 647 return r; 648 alu.src[1].sel = 248; /*0.0*/ 649 alu.src[1].chan = 0; 650 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 651 if (r) 652 return r; 653 if ((inst->Dst[0].Register.WriteMask & 0xa) == 0) 654 alu.last = 1; 655 r = r600_bc_add_alu(ctx->bc, &alu); 656 if (r) 657 return r; 658 } 659 660 if (inst->Dst[0].Register.WriteMask & (1 << 3)) 661 { 662 /* dst.w, <- 1.0 */ 663 memset(&alu, 0, sizeof(struct r600_bc_alu)); 664 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 665 alu.src[0].sel = 249; 666 alu.src[0].chan = 0; 667 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 668 if (r) 669 return r; 670 if ((inst->Dst[0].Register.WriteMask & 0x4) == 0) 671 alu.last = 1; 672 r = r600_bc_add_alu(ctx->bc, &alu); 673 if (r) 674 return r; 675 } 676 677 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 678 { 679 int chan; 680 int sel; 681 682 /* dst.z = log(src.y) */ 683 memset(&alu, 0, sizeof(struct r600_bc_alu)); 684 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; 685 r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]); 686 if (r) 687 return r; 688 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 689 if (r) 690 return r; 691 alu.last = 1; 692 r = r600_bc_add_alu(ctx->bc, &alu); 693 if (r) 694 return r; 695 696 chan = alu.dst.chan; 697 sel = alu.dst.sel; 698 699 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 700 memset(&alu, 0, sizeof(struct r600_bc_alu)); 701 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 702 r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]); 703 if (r) 704 return r; 705 alu.src[1].sel = sel; 706 alu.src[1].chan = chan; 707 r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]); 708 if (r) 709 return r; 710 alu.dst.sel = ctx->temp_reg; 711 alu.dst.chan = 0; 712 alu.dst.write = 1; 713 alu.is_op3 = 1; 714 alu.last = 1; 715 r = r600_bc_add_alu(ctx->bc, &alu); 716 if (r) 717 return r; 718 719 /* dst.z = exp(tmp.x) */ 720 memset(&alu, 0, sizeof(struct r600_bc_alu)); 721 alu.inst = 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; 722 alu.src[0].sel = ctx->temp_reg; 723 alu.src[0].chan = 0; 724 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 725 if (r) 726 return r; 727 alu.last = 1; 728 r = r600_bc_add_alu(ctx->bc, &alu); 729 if (r) 730 return r; 731 } 732 return 0; 733} 734 735static int tgsi_trans(struct r600_shader_ctx *ctx) 736{ 737 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 738 struct r600_bc_alu alu; 739 int i, j, r; 740 741 for (i = 0; i < 4; i++) { 742 memset(&alu, 0, sizeof(struct r600_bc_alu)); 743 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 744 alu.inst = ctx->inst_info->r600_opcode; 745 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 746 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); 747 if (r) 748 return r; 749 } 750 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 751 if (r) 752 return r; 753 alu.last = 1; 754 r = r600_bc_add_alu(ctx->bc, &alu); 755 if (r) 756 return r; 757 } 758 } 759 return 0; 760} 761 762static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 763{ 764 struct r600_bc_alu alu; 765 int i, r; 766 767 r = r600_bc_add_literal(ctx->bc, ctx->value); 768 if (r) 769 return r; 770 for (i = 0; i < 4; i++) { 771 memset(&alu, 0, sizeof(struct r600_bc_alu)); 772 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 773 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 774 alu.dst.chan = i; 775 } else { 776 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 777 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 778 if (r) 779 return r; 780 alu.src[0].sel = ctx->temp_reg; 781 alu.src[0].chan = i; 782 } 783 if (i == 3) { 784 alu.last = 1; 785 } 786 r = r600_bc_add_alu(ctx->bc, &alu); 787 if (r) 788 return r; 789 } 790 return 0; 791} 792 793static int tgsi_op3(struct r600_shader_ctx *ctx) 794{ 795 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 796 struct r600_bc_alu alu; 797 int i, j, r; 798 799 /* do it in 2 step as op3 doesn't support 
writemask */ 800 for (i = 0; i < 4; i++) { 801 memset(&alu, 0, sizeof(struct r600_bc_alu)); 802 alu.inst = ctx->inst_info->r600_opcode; 803 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 804 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); 805 if (r) 806 return r; 807 } 808 alu.dst.sel = ctx->temp_reg; 809 alu.dst.chan = i; 810 alu.dst.write = 1; 811 alu.is_op3 = 1; 812 if (i == 3) { 813 alu.last = 1; 814 } 815 r = r600_bc_add_alu(ctx->bc, &alu); 816 if (r) 817 return r; 818 } 819 return tgsi_helper_copy(ctx, inst); 820} 821 822static int tgsi_dp(struct r600_shader_ctx *ctx) 823{ 824 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 825 struct r600_bc_alu alu; 826 int i, j, r; 827 828 for (i = 0; i < 4; i++) { 829 memset(&alu, 0, sizeof(struct r600_bc_alu)); 830 alu.inst = ctx->inst_info->r600_opcode; 831 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 832 r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]); 833 if (r) 834 return r; 835 } 836 alu.dst.sel = ctx->temp_reg; 837 alu.dst.chan = i; 838 alu.dst.write = 1; 839 /* handle some special cases */ 840 switch (ctx->inst_info->tgsi_opcode) { 841 case TGSI_OPCODE_DP2: 842 if (i > 1) { 843 alu.src[0].sel = alu.src[1].sel = 248; 844 alu.src[0].chan = alu.src[1].chan = 0; 845 } 846 break; 847 case TGSI_OPCODE_DP3: 848 if (i > 2) { 849 alu.src[0].sel = alu.src[1].sel = 248; 850 alu.src[0].chan = alu.src[1].chan = 0; 851 } 852 break; 853 default: 854 break; 855 } 856 if (i == 3) { 857 alu.last = 1; 858 } 859 r = r600_bc_add_alu(ctx->bc, &alu); 860 if (r) 861 return r; 862 } 863 return tgsi_helper_copy(ctx, inst); 864} 865 866static int tgsi_tex(struct r600_shader_ctx *ctx) 867{ 868 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 869 struct r600_bc_tex tex; 870 struct r600_bc_alu alu; 871 unsigned src_gpr; 872 int r; 873 874 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 875 876 /* Add perspective divide */ 877 if 
(ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) { 878 memset(&alu, 0, sizeof(struct r600_bc_alu)); 879 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; 880 alu.src[0].sel = src_gpr; 881 alu.src[0].chan = 3; 882 alu.dst.sel = ctx->temp_reg; 883 alu.dst.chan = 3; 884 alu.last = 1; 885 alu.dst.write = 1; 886 r = r600_bc_add_alu(ctx->bc, &alu); 887 if (r) 888 return r; 889 890 memset(&alu, 0, sizeof(struct r600_bc_alu)); 891 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 892 alu.src[0].sel = ctx->temp_reg; 893 alu.src[0].chan = 3; 894 alu.src[1].sel = src_gpr; 895 alu.src[1].chan = 0; 896 alu.dst.sel = ctx->temp_reg; 897 alu.dst.chan = 0; 898 alu.dst.write = 1; 899 r = r600_bc_add_alu(ctx->bc, &alu); 900 if (r) 901 return r; 902 memset(&alu, 0, sizeof(struct r600_bc_alu)); 903 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 904 alu.src[0].sel = ctx->temp_reg; 905 alu.src[0].chan = 3; 906 alu.src[1].sel = src_gpr; 907 alu.src[1].chan = 1; 908 alu.dst.sel = ctx->temp_reg; 909 alu.dst.chan = 1; 910 alu.dst.write = 1; 911 r = r600_bc_add_alu(ctx->bc, &alu); 912 if (r) 913 return r; 914 memset(&alu, 0, sizeof(struct r600_bc_alu)); 915 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 916 alu.src[0].sel = ctx->temp_reg; 917 alu.src[0].chan = 3; 918 alu.src[1].sel = src_gpr; 919 alu.src[1].chan = 2; 920 alu.dst.sel = ctx->temp_reg; 921 alu.dst.chan = 2; 922 alu.dst.write = 1; 923 r = r600_bc_add_alu(ctx->bc, &alu); 924 if (r) 925 return r; 926 memset(&alu, 0, sizeof(struct r600_bc_alu)); 927 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 928 alu.src[0].sel = 249; 929 alu.src[0].chan = 0; 930 alu.dst.sel = ctx->temp_reg; 931 alu.dst.chan = 3; 932 alu.last = 1; 933 alu.dst.write = 1; 934 r = r600_bc_add_alu(ctx->bc, &alu); 935 if (r) 936 return r; 937 src_gpr = ctx->temp_reg; 938 } 939 940 /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ 941 memset(&tex, 0, sizeof(struct r600_bc_tex)); 942 tex.inst = ctx->inst_info->r600_opcode; 943 tex.resource_id = 
ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 944 tex.sampler_id = tex.resource_id; 945 tex.src_gpr = src_gpr; 946 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 947 tex.dst_sel_x = 0; 948 tex.dst_sel_y = 1; 949 tex.dst_sel_z = 2; 950 tex.dst_sel_w = 3; 951 tex.src_sel_x = 0; 952 tex.src_sel_y = 1; 953 tex.src_sel_z = 2; 954 tex.src_sel_w = 3; 955 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 956 tex.coord_type_x = 1; 957 tex.coord_type_y = 1; 958 tex.coord_type_z = 1; 959 tex.coord_type_w = 1; 960 } 961 return r600_bc_add_tex(ctx->bc, &tex); 962} 963 964static int tgsi_lrp(struct r600_shader_ctx *ctx) 965{ 966 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 967 struct r600_bc_alu alu; 968 unsigned i; 969 int r; 970 971 /* 1 - src0 */ 972 for (i = 0; i < 4; i++) { 973 memset(&alu, 0, sizeof(struct r600_bc_alu)); 974 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; 975 alu.src[0].sel = 249; 976 alu.src[0].chan = 0; 977 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]); 978 if (r) 979 return r; 980 alu.src[1].neg = 1; 981 alu.dst.sel = ctx->temp_reg; 982 alu.dst.chan = i; 983 if (i == 3) { 984 alu.last = 1; 985 } 986 alu.dst.write = 1; 987 r = r600_bc_add_alu(ctx->bc, &alu); 988 if (r) 989 return r; 990 } 991 r = r600_bc_add_literal(ctx->bc, ctx->value); 992 if (r) 993 return r; 994 995 /* (1 - src0) * src2 */ 996 for (i = 0; i < 4; i++) { 997 memset(&alu, 0, sizeof(struct r600_bc_alu)); 998 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; 999 alu.src[0].sel = ctx->temp_reg; 1000 alu.src[0].chan = i; 1001 r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]); 1002 if (r) 1003 return r; 1004 alu.dst.sel = ctx->temp_reg; 1005 alu.dst.chan = i; 1006 if (i == 3) { 1007 alu.last = 1; 1008 } 1009 alu.dst.write = 1; 1010 r = r600_bc_add_alu(ctx->bc, &alu); 1011 if (r) 1012 return r; 1013 } 1014 r = r600_bc_add_literal(ctx->bc, ctx->value); 1015 if (r) 1016 return r; 1017 1018 /* 
src0 * src1 + (1 - src0) * src2 */ 1019 for (i = 0; i < 4; i++) { 1020 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1021 alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; 1022 alu.is_op3 = 1; 1023 r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]); 1024 if (r) 1025 return r; 1026 r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]); 1027 if (r) 1028 return r; 1029 alu.src[2].sel = ctx->temp_reg; 1030 alu.src[2].chan = i; 1031 alu.dst.sel = ctx->temp_reg; 1032 alu.dst.chan = i; 1033 if (i == 3) { 1034 alu.last = 1; 1035 } 1036 r = r600_bc_add_alu(ctx->bc, &alu); 1037 if (r) 1038 return r; 1039 } 1040 return tgsi_helper_copy(ctx, inst); 1041} 1042 1043static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 1044 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1045 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 1046 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 1047 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans}, 1048 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans}, 1049 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1050 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1051 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 1052 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 1053 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1054 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1055 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1056 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1057 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 1058 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, 1059 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1060 {TGSI_OPCODE_MAD, 1, 
V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 1061 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 1062 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 1063 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1064 /* gap */ 1065 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1066 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1067 /* gap */ 1068 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1069 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1070 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1071 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1072 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1073 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1074 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans}, 1075 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1076 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1077 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1078 /* gap */ 1079 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1080 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 1081 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1082 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1083 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1084 {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1085 {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1086 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ 1087 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1088 
{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1089 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1090 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1091 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1092 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1093 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1094 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1095 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1096 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1097 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1098 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1099 {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex}, 1100 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1101 {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, 1102 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1103 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1104 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1105 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1106 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1107 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1108 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1109 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1110 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1111 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1112 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */ 1113 {TGSI_OPCODE_CMP, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1114 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1115 {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1116 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1117 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1118 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 1119 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1120 {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1121 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1122 /* gap */ 1123 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1124 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1125 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1126 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1127 /* gap */ 1128 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1129 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1130 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1131 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1132 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1133 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1134 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1135 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1136 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1137 /* gap */ 1138 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1139 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1140 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1141 
{TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1142 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1143 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1144 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1145 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1146 {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1147 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1148 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1149 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1150 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1151 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1152 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1153 /* gap */ 1154 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1155 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1156 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1157 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1158 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1159 /* gap */ 1160 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1161 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1162 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1163 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1164 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1165 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1166 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1167 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1168 
{TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 1169 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 1170 /* gap */ 1171 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1172 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1173 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1174 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1175 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1176 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1177 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1178 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1179 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1180 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1181 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1182 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1183 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1184 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1185 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1186 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1187 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1188 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1189 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1190 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1191 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1192 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1193 
{TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1194 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1195 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1196 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1197 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1198 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 1199}; 1200