r600_shader.c revision 6d3ad2dd2ba3ccdd211dbc618404519930631be2
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600d.h" 28 29#include "pipe/p_shader_tokens.h" 30#include "tgsi/tgsi_info.h" 31#include "tgsi/tgsi_parse.h" 32#include "tgsi/tgsi_scan.h" 33#include "tgsi/tgsi_dump.h" 34#include "util/u_memory.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 61{ 62 struct r600_context *rctx = (struct r600_context *)ctx; 63 struct r600_shader *rshader = &shader->shader; 64 uint32_t *ptr; 65 int i; 66 67 /* copy new shader */ 68 if (shader->bo == NULL) { 69 shader->bo = (struct r600_resource*) 70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 71 if (shader->bo == NULL) { 72 return -ENOMEM; 73 } 74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 75 if (R600_BIG_ENDIAN) { 76 for (i = 0; i < rshader->bc.ndw; ++i) { 77 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 78 } 79 } else { 80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 81 } 82 rctx->ws->buffer_unmap(shader->bo->cs_buf); 83 } 84 /* build state */ 85 switch (rshader->processor_type) { 86 case TGSI_PROCESSOR_VERTEX: 87 if (rctx->chip_class >= EVERGREEN) { 88 evergreen_pipe_shader_vs(ctx, shader); 89 } else { 90 r600_pipe_shader_vs(ctx, shader); 91 } 92 break; 93 case TGSI_PROCESSOR_FRAGMENT: 94 if (rctx->chip_class >= EVERGREEN) { 95 evergreen_pipe_shader_ps(ctx, shader); 96 } else { 97 r600_pipe_shader_ps(ctx, shader); 98 } 99 break; 100 default: 101 return -EINVAL; 102 } 103 return 0; 104} 105 106static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader); 107 108int r600_pipe_shader_create(struct pipe_context *ctx, struct 
r600_pipe_shader *shader) 109{ 110 static int dump_shaders = -1; 111 struct r600_context *rctx = (struct r600_context *)ctx; 112 struct r600_pipe_shader_selector *sel = shader->selector; 113 int r; 114 115 /* Would like some magic "get_bool_option_once" routine. 116 */ 117 if (dump_shaders == -1) 118 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 119 120 if (dump_shaders) { 121 fprintf(stderr, "--------------------------------------------------------------\n"); 122 tgsi_dump(sel->tokens, 0); 123 124 if (sel->so.num_outputs) { 125 unsigned i; 126 fprintf(stderr, "STREAMOUT\n"); 127 for (i = 0; i < sel->so.num_outputs; i++) { 128 unsigned mask = ((1 << sel->so.output[i].num_components) - 1) << 129 sel->so.output[i].start_component; 130 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, 131 sel->so.output[i].output_buffer, sel->so.output[i].register_index, 132 mask & 1 ? "x" : "_", 133 (mask >> 1) & 1 ? "y" : "_", 134 (mask >> 2) & 1 ? "z" : "_", 135 (mask >> 3) & 1 ? 
"w" : "_"); 136 } 137 } 138 } 139 r = r600_shader_from_tgsi(rctx, shader); 140 if (r) { 141 R600_ERR("translation from TGSI failed !\n"); 142 return r; 143 } 144 r = r600_bytecode_build(&shader->shader.bc); 145 if (r) { 146 R600_ERR("building bytecode failed !\n"); 147 return r; 148 } 149 if (dump_shaders) { 150 r600_bytecode_dump(&shader->shader.bc); 151 fprintf(stderr, "______________________________________________________________\n"); 152 } 153 return r600_pipe_shader(ctx, shader); 154} 155 156void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 157{ 158 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 159 r600_bytecode_clear(&shader->shader.bc); 160} 161 162/* 163 * tgsi -> r600 shader 164 */ 165struct r600_shader_tgsi_instruction; 166 167struct r600_shader_src { 168 unsigned sel; 169 unsigned swizzle[4]; 170 unsigned neg; 171 unsigned abs; 172 unsigned rel; 173 uint32_t value[4]; 174}; 175 176struct r600_shader_ctx { 177 struct tgsi_shader_info info; 178 struct tgsi_parse_context parse; 179 const struct tgsi_token *tokens; 180 unsigned type; 181 unsigned file_offset[TGSI_FILE_COUNT]; 182 unsigned temp_reg; 183 struct r600_shader_tgsi_instruction *inst_info; 184 struct r600_bytecode *bc; 185 struct r600_shader *shader; 186 struct r600_shader_src src[4]; 187 uint32_t *literals; 188 uint32_t nliterals; 189 uint32_t max_driver_temp_used; 190 /* needed for evergreen interpolation */ 191 boolean input_centroid; 192 boolean input_linear; 193 boolean input_perspective; 194 int num_interp_gpr; 195 int face_gpr; 196 int colors_used; 197 boolean clip_vertex_write; 198 unsigned cv_output; 199 int fragcoord_input; 200 int native_integers; 201}; 202 203struct r600_shader_tgsi_instruction { 204 unsigned tgsi_opcode; 205 unsigned is_op3; 206 unsigned r600_opcode; 207 int (*process)(struct r600_shader_ctx *ctx); 208}; 209 210static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], 
eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 211static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 212static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only); 213static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 214static int tgsi_else(struct r600_shader_ctx *ctx); 215static int tgsi_endif(struct r600_shader_ctx *ctx); 216static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 217static int tgsi_endloop(struct r600_shader_ctx *ctx); 218static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 219 220/* 221 * bytestream -> r600 shader 222 * 223 * These functions are used to transform the output of the LLVM backend into 224 * struct r600_bytecode. 225 */ 226 227static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, 228 unsigned char * bytes, unsigned num_bytes); 229 230#ifdef HAVE_OPENCL 231int r600_compute_shader_create(struct pipe_context * ctx, 232 LLVMModuleRef mod, struct r600_bytecode * bytecode) 233{ 234 struct r600_context *r600_ctx = (struct r600_context *)ctx; 235 unsigned char * bytes; 236 unsigned byte_count; 237 struct r600_shader_ctx shader_ctx; 238 unsigned dump = 0; 239 240 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { 241 dump = 1; 242 } 243 244 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump); 245 shader_ctx.bc = bytecode; 246 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family); 247 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE; 248 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count); 249 if (shader_ctx.bc->chip_class == CAYMAN) { 250 cm_bytecode_add_cf_end(shader_ctx.bc); 251 } 252 r600_bytecode_build(shader_ctx.bc); 253 if (dump) { 254 r600_bytecode_dump(shader_ctx.bc); 255 } 256 return 1; 257} 258 259#endif /* HAVE_OPENCL */ 260 261static uint32_t i32_from_byte_stream(unsigned char * bytes, 262 unsigned * bytes_read) 263{ 264 unsigned i; 265 uint32_t out = 0; 266 
for (i = 0; i < 4; i++) { 267 out |= bytes[(*bytes_read)++] << (8 * i); 268 } 269 return out; 270} 271 272static unsigned r600_src_from_byte_stream(unsigned char * bytes, 273 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx) 274{ 275 unsigned i; 276 unsigned sel0, sel1; 277 sel0 = bytes[bytes_read++]; 278 sel1 = bytes[bytes_read++]; 279 alu->src[src_idx].sel = sel0 | (sel1 << 8); 280 alu->src[src_idx].chan = bytes[bytes_read++]; 281 alu->src[src_idx].neg = bytes[bytes_read++]; 282 alu->src[src_idx].abs = bytes[bytes_read++]; 283 alu->src[src_idx].rel = bytes[bytes_read++]; 284 alu->src[src_idx].kc_bank = bytes[bytes_read++]; 285 for (i = 0; i < 4; i++) { 286 alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8); 287 } 288 return bytes_read; 289} 290 291static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, 292 unsigned char * bytes, unsigned bytes_read) 293{ 294 unsigned src_idx; 295 unsigned inst0, inst1; 296 struct r600_bytecode_alu alu; 297 memset(&alu, 0, sizeof(alu)); 298 for(src_idx = 0; src_idx < 3; src_idx++) { 299 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, 300 &alu, src_idx); 301 } 302 303 alu.dst.sel = bytes[bytes_read++]; 304 alu.dst.chan = bytes[bytes_read++]; 305 alu.dst.clamp = bytes[bytes_read++]; 306 alu.dst.write = bytes[bytes_read++]; 307 alu.dst.rel = bytes[bytes_read++]; 308 inst0 = bytes[bytes_read++]; 309 inst1 = bytes[bytes_read++]; 310 alu.inst = inst0 | (inst1 << 8); 311 alu.last = bytes[bytes_read++]; 312 alu.is_op3 = bytes[bytes_read++]; 313 alu.predicate = bytes[bytes_read++]; 314 alu.bank_swizzle = bytes[bytes_read++]; 315 alu.bank_swizzle_force = bytes[bytes_read++]; 316 alu.omod = bytes[bytes_read++]; 317 alu.index_mode = bytes[bytes_read++]; 318 r600_bytecode_add_alu(ctx->bc, &alu); 319 320 /* XXX: Handle other KILL instructions */ 321 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) { 322 ctx->shader->uses_kill = 1; 323 /* XXX: This should be enforced in the 
LLVM backend. */ 324 ctx->bc->force_add_cf = 1; 325 } 326 return bytes_read; 327} 328 329static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu, 330 unsigned pred_inst) 331{ 332 alu->inst = pred_inst; 333 alu->predicate = 1; 334 alu->dst.write = 0; 335 alu->src[1].sel = V_SQ_ALU_SRC_0; 336 alu->src[1].chan = 0; 337 alu->last = 1; 338 r600_bytecode_add_alu_type(ctx->bc, alu, 339 CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 340 341 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 342 fc_pushlevel(ctx, FC_IF); 343 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 344} 345 346static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx, 347 struct r600_bytecode_alu *alu, unsigned compare_opcode) 348{ 349 unsigned opcode = TGSI_OPCODE_BRK; 350 if (ctx->bc->chip_class == CAYMAN) 351 ctx->inst_info = &cm_shader_tgsi_instruction[opcode]; 352 else if (ctx->bc->chip_class >= EVERGREEN) 353 ctx->inst_info = &eg_shader_tgsi_instruction[opcode]; 354 else 355 ctx->inst_info = &r600_shader_tgsi_instruction[opcode]; 356 llvm_if(ctx, alu, compare_opcode); 357 tgsi_loop_brk_cont(ctx); 358 tgsi_endif(ctx); 359} 360 361static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, 362 unsigned char * bytes, unsigned bytes_read) 363{ 364 struct r600_bytecode_alu alu; 365 unsigned inst; 366 memset(&alu, 0, sizeof(alu)); 367 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0); 368 inst = bytes[bytes_read++]; 369 switch (inst) { 370 case 0: /* FC_IF */ 371 llvm_if(ctx, &alu, 372 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 373 break; 374 case 1: /* FC_IF_INT */ 375 llvm_if(ctx, &alu, 376 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 377 break; 378 case 2: /* FC_ELSE */ 379 tgsi_else(ctx); 380 break; 381 case 3: /* FC_ENDIF */ 382 tgsi_endif(ctx); 383 break; 384 case 4: /* FC_BGNLOOP */ 385 tgsi_bgnloop(ctx); 386 break; 387 case 5: /* FC_ENDLOOP */ 388 tgsi_endloop(ctx); 389 break; 
390 case 6: /* FC_BREAK */ 391 r600_break_from_byte_stream(ctx, &alu, 392 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT)); 393 break; 394 case 7: /* FC_BREAK_NZ_INT */ 395 r600_break_from_byte_stream(ctx, &alu, 396 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 397 break; 398 case 8: /* FC_CONTINUE */ 399 { 400 unsigned opcode = TGSI_OPCODE_CONT; 401 if (ctx->bc->chip_class == CAYMAN) { 402 ctx->inst_info = 403 &cm_shader_tgsi_instruction[opcode]; 404 } else if (ctx->bc->chip_class >= EVERGREEN) { 405 ctx->inst_info = 406 &eg_shader_tgsi_instruction[opcode]; 407 } else { 408 ctx->inst_info = 409 &r600_shader_tgsi_instruction[opcode]; 410 } 411 tgsi_loop_brk_cont(ctx); 412 } 413 break; 414 case 9: /* FC_BREAK_Z_INT */ 415 r600_break_from_byte_stream(ctx, &alu, 416 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT)); 417 break; 418 case 10: /* FC_BREAK_NZ */ 419 r600_break_from_byte_stream(ctx, &alu, 420 CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 421 break; 422 } 423 424 return bytes_read; 425} 426 427static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx, 428 unsigned char * bytes, unsigned bytes_read) 429{ 430 struct r600_bytecode_tex tex; 431 432 tex.inst = bytes[bytes_read++]; 433 tex.resource_id = bytes[bytes_read++]; 434 tex.src_gpr = bytes[bytes_read++]; 435 tex.src_rel = bytes[bytes_read++]; 436 tex.dst_gpr = bytes[bytes_read++]; 437 tex.dst_rel = bytes[bytes_read++]; 438 tex.dst_sel_x = bytes[bytes_read++]; 439 tex.dst_sel_y = bytes[bytes_read++]; 440 tex.dst_sel_z = bytes[bytes_read++]; 441 tex.dst_sel_w = bytes[bytes_read++]; 442 tex.lod_bias = bytes[bytes_read++]; 443 tex.coord_type_x = bytes[bytes_read++]; 444 tex.coord_type_y = bytes[bytes_read++]; 445 tex.coord_type_z = bytes[bytes_read++]; 446 tex.coord_type_w = bytes[bytes_read++]; 447 tex.offset_x = bytes[bytes_read++]; 448 tex.offset_y = bytes[bytes_read++]; 449 tex.offset_z = bytes[bytes_read++]; 450 tex.sampler_id = bytes[bytes_read++]; 451 
tex.src_sel_x = bytes[bytes_read++]; 452 tex.src_sel_y = bytes[bytes_read++]; 453 tex.src_sel_z = bytes[bytes_read++]; 454 tex.src_sel_w = bytes[bytes_read++]; 455 456 r600_bytecode_add_tex(ctx->bc, &tex); 457 458 return bytes_read; 459} 460 461static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, 462 unsigned char * bytes, unsigned bytes_read) 463{ 464 struct r600_bytecode_vtx vtx; 465 466 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); 467 uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); 468 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read); 469 470 memset(&vtx, 0, sizeof(vtx)); 471 472 /* WORD0 */ 473 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0); 474 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0); 475 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0); 476 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0); 477 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0); 478 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0); 479 480 /* WORD1 */ 481 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1); 482 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1); 483 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1); 484 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1); 485 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1); 486 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1); 487 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1); 488 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1); 489 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1); 490 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1); 491 492 /* WORD 2*/ 493 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2); 494 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2); 495 496 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) { 497 fprintf(stderr, "Error adding vtx\n"); 498 } 499 /* Use the Texture Cache */ 500 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX; 501 return bytes_read; 502} 503 504static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, 505 
unsigned char * bytes, unsigned num_bytes) 506{ 507 unsigned bytes_read = 0; 508 unsigned i, byte; 509 while (bytes_read < num_bytes) { 510 char inst_type = bytes[bytes_read++]; 511 switch (inst_type) { 512 case 0: 513 bytes_read = r600_alu_from_byte_stream(ctx, bytes, 514 bytes_read); 515 break; 516 case 1: 517 bytes_read = r600_tex_from_byte_stream(ctx, bytes, 518 bytes_read); 519 break; 520 case 2: 521 bytes_read = r600_fc_from_byte_stream(ctx, bytes, 522 bytes_read); 523 break; 524 case 3: 525 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); 526 for (i = 0; i < 2; i++) { 527 for (byte = 0 ; byte < 4; byte++) { 528 ctx->bc->cf_last->isa[i] |= 529 (bytes[bytes_read++] << (byte * 8)); 530 } 531 } 532 break; 533 534 case 4: 535 bytes_read = r600_vtx_from_byte_stream(ctx, bytes, 536 bytes_read); 537 break; 538 default: 539 /* XXX: Error here */ 540 break; 541 } 542 } 543} 544 545/* End bytestream -> r600 shader functions*/ 546 547static int tgsi_is_supported(struct r600_shader_ctx *ctx) 548{ 549 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 550 int j; 551 552 if (i->Instruction.NumDstRegs > 1) { 553 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 554 return -EINVAL; 555 } 556 if (i->Instruction.Predicate) { 557 R600_ERR("predicate unsupported\n"); 558 return -EINVAL; 559 } 560#if 0 561 if (i->Instruction.Label) { 562 R600_ERR("label unsupported\n"); 563 return -EINVAL; 564 } 565#endif 566 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 567 if (i->Src[j].Register.Dimension) { 568 R600_ERR("unsupported src %d (dimension %d)\n", j, 569 i->Src[j].Register.Dimension); 570 return -EINVAL; 571 } 572 } 573 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 574 if (i->Dst[j].Register.Dimension) { 575 R600_ERR("unsupported dst (dimension)\n"); 576 return -EINVAL; 577 } 578 } 579 return 0; 580} 581 582static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 583{ 584 int i, r; 585 struct r600_bytecode_alu alu; 586 int gpr = 0, 
base_chan = 0; 587 int ij_index = 0; 588 589 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 590 ij_index = 0; 591 if (ctx->shader->input[input].centroid) 592 ij_index++; 593 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 594 ij_index = 0; 595 /* if we have perspective add one */ 596 if (ctx->input_perspective) { 597 ij_index++; 598 /* if we have perspective centroid */ 599 if (ctx->input_centroid) 600 ij_index++; 601 } 602 if (ctx->shader->input[input].centroid) 603 ij_index++; 604 } 605 606 /* work out gpr and base_chan from index */ 607 gpr = ij_index / 2; 608 base_chan = (2 * (ij_index % 2)) + 1; 609 610 for (i = 0; i < 8; i++) { 611 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 612 613 if (i < 4) 614 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW; 615 else 616 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY; 617 618 if ((i > 1) && (i < 6)) { 619 alu.dst.sel = ctx->shader->input[input].gpr; 620 alu.dst.write = 1; 621 } 622 623 alu.dst.chan = i % 4; 624 625 alu.src[0].sel = gpr; 626 alu.src[0].chan = (base_chan - (i % 2)); 627 628 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 629 630 alu.bank_swizzle_force = SQ_ALU_VEC_210; 631 if ((i % 4) == 3) 632 alu.last = 1; 633 r = r600_bytecode_add_alu(ctx->bc, &alu); 634 if (r) 635 return r; 636 } 637 return 0; 638} 639 640static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 641{ 642 int i, r; 643 struct r600_bytecode_alu alu; 644 645 for (i = 0; i < 4; i++) { 646 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 647 648 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0; 649 650 alu.dst.sel = ctx->shader->input[input].gpr; 651 alu.dst.write = 1; 652 653 alu.dst.chan = i; 654 655 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 656 alu.src[0].chan = i; 657 658 if (i == 3) 659 alu.last = 1; 660 r = r600_bytecode_add_alu(ctx->bc, &alu); 661 if (r) 662 
return r; 663 } 664 return 0; 665} 666 667/* 668 * Special export handling in shaders 669 * 670 * shader export ARRAY_BASE for EXPORT_POS: 671 * 60 is position 672 * 61 is misc vector 673 * 62, 63 are clip distance vectors 674 * 675 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 676 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 677 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 678 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 679 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 680 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 681 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 682 * exclusive from render target index) 683 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 684 * 685 * 686 * shader export ARRAY_BASE for EXPORT_PIXEL: 687 * 0-7 CB targets 688 * 61 computed Z vector 689 * 690 * The use of the values exported in the computed Z vector are controlled 691 * by DB_SHADER_CONTROL: 692 * Z_EXPORT_ENABLE - Z as a float in RED 693 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 694 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 695 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 696 * DB_SOURCE_FORMAT - export control restrictions 697 * 698 */ 699 700 701/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 702static int r600_spi_sid(struct r600_shader_io * io) 703{ 704 int index, name = io->name; 705 706 /* These params are handled differently, they don't need 707 * semantic indices, so we'll use 0 for them. 
708 */ 709 if (name == TGSI_SEMANTIC_POSITION || 710 name == TGSI_SEMANTIC_PSIZE || 711 name == TGSI_SEMANTIC_FACE) 712 index = 0; 713 else { 714 if (name == TGSI_SEMANTIC_GENERIC) { 715 /* For generic params simply use sid from tgsi */ 716 index = io->sid; 717 } else { 718 /* For non-generic params - pack name and sid into 8 bits */ 719 index = 0x80 | (name<<3) | (io->sid); 720 } 721 722 /* Make sure that all really used indices have nonzero value, so 723 * we can just compare it to 0 later instead of comparing the name 724 * with different values to detect special cases. */ 725 index++; 726 } 727 728 return index; 729}; 730 731/* turn input into interpolate on EG */ 732static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 733{ 734 int r = 0; 735 736 if (ctx->shader->input[index].spi_sid) { 737 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 738 if (ctx->shader->input[index].interpolate > 0) { 739 r = evergreen_interp_alu(ctx, index); 740 } else { 741 r = evergreen_interp_flat(ctx, index); 742 } 743 } 744 return r; 745} 746 747static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 748{ 749 struct r600_bytecode_alu alu; 750 int i, r; 751 int gpr_front = ctx->shader->input[front].gpr; 752 int gpr_back = ctx->shader->input[back].gpr; 753 754 for (i = 0; i < 4; i++) { 755 memset(&alu, 0, sizeof(alu)); 756 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 757 alu.is_op3 = 1; 758 alu.dst.write = 1; 759 alu.dst.sel = gpr_front; 760 alu.src[0].sel = ctx->face_gpr; 761 alu.src[1].sel = gpr_front; 762 alu.src[2].sel = gpr_back; 763 764 alu.dst.chan = i; 765 alu.src[1].chan = i; 766 alu.src[2].chan = i; 767 alu.last = (i==3); 768 769 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 770 return r; 771 } 772 773 return 0; 774} 775 776static int tgsi_declaration(struct r600_shader_ctx *ctx) 777{ 778 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 779 unsigned i; 780 int r; 781 782 switch 
(d->Declaration.File) { 783 case TGSI_FILE_INPUT: 784 i = ctx->shader->ninput++; 785 ctx->shader->input[i].name = d->Semantic.Name; 786 ctx->shader->input[i].sid = d->Semantic.Index; 787 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 788 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 789 ctx->shader->input[i].centroid = d->Interp.Centroid; 790 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 791 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 792 switch (ctx->shader->input[i].name) { 793 case TGSI_SEMANTIC_FACE: 794 ctx->face_gpr = ctx->shader->input[i].gpr; 795 break; 796 case TGSI_SEMANTIC_COLOR: 797 ctx->colors_used++; 798 break; 799 case TGSI_SEMANTIC_POSITION: 800 ctx->fragcoord_input = i; 801 break; 802 } 803 if (ctx->bc->chip_class >= EVERGREEN) { 804 if ((r = evergreen_interp_input(ctx, i))) 805 return r; 806 } 807 } 808 break; 809 case TGSI_FILE_OUTPUT: 810 i = ctx->shader->noutput++; 811 ctx->shader->output[i].name = d->Semantic.Name; 812 ctx->shader->output[i].sid = d->Semantic.Index; 813 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 814 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 815 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 816 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 817 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 818 switch (d->Semantic.Name) { 819 case TGSI_SEMANTIC_CLIPDIST: 820 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); 821 break; 822 case TGSI_SEMANTIC_PSIZE: 823 ctx->shader->vs_out_misc_write = 1; 824 ctx->shader->vs_out_point_size = 1; 825 break; 826 case TGSI_SEMANTIC_CLIPVERTEX: 827 ctx->clip_vertex_write = TRUE; 828 ctx->cv_output = i; 829 break; 830 } 831 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 832 switch (d->Semantic.Name) { 833 case TGSI_SEMANTIC_COLOR: 834 ctx->shader->nr_ps_max_color_exports++; 835 break; 836 } 837 } 838 
break; 839 case TGSI_FILE_CONSTANT: 840 case TGSI_FILE_TEMPORARY: 841 case TGSI_FILE_SAMPLER: 842 case TGSI_FILE_ADDRESS: 843 break; 844 845 case TGSI_FILE_SYSTEM_VALUE: 846 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 847 if (!ctx->native_integers) { 848 struct r600_bytecode_alu alu; 849 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 850 851 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 852 alu.src[0].sel = 0; 853 alu.src[0].chan = 3; 854 855 alu.dst.sel = 0; 856 alu.dst.chan = 3; 857 alu.dst.write = 1; 858 alu.last = 1; 859 860 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 861 return r; 862 } 863 break; 864 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 865 break; 866 default: 867 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 868 return -EINVAL; 869 } 870 return 0; 871} 872 873static int r600_get_temp(struct r600_shader_ctx *ctx) 874{ 875 return ctx->temp_reg + ctx->max_driver_temp_used++; 876} 877 878/* 879 * for evergreen we need to scan the shader to find the number of GPRs we need to 880 * reserve for interpolation. 
 *
 * we need to know if we are going to emit
 * any centroid inputs
 * if perspective and linear are required
*/
static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
{
	int i;
	int num_baryc;

	ctx->input_linear = FALSE;
	ctx->input_perspective = FALSE;
	ctx->input_centroid = FALSE;
	ctx->num_interp_gpr = 1;

	/* any centroid inputs */
	for (i = 0; i < ctx->info.num_inputs; i++) {
		/* skip position/face */
		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
			continue;
		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
			ctx->input_linear = TRUE;
		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
			ctx->input_perspective = TRUE;
		if (ctx->info.input_centroid[i])
			ctx->input_centroid = TRUE;
	}

	num_baryc = 0;
	/* ignoring sample for now */
	if (ctx->input_perspective)
		num_baryc++;
	if (ctx->input_linear)
		num_baryc++;
	if (ctx->input_centroid)
		num_baryc *= 2;

	/* two barycentric pairs fit in one GPR, rounded up */
	ctx->num_interp_gpr += (num_baryc + 1) >> 1;

	/* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
	return ctx->num_interp_gpr;
}

/*
 * Translate a TGSI source register into *r600_src.
 *
 * Immediates whose four swizzles are identical are first offered to
 * r600_bytecode_special_constants(); if that picks something other than
 * V_SQ_ALU_SRC_LITERAL the function returns with that selection. Otherwise
 * the four literal dwords of the immediate are copied into value[].
 * INSTANCEID reads channel 3 and VERTEXID channel 0 of GPR 0. Every other
 * file maps to Index plus the file's offset, relative when Indirect is set.
 */
static void tgsi_src(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_src_register *tgsi_src,
		     struct r600_shader_src *r600_src)
{
	memset(r600_src, 0, sizeof(*r600_src));
	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
	r600_src->neg = tgsi_src->Register.Negate;
	r600_src->abs = tgsi_src->Register.Absolute;

	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		int index;
		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

			/* scalar use: index of the single dword that is read */
			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
				return;
		}
		index = tgsi_src->Register.Index;
		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		}
	} else {
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
}

/*
 * Emit a vertex fetch that loads four dwords into dst_reg, addressed by the
 * bytecode's address register. With a non-zero offset an ADD_INT first stores
 * address register + offset in dst_reg, which then serves as the fetch
 * address. Returns 0 or the error of the failing emit.
 */
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	unsigned int ar_reg;
	int r;

	if (offset) {
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(alu));

		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
		alu.src[0].sel = ctx->bc->ar_reg;

		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = offset;

		alu.dst.sel = dst_reg;
		alu.dst.write = 1;
		alu.last = 1;

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		ar_reg = dst_reg;
	} else {
		ar_reg = ctx->bc->ar_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
	vtx.src_gpr = ar_reg;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
	vtx.endian = r600_endian_swap(32);

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	return 0;
}

/*
 * Fill ctx->src[] from the current instruction's sources and move constant
 * file operands into temporaries: relatively addressed constants are always
 * fetched with tgsi_fetch_rel_const(), direct ones are copied with MOVs while
 * the budget j (number of constant operands minus one) is still positive, so
 * a last direct constant may stay in place. Returns 0 or the error of the
 * failing emit.
 */
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, nconst, r;

	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
			nconst++;
		}
		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
	}
	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
			continue;
		}

		if (ctx->src[i].rel) {
			int treg = r600_get_temp(ctx);
			/* NOTE(review): 512 is presumably the constant file base that
			 * tgsi_src() added through file_offset - confirm */
			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
				return r;

			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
			j--;
		} else if (j > 0) {
			int treg = r600_get_temp(ctx);
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].rel = ctx->src[i].rel;
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			ctx->src[i].rel =0;
			j--;
		}
	}
	return 0;
}

/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, nliteral, r;

	/* count the sources that tgsi_src() left as literals */
	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			nliteral++;
		}
	}
	/* copy literal sources into temporaries while the budget j
	 * (number of literals minus one) is still positive */
	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			int treg = r600_get_temp(ctx);
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].value = ctx->src[i].value[k];
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			j--;
		}
	}
	return 0;
}

/*
 * For every COLOR input that exists on entry, append a BCOLOR copy of it in a
 * fresh GPR and emit the front/back selection with select_twoside_color().
 * A FACE input is appended first when none was declared (face_gpr == -1).
 * Returns 0 or the error of the failing emit.
 */
static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
{
	int i, r, count = ctx->shader->ninput;

	/* additional inputs will be allocated right after the existing inputs,
	 * we won't need them after the color selection, so we don't need to
	 * reserve these gprs for the rest of the shader code and to adjust
	 * output offsets etc. */
	int gpr = ctx->file_offset[TGSI_FILE_INPUT] +
			ctx->info.file_max[TGSI_FILE_INPUT] + 1;

	if (ctx->face_gpr == -1) {
		i = ctx->shader->ninput++;
		ctx->shader->input[i].name = TGSI_SEMANTIC_FACE;
		ctx->shader->input[i].spi_sid = 0;
		ctx->shader->input[i].gpr = gpr++;
		ctx->face_gpr = ctx->shader->input[i].gpr;
	}

	/* count was taken before any input was appended, so only the
	 * inputs present on entry are visited */
	for (i = 0; i < count; i++) {
		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
			int ni = ctx->shader->ninput++;
			memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io));
			ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
			ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]);
			ctx->shader->input[ni].gpr = gpr++;

			if (ctx->bc->chip_class >= EVERGREEN) {
				r = evergreen_interp_input(ctx, ni);
				if (r)
					return r;
			}

			r = select_twoside_color(ctx, i, ni);
			if (r)
				return r;
		}
	}
	return 0;
}

static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
{
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct tgsi_full_property *property;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[32];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
	/* Declarations used by llvm code */
	bool use_llvm = false;
	unsigned char * inst_bytes = NULL;
	unsigned inst_byte_count = 0;

#ifdef R600_USE_LLVM
	use_llvm = debug_get_bool_option("R600_LLVM", TRUE);
#endif
	ctx.bc = &shader->bc;
	ctx.shader = shader;
	ctx.native_integers = true;

	r600_bytecode_init(ctx.bc,
rctx->chip_class, rctx->family); 1177 ctx.tokens = tokens; 1178 tgsi_scan_shader(tokens, &ctx.info); 1179 tgsi_parse_init(&ctx.parse, tokens); 1180 ctx.type = ctx.parse.FullHeader.Processor.Processor; 1181 shader->processor_type = ctx.type; 1182 ctx.bc->type = shader->processor_type; 1183 1184 ctx.face_gpr = -1; 1185 ctx.fragcoord_input = -1; 1186 ctx.colors_used = 0; 1187 ctx.clip_vertex_write = 0; 1188 1189 shader->nr_ps_color_exports = 0; 1190 shader->nr_ps_max_color_exports = 0; 1191 1192 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side; 1193 1194 /* register allocations */ 1195 /* Values [0,127] correspond to GPR[0..127]. 1196 * Values [128,159] correspond to constant buffer bank 0 1197 * Values [160,191] correspond to constant buffer bank 1 1198 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 1199 * Values [256,287] correspond to constant buffer bank 2 (EG) 1200 * Values [288,319] correspond to constant buffer bank 3 (EG) 1201 * Other special values are shown in the list below. 1202 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 1203 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 1204 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 1205 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 1206 * 248 SQ_ALU_SRC_0: special constant 0.0. 1207 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 1208 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 1209 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 1210 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 1211 * 253 SQ_ALU_SRC_LITERAL: literal constant. 1212 * 254 SQ_ALU_SRC_PV: previous vector result. 1213 * 255 SQ_ALU_SRC_PS: previous scalar result. 
1214 */ 1215 for (i = 0; i < TGSI_FILE_COUNT; i++) { 1216 ctx.file_offset[i] = 0; 1217 } 1218 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1219 ctx.file_offset[TGSI_FILE_INPUT] = 1; 1220 if (ctx.bc->chip_class >= EVERGREEN) { 1221 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 1222 } else { 1223 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 1224 } 1225 } 1226 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 1227 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 1228 } 1229 1230 /* LLVM backend setup */ 1231#ifdef R600_USE_LLVM 1232 if (use_llvm && ctx.info.indirect_files) { 1233 fprintf(stderr, "Warning: R600 LLVM backend does not support " 1234 "indirect adressing. Falling back to TGSI " 1235 "backend.\n"); 1236 use_llvm = 0; 1237 } 1238 if (use_llvm) { 1239 struct radeon_llvm_context radeon_llvm_ctx; 1240 LLVMModuleRef mod; 1241 unsigned dump = 0; 1242 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 1243 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT]; 1244 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 1245 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { 1246 dump = 1; 1247 } 1248 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count, 1249 rctx->family, dump)) { 1250 FREE(inst_bytes); 1251 radeon_llvm_dispose(&radeon_llvm_ctx); 1252 use_llvm = 0; 1253 fprintf(stderr, "R600 LLVM backend failed to compile " 1254 "shader. 
Falling back to TGSI\n"); 1255 } else { 1256 ctx.file_offset[TGSI_FILE_OUTPUT] = 1257 ctx.file_offset[TGSI_FILE_INPUT]; 1258 } 1259 radeon_llvm_dispose(&radeon_llvm_ctx); 1260 } 1261#endif 1262 /* End of LLVM backend setup */ 1263 1264 if (!use_llvm) { 1265 ctx.file_offset[TGSI_FILE_OUTPUT] = 1266 ctx.file_offset[TGSI_FILE_INPUT] + 1267 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 1268 } 1269 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 1270 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 1271 1272 /* Outside the GPR range. This will be translated to one of the 1273 * kcache banks later. */ 1274 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 1275 1276 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 1277 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 1278 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 1279 ctx.temp_reg = ctx.bc->ar_reg + 1; 1280 1281 ctx.nliterals = 0; 1282 ctx.literals = NULL; 1283 shader->fs_write_all = FALSE; 1284 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1285 tgsi_parse_token(&ctx.parse); 1286 switch (ctx.parse.FullToken.Token.Type) { 1287 case TGSI_TOKEN_TYPE_IMMEDIATE: 1288 immediate = &ctx.parse.FullToken.FullImmediate; 1289 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 1290 if(ctx.literals == NULL) { 1291 r = -ENOMEM; 1292 goto out_err; 1293 } 1294 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 1295 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 1296 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 1297 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 1298 ctx.nliterals++; 1299 break; 1300 case TGSI_TOKEN_TYPE_DECLARATION: 1301 r = tgsi_declaration(&ctx); 1302 if (r) 1303 goto out_err; 1304 break; 1305 case TGSI_TOKEN_TYPE_INSTRUCTION: 1306 break; 1307 case TGSI_TOKEN_TYPE_PROPERTY: 1308 property = &ctx.parse.FullToken.FullProperty; 1309 switch (property->Property.PropertyName) { 1310 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 1311 if 
(property->u[0].Data == 1) 1312 shader->fs_write_all = TRUE; 1313 break; 1314 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 1315 if (property->u[0].Data == 1) 1316 shader->vs_prohibit_ucps = TRUE; 1317 break; 1318 } 1319 break; 1320 default: 1321 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 1322 r = -EINVAL; 1323 goto out_err; 1324 } 1325 } 1326 1327 if (shader->fs_write_all && rctx->chip_class >= EVERGREEN) 1328 shader->nr_ps_max_color_exports = 8; 1329 1330 if (ctx.fragcoord_input >= 0) { 1331 if (ctx.bc->chip_class == CAYMAN) { 1332 for (j = 0 ; j < 4; j++) { 1333 struct r600_bytecode_alu alu; 1334 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1335 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1336 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1337 alu.src[0].chan = 3; 1338 1339 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1340 alu.dst.chan = j; 1341 alu.dst.write = (j == 3); 1342 alu.last = 1; 1343 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 1344 return r; 1345 } 1346 } else { 1347 struct r600_bytecode_alu alu; 1348 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1349 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1350 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1351 alu.src[0].chan = 3; 1352 1353 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1354 alu.dst.chan = 3; 1355 alu.dst.write = 1; 1356 alu.last = 1; 1357 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 1358 return r; 1359 } 1360 } 1361 1362 if (shader->two_side && ctx.colors_used) { 1363 if ((r = process_twoside_color_inputs(&ctx))) 1364 return r; 1365 } 1366 1367 tgsi_parse_init(&ctx.parse, tokens); 1368 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1369 tgsi_parse_token(&ctx.parse); 1370 switch (ctx.parse.FullToken.Token.Type) { 1371 case TGSI_TOKEN_TYPE_INSTRUCTION: 1372 if (use_llvm) { 1373 continue; 1374 } 1375 r = tgsi_is_supported(&ctx); 1376 if (r) 1377 goto out_err; 1378 
ctx.max_driver_temp_used = 0; 1379 /* reserve first tmp for everyone */ 1380 r600_get_temp(&ctx); 1381 1382 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 1383 if ((r = tgsi_split_constant(&ctx))) 1384 goto out_err; 1385 if ((r = tgsi_split_literal_constant(&ctx))) 1386 goto out_err; 1387 if (ctx.bc->chip_class == CAYMAN) 1388 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 1389 else if (ctx.bc->chip_class >= EVERGREEN) 1390 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 1391 else 1392 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 1393 r = ctx.inst_info->process(&ctx); 1394 if (r) 1395 goto out_err; 1396 break; 1397 default: 1398 break; 1399 } 1400 } 1401 1402 /* Get instructions if we are using the LLVM backend. */ 1403 if (use_llvm) { 1404 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count); 1405 FREE(inst_bytes); 1406 } 1407 1408 noutput = shader->noutput; 1409 1410 if (ctx.clip_vertex_write) { 1411 /* need to convert a clipvertex write into clipdistance writes and not export 1412 the clip vertex anymore */ 1413 1414 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 1415 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1416 shader->output[noutput].gpr = ctx.temp_reg; 1417 noutput++; 1418 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1419 shader->output[noutput].gpr = ctx.temp_reg+1; 1420 noutput++; 1421 1422 /* reset spi_sid for clipvertex output to avoid confusing spi */ 1423 shader->output[ctx.cv_output].spi_sid = 0; 1424 1425 shader->clip_dist_write = 0xFF; 1426 1427 for (i = 0; i < 8; i++) { 1428 int oreg = i >> 2; 1429 int ochan = i & 3; 1430 1431 for (j = 0; j < 4; j++) { 1432 struct r600_bytecode_alu alu; 1433 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1434 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4); 1435 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 1436 alu.src[0].chan = j; 1437 1438 alu.src[1].sel = 512 + i; 1439 alu.src[1].kc_bank 
= 1; 1440 alu.src[1].chan = j; 1441 1442 alu.dst.sel = ctx.temp_reg + oreg; 1443 alu.dst.chan = j; 1444 alu.dst.write = (j == ochan); 1445 if (j == 3) 1446 alu.last = 1; 1447 r = r600_bytecode_add_alu(ctx.bc, &alu); 1448 if (r) 1449 return r; 1450 } 1451 } 1452 } 1453 1454 /* Add stream outputs. */ 1455 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 1456 for (i = 0; i < so.num_outputs; i++) { 1457 struct r600_bytecode_output output; 1458 1459 if (so.output[i].output_buffer >= 4) { 1460 R600_ERR("exceeded the max number of stream output buffers, got: %d\n", 1461 so.output[i].output_buffer); 1462 r = -EINVAL; 1463 goto out_err; 1464 } 1465 if (so.output[i].dst_offset < so.output[i].start_component) { 1466 R600_ERR("stream_output - dst_offset cannot be less than start_component\n"); 1467 r = -EINVAL; 1468 goto out_err; 1469 } 1470 1471 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1472 output.gpr = shader->output[so.output[i].register_index].gpr; 1473 output.elem_size = 0; 1474 output.array_base = so.output[i].dst_offset - so.output[i].start_component; 1475 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1476 output.burst_count = 1; 1477 output.barrier = 1; 1478 /* array_size is an upper limit for the burst_count 1479 * with MEM_STREAM instructions */ 1480 output.array_size = 0xFFF; 1481 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component; 1482 if (ctx.bc->chip_class >= EVERGREEN) { 1483 switch (so.output[i].output_buffer) { 1484 case 0: 1485 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 1486 break; 1487 case 1: 1488 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 1489 break; 1490 case 2: 1491 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 1492 break; 1493 case 3: 1494 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 1495 break; 1496 } 1497 } else { 1498 switch (so.output[i].output_buffer) { 1499 
case 0: 1500 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 1501 break; 1502 case 1: 1503 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 1504 break; 1505 case 2: 1506 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 1507 break; 1508 case 3: 1509 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 1510 break; 1511 } 1512 } 1513 r = r600_bytecode_add_output(ctx.bc, &output); 1514 if (r) 1515 goto out_err; 1516 } 1517 } 1518 1519 /* export output */ 1520 for (i = 0, j = 0; i < noutput; i++, j++) { 1521 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1522 output[j].gpr = shader->output[i].gpr; 1523 output[j].elem_size = 3; 1524 output[j].swizzle_x = 0; 1525 output[j].swizzle_y = 1; 1526 output[j].swizzle_z = 2; 1527 output[j].swizzle_w = 3; 1528 output[j].burst_count = 1; 1529 output[j].barrier = 1; 1530 output[j].type = -1; 1531 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1532 switch (ctx.type) { 1533 case TGSI_PROCESSOR_VERTEX: 1534 switch (shader->output[i].name) { 1535 case TGSI_SEMANTIC_POSITION: 1536 output[j].array_base = next_pos_base++; 1537 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1538 break; 1539 1540 case TGSI_SEMANTIC_PSIZE: 1541 output[j].array_base = next_pos_base++; 1542 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1543 break; 1544 case TGSI_SEMANTIC_CLIPVERTEX: 1545 j--; 1546 break; 1547 case TGSI_SEMANTIC_CLIPDIST: 1548 output[j].array_base = next_pos_base++; 1549 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1550 /* spi_sid is 0 for clipdistance outputs that were generated 1551 * for clipvertex - we don't need to pass them to PS */ 1552 if (shader->output[i].spi_sid) { 1553 j++; 1554 /* duplicate it as PARAM to pass to the pixel shader */ 1555 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 1556 output[j].array_base = next_param_base++; 1557 output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1558 } 1559 break; 1560 case TGSI_SEMANTIC_FOG: 1561 output[j].swizzle_y = 4; /* 0 */ 1562 output[j].swizzle_z = 4; /* 0 */ 1563 output[j].swizzle_w = 5; /* 1 */ 1564 break; 1565 } 1566 break; 1567 case TGSI_PROCESSOR_FRAGMENT: 1568 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 1569 /* never export more colors than the number of CBs */ 1570 if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) { 1571 /* skip export */ 1572 j--; 1573 continue; 1574 } 1575 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3; 1576 output[j].array_base = next_pixel_base++; 1577 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1578 shader->nr_ps_color_exports++; 1579 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 1580 for (k = 1; k < rctx->nr_cbufs; k++) { 1581 j++; 1582 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1583 output[j].gpr = shader->output[i].gpr; 1584 output[j].elem_size = 3; 1585 output[j].swizzle_x = 0; 1586 output[j].swizzle_y = 1; 1587 output[j].swizzle_z = 2; 1588 output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 
5 : 3; 1589 output[j].burst_count = 1; 1590 output[j].barrier = 1; 1591 output[j].array_base = next_pixel_base++; 1592 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1593 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1594 shader->nr_ps_color_exports++; 1595 } 1596 } 1597 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 1598 output[j].array_base = 61; 1599 output[j].swizzle_x = 2; 1600 output[j].swizzle_y = 7; 1601 output[j].swizzle_z = output[j].swizzle_w = 7; 1602 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1603 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 1604 output[j].array_base = 61; 1605 output[j].swizzle_x = 7; 1606 output[j].swizzle_y = 1; 1607 output[j].swizzle_z = output[j].swizzle_w = 7; 1608 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1609 } else { 1610 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 1611 r = -EINVAL; 1612 goto out_err; 1613 } 1614 break; 1615 default: 1616 R600_ERR("unsupported processor type %d\n", ctx.type); 1617 r = -EINVAL; 1618 goto out_err; 1619 } 1620 1621 if (output[j].type==-1) { 1622 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1623 output[j].array_base = next_param_base++; 1624 } 1625 } 1626 1627 /* add fake param output for vertex shader if no param is exported */ 1628 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 1629 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1630 output[j].gpr = 0; 1631 output[j].elem_size = 3; 1632 output[j].swizzle_x = 7; 1633 output[j].swizzle_y = 7; 1634 output[j].swizzle_z = 7; 1635 output[j].swizzle_w = 7; 1636 output[j].burst_count = 1; 1637 output[j].barrier = 1; 1638 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1639 output[j].array_base = 0; 1640 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1641 j++; 1642 } 1643 1644 /* add fake pixel export */ 1645 if 
(ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) { 1646 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1647 output[j].gpr = 0; 1648 output[j].elem_size = 3; 1649 output[j].swizzle_x = 7; 1650 output[j].swizzle_y = 7; 1651 output[j].swizzle_z = 7; 1652 output[j].swizzle_w = 7; 1653 output[j].burst_count = 1; 1654 output[j].barrier = 1; 1655 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1656 output[j].array_base = 0; 1657 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1658 j++; 1659 } 1660 1661 noutput = j; 1662 1663 /* set export done on last export of each type */ 1664 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1665 if (ctx.bc->chip_class < CAYMAN) { 1666 if (i == (noutput - 1)) { 1667 output[i].end_of_program = 1; 1668 } 1669 } 1670 if (!(output_done & (1 << output[i].type))) { 1671 output_done |= (1 << output[i].type); 1672 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1673 } 1674 } 1675 /* add output to bytecode */ 1676 for (i = 0; i < noutput; i++) { 1677 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1678 if (r) 1679 goto out_err; 1680 } 1681 /* add program end */ 1682 if (ctx.bc->chip_class == CAYMAN) 1683 cm_bytecode_add_cf_end(ctx.bc); 1684 1685 /* check GPR limit - we have 124 = 128 - 4 1686 * (4 are reserved as alu clause temporary registers) */ 1687 if (ctx.bc->ngpr > 124) { 1688 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 1689 r = -ENOMEM; 1690 goto out_err; 1691 } 1692 1693 free(ctx.literals); 1694 tgsi_parse_free(&ctx.parse); 1695 return 0; 1696out_err: 1697 free(ctx.literals); 1698 tgsi_parse_free(&ctx.parse); 1699 return r; 1700} 1701 1702static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1703{ 1704 R600_ERR("%s tgsi opcode unsupported\n", 1705 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1706 return -EINVAL; 1707} 1708 1709static int tgsi_end(struct r600_shader_ctx *ctx) 1710{ 
1711 return 0; 1712} 1713 1714static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1715 const struct r600_shader_src *shader_src, 1716 unsigned chan) 1717{ 1718 bc_src->sel = shader_src->sel; 1719 bc_src->chan = shader_src->swizzle[chan]; 1720 bc_src->neg = shader_src->neg; 1721 bc_src->abs = shader_src->abs; 1722 bc_src->rel = shader_src->rel; 1723 bc_src->value = shader_src->value[bc_src->chan]; 1724} 1725 1726static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1727{ 1728 bc_src->abs = 1; 1729 bc_src->neg = 0; 1730} 1731 1732static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1733{ 1734 bc_src->neg = !bc_src->neg; 1735} 1736 1737static void tgsi_dst(struct r600_shader_ctx *ctx, 1738 const struct tgsi_full_dst_register *tgsi_dst, 1739 unsigned swizzle, 1740 struct r600_bytecode_alu_dst *r600_dst) 1741{ 1742 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1743 1744 r600_dst->sel = tgsi_dst->Register.Index; 1745 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1746 r600_dst->chan = swizzle; 1747 r600_dst->write = 1; 1748 if (tgsi_dst->Register.Indirect) 1749 r600_dst->rel = V_SQ_REL_RELATIVE; 1750 if (inst->Instruction.Saturate) { 1751 r600_dst->clamp = 1; 1752 } 1753} 1754 1755static int tgsi_last_instruction(unsigned writemask) 1756{ 1757 int i, lasti = 0; 1758 1759 for (i = 0; i < 4; i++) { 1760 if (writemask & (1 << i)) { 1761 lasti = i; 1762 } 1763 } 1764 return lasti; 1765} 1766 1767static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1768{ 1769 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1770 struct r600_bytecode_alu alu; 1771 int i, j, r; 1772 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1773 1774 for (i = 0; i < lasti + 1; i++) { 1775 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1776 continue; 1777 1778 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1779 
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1780 1781 alu.inst = ctx->inst_info->r600_opcode; 1782 if (!swap) { 1783 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1784 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1785 } 1786 } else { 1787 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1788 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1789 } 1790 /* handle some special cases */ 1791 switch (ctx->inst_info->tgsi_opcode) { 1792 case TGSI_OPCODE_SUB: 1793 r600_bytecode_src_toggle_neg(&alu.src[1]); 1794 break; 1795 case TGSI_OPCODE_ABS: 1796 r600_bytecode_src_set_abs(&alu.src[0]); 1797 break; 1798 default: 1799 break; 1800 } 1801 if (i == lasti || trans_only) { 1802 alu.last = 1; 1803 } 1804 r = r600_bytecode_add_alu(ctx->bc, &alu); 1805 if (r) 1806 return r; 1807 } 1808 return 0; 1809} 1810 1811static int tgsi_op2(struct r600_shader_ctx *ctx) 1812{ 1813 return tgsi_op2_s(ctx, 0, 0); 1814} 1815 1816static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1817{ 1818 return tgsi_op2_s(ctx, 1, 0); 1819} 1820 1821static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 1822{ 1823 return tgsi_op2_s(ctx, 0, 1); 1824} 1825 1826static int tgsi_ineg(struct r600_shader_ctx *ctx) 1827{ 1828 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1829 struct r600_bytecode_alu alu; 1830 int i, r; 1831 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1832 1833 for (i = 0; i < lasti + 1; i++) { 1834 1835 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1836 continue; 1837 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1838 alu.inst = ctx->inst_info->r600_opcode; 1839 1840 alu.src[0].sel = V_SQ_ALU_SRC_0; 1841 1842 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1843 1844 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1845 1846 if (i == lasti) { 1847 alu.last = 1; 1848 } 1849 r = r600_bytecode_add_alu(ctx->bc, &alu); 1850 if (r) 1851 return r; 1852 } 1853 return 0; 1854 1855} 1856 1857static int cayman_emit_float_instr(struct 
r600_shader_ctx *ctx) 1858{ 1859 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1860 int i, j, r; 1861 struct r600_bytecode_alu alu; 1862 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1863 1864 for (i = 0 ; i < last_slot; i++) { 1865 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1866 alu.inst = ctx->inst_info->r600_opcode; 1867 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1868 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1869 } 1870 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1871 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1872 1873 if (i == last_slot - 1) 1874 alu.last = 1; 1875 r = r600_bytecode_add_alu(ctx->bc, &alu); 1876 if (r) 1877 return r; 1878 } 1879 return 0; 1880} 1881 1882static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 1883{ 1884 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1885 int i, j, k, r; 1886 struct r600_bytecode_alu alu; 1887 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 1888 for (k = 0; k < last_slot; k++) { 1889 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 1890 continue; 1891 1892 for (i = 0 ; i < 4; i++) { 1893 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1894 alu.inst = ctx->inst_info->r600_opcode; 1895 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1896 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 1897 } 1898 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1899 alu.dst.write = (i == k); 1900 if (i == 3) 1901 alu.last = 1; 1902 r = r600_bytecode_add_alu(ctx->bc, &alu); 1903 if (r) 1904 return r; 1905 } 1906 } 1907 return 0; 1908} 1909 1910/* 1911 * r600 - trunc to -PI..PI range 1912 * r700 - normalize by dividing by 2PI 1913 * see fdo bug 27901 1914 */ 1915static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1916{ 1917 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1918 static float double_pi = 3.1415926535 * 2; 1919 static float neg_pi = -3.1415926535; 1920 1921 int r; 1922 struct r600_bytecode_alu alu; 1923 1924 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1925 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1926 alu.is_op3 = 1; 1927 1928 alu.dst.chan = 0; 1929 alu.dst.sel = ctx->temp_reg; 1930 alu.dst.write = 1; 1931 1932 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1933 1934 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1935 alu.src[1].chan = 0; 1936 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1937 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1938 alu.src[2].chan = 0; 1939 alu.last = 1; 1940 r = r600_bytecode_add_alu(ctx->bc, &alu); 1941 if (r) 1942 return r; 1943 1944 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1945 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1946 1947 alu.dst.chan = 0; 1948 alu.dst.sel = ctx->temp_reg; 1949 alu.dst.write = 1; 1950 1951 alu.src[0].sel = ctx->temp_reg; 1952 alu.src[0].chan = 0; 1953 alu.last = 1; 1954 r = r600_bytecode_add_alu(ctx->bc, &alu); 1955 if (r) 1956 return r; 1957 1958 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
1959 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1960 alu.is_op3 = 1; 1961 1962 alu.dst.chan = 0; 1963 alu.dst.sel = ctx->temp_reg; 1964 alu.dst.write = 1; 1965 1966 alu.src[0].sel = ctx->temp_reg; 1967 alu.src[0].chan = 0; 1968 1969 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1970 alu.src[1].chan = 0; 1971 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1972 alu.src[2].chan = 0; 1973 1974 if (ctx->bc->chip_class == R600) { 1975 alu.src[1].value = *(uint32_t *)&double_pi; 1976 alu.src[2].value = *(uint32_t *)&neg_pi; 1977 } else { 1978 alu.src[1].sel = V_SQ_ALU_SRC_1; 1979 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1980 alu.src[2].neg = 1; 1981 } 1982 1983 alu.last = 1; 1984 r = r600_bytecode_add_alu(ctx->bc, &alu); 1985 if (r) 1986 return r; 1987 return 0; 1988} 1989 1990static int cayman_trig(struct r600_shader_ctx *ctx) 1991{ 1992 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1993 struct r600_bytecode_alu alu; 1994 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 1995 int i, r; 1996 1997 r = tgsi_setup_trig(ctx); 1998 if (r) 1999 return r; 2000 2001 2002 for (i = 0; i < last_slot; i++) { 2003 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2004 alu.inst = ctx->inst_info->r600_opcode; 2005 alu.dst.chan = i; 2006 2007 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2008 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2009 2010 alu.src[0].sel = ctx->temp_reg; 2011 alu.src[0].chan = 0; 2012 if (i == last_slot - 1) 2013 alu.last = 1; 2014 r = r600_bytecode_add_alu(ctx->bc, &alu); 2015 if (r) 2016 return r; 2017 } 2018 return 0; 2019} 2020 2021static int tgsi_trig(struct r600_shader_ctx *ctx) 2022{ 2023 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2024 struct r600_bytecode_alu alu; 2025 int i, r; 2026 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2027 2028 r = tgsi_setup_trig(ctx); 2029 if (r) 2030 return r; 2031 2032 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2033 alu.inst = ctx->inst_info->r600_opcode; 2034 alu.dst.chan = 0; 2035 alu.dst.sel = ctx->temp_reg; 2036 alu.dst.write = 1; 2037 2038 alu.src[0].sel = ctx->temp_reg; 2039 alu.src[0].chan = 0; 2040 alu.last = 1; 2041 r = r600_bytecode_add_alu(ctx->bc, &alu); 2042 if (r) 2043 return r; 2044 2045 /* replicate result */ 2046 for (i = 0; i < lasti + 1; i++) { 2047 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2048 continue; 2049 2050 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2051 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2052 2053 alu.src[0].sel = ctx->temp_reg; 2054 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2055 if (i == lasti) 2056 alu.last = 1; 2057 r = r600_bytecode_add_alu(ctx->bc, &alu); 2058 if (r) 2059 return r; 2060 } 2061 return 0; 2062} 2063 2064static int tgsi_scs(struct r600_shader_ctx *ctx) 2065{ 2066 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2067 struct r600_bytecode_alu alu; 2068 int i, r; 2069 2070 /* We'll only need the 
trig stuff if we are going to write to the 2071 * X or Y components of the destination vector. 2072 */ 2073 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 2074 r = tgsi_setup_trig(ctx); 2075 if (r) 2076 return r; 2077 } 2078 2079 /* dst.x = COS */ 2080 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2081 if (ctx->bc->chip_class == CAYMAN) { 2082 for (i = 0 ; i < 3; i++) { 2083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2084 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 2085 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2086 2087 if (i == 0) 2088 alu.dst.write = 1; 2089 else 2090 alu.dst.write = 0; 2091 alu.src[0].sel = ctx->temp_reg; 2092 alu.src[0].chan = 0; 2093 if (i == 2) 2094 alu.last = 1; 2095 r = r600_bytecode_add_alu(ctx->bc, &alu); 2096 if (r) 2097 return r; 2098 } 2099 } else { 2100 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 2102 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2103 2104 alu.src[0].sel = ctx->temp_reg; 2105 alu.src[0].chan = 0; 2106 alu.last = 1; 2107 r = r600_bytecode_add_alu(ctx->bc, &alu); 2108 if (r) 2109 return r; 2110 } 2111 } 2112 2113 /* dst.y = SIN */ 2114 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2115 if (ctx->bc->chip_class == CAYMAN) { 2116 for (i = 0 ; i < 3; i++) { 2117 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2118 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 2119 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2120 if (i == 1) 2121 alu.dst.write = 1; 2122 else 2123 alu.dst.write = 0; 2124 alu.src[0].sel = ctx->temp_reg; 2125 alu.src[0].chan = 0; 2126 if (i == 2) 2127 alu.last = 1; 2128 r = r600_bytecode_add_alu(ctx->bc, &alu); 2129 if (r) 2130 return r; 2131 } 2132 } else { 2133 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 2135 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2136 2137 alu.src[0].sel = ctx->temp_reg; 2138 
alu.src[0].chan = 0; 2139 alu.last = 1; 2140 r = r600_bytecode_add_alu(ctx->bc, &alu); 2141 if (r) 2142 return r; 2143 } 2144 } 2145 2146 /* dst.z = 0.0; */ 2147 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2148 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2149 2150 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2151 2152 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2153 2154 alu.src[0].sel = V_SQ_ALU_SRC_0; 2155 alu.src[0].chan = 0; 2156 2157 alu.last = 1; 2158 2159 r = r600_bytecode_add_alu(ctx->bc, &alu); 2160 if (r) 2161 return r; 2162 } 2163 2164 /* dst.w = 1.0; */ 2165 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2166 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2167 2168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2169 2170 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2171 2172 alu.src[0].sel = V_SQ_ALU_SRC_1; 2173 alu.src[0].chan = 0; 2174 2175 alu.last = 1; 2176 2177 r = r600_bytecode_add_alu(ctx->bc, &alu); 2178 if (r) 2179 return r; 2180 } 2181 2182 return 0; 2183} 2184 2185static int tgsi_kill(struct r600_shader_ctx *ctx) 2186{ 2187 struct r600_bytecode_alu alu; 2188 int i, r; 2189 2190 for (i = 0; i < 4; i++) { 2191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2192 alu.inst = ctx->inst_info->r600_opcode; 2193 2194 alu.dst.chan = i; 2195 2196 alu.src[0].sel = V_SQ_ALU_SRC_0; 2197 2198 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 2199 alu.src[1].sel = V_SQ_ALU_SRC_1; 2200 alu.src[1].neg = 1; 2201 } else { 2202 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2203 } 2204 if (i == 3) { 2205 alu.last = 1; 2206 } 2207 r = r600_bytecode_add_alu(ctx->bc, &alu); 2208 if (r) 2209 return r; 2210 } 2211 2212 /* kill must be last in ALU */ 2213 ctx->bc->force_add_cf = 1; 2214 ctx->shader->uses_kill = TRUE; 2215 return 0; 2216} 2217 2218static int tgsi_lit(struct r600_shader_ctx *ctx) 2219{ 2220 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2221 struct 
r600_bytecode_alu alu; 2222 int r; 2223 2224 /* tmp.x = max(src.y, 0.0) */ 2225 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2226 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 2227 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 2228 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2229 alu.src[1].chan = 1; 2230 2231 alu.dst.sel = ctx->temp_reg; 2232 alu.dst.chan = 0; 2233 alu.dst.write = 1; 2234 2235 alu.last = 1; 2236 r = r600_bytecode_add_alu(ctx->bc, &alu); 2237 if (r) 2238 return r; 2239 2240 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 2241 { 2242 int chan; 2243 int sel; 2244 int i; 2245 2246 if (ctx->bc->chip_class == CAYMAN) { 2247 for (i = 0; i < 3; i++) { 2248 /* tmp.z = log(tmp.x) */ 2249 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 2251 alu.src[0].sel = ctx->temp_reg; 2252 alu.src[0].chan = 0; 2253 alu.dst.sel = ctx->temp_reg; 2254 alu.dst.chan = i; 2255 if (i == 2) { 2256 alu.dst.write = 1; 2257 alu.last = 1; 2258 } else 2259 alu.dst.write = 0; 2260 2261 r = r600_bytecode_add_alu(ctx->bc, &alu); 2262 if (r) 2263 return r; 2264 } 2265 } else { 2266 /* tmp.z = log(tmp.x) */ 2267 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 2269 alu.src[0].sel = ctx->temp_reg; 2270 alu.src[0].chan = 0; 2271 alu.dst.sel = ctx->temp_reg; 2272 alu.dst.chan = 2; 2273 alu.dst.write = 1; 2274 alu.last = 1; 2275 r = r600_bytecode_add_alu(ctx->bc, &alu); 2276 if (r) 2277 return r; 2278 } 2279 2280 chan = alu.dst.chan; 2281 sel = alu.dst.sel; 2282 2283 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 2284 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 2286 alu.src[0].sel = sel; 2287 alu.src[0].chan = chan; 2288 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 2289 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 2290 alu.dst.sel = ctx->temp_reg; 2291 
alu.dst.chan = 0; 2292 alu.dst.write = 1; 2293 alu.is_op3 = 1; 2294 alu.last = 1; 2295 r = r600_bytecode_add_alu(ctx->bc, &alu); 2296 if (r) 2297 return r; 2298 2299 if (ctx->bc->chip_class == CAYMAN) { 2300 for (i = 0; i < 3; i++) { 2301 /* dst.z = exp(tmp.x) */ 2302 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2304 alu.src[0].sel = ctx->temp_reg; 2305 alu.src[0].chan = 0; 2306 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2307 if (i == 2) { 2308 alu.dst.write = 1; 2309 alu.last = 1; 2310 } else 2311 alu.dst.write = 0; 2312 r = r600_bytecode_add_alu(ctx->bc, &alu); 2313 if (r) 2314 return r; 2315 } 2316 } else { 2317 /* dst.z = exp(tmp.x) */ 2318 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2319 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2320 alu.src[0].sel = ctx->temp_reg; 2321 alu.src[0].chan = 0; 2322 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2323 alu.last = 1; 2324 r = r600_bytecode_add_alu(ctx->bc, &alu); 2325 if (r) 2326 return r; 2327 } 2328 } 2329 2330 /* dst.x, <- 1.0 */ 2331 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2332 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2333 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 2334 alu.src[0].chan = 0; 2335 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2336 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 2337 r = r600_bytecode_add_alu(ctx->bc, &alu); 2338 if (r) 2339 return r; 2340 2341 /* dst.y = max(src.x, 0.0) */ 2342 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2343 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 2344 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2345 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2346 alu.src[1].chan = 0; 2347 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2348 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 2349 r = r600_bytecode_add_alu(ctx->bc, &alu); 2350 if (r) 2351 return r; 2352 2353 /* dst.w, <- 1.0 */ 2354 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 2355 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2356 alu.src[0].sel = V_SQ_ALU_SRC_1; 2357 alu.src[0].chan = 0; 2358 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2359 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 2360 alu.last = 1; 2361 r = r600_bytecode_add_alu(ctx->bc, &alu); 2362 if (r) 2363 return r; 2364 2365 return 0; 2366} 2367 2368static int tgsi_rsq(struct r600_shader_ctx *ctx) 2369{ 2370 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2371 struct r600_bytecode_alu alu; 2372 int i, r; 2373 2374 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2375 2376 /* XXX: 2377 * For state trackers other than OpenGL, we'll want to use 2378 * _RECIPSQRT_IEEE instead. 2379 */ 2380 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 2381 2382 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2383 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2384 r600_bytecode_src_set_abs(&alu.src[i]); 2385 } 2386 alu.dst.sel = ctx->temp_reg; 2387 alu.dst.write = 1; 2388 alu.last = 1; 2389 r = r600_bytecode_add_alu(ctx->bc, &alu); 2390 if (r) 2391 return r; 2392 /* replicate result */ 2393 return tgsi_helper_tempx_replicate(ctx); 2394} 2395 2396static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 2397{ 2398 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2399 struct r600_bytecode_alu alu; 2400 int i, r; 2401 2402 for (i = 0; i < 4; i++) { 2403 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2404 alu.src[0].sel = ctx->temp_reg; 2405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2406 alu.dst.chan = i; 2407 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2408 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2409 if (i == 3) 2410 alu.last = 1; 2411 r = r600_bytecode_add_alu(ctx->bc, &alu); 2412 if (r) 2413 return r; 2414 } 2415 return 0; 2416} 2417 2418static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 
2419{ 2420 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2421 struct r600_bytecode_alu alu; 2422 int i, r; 2423 2424 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2425 alu.inst = ctx->inst_info->r600_opcode; 2426 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2427 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2428 } 2429 alu.dst.sel = ctx->temp_reg; 2430 alu.dst.write = 1; 2431 alu.last = 1; 2432 r = r600_bytecode_add_alu(ctx->bc, &alu); 2433 if (r) 2434 return r; 2435 /* replicate result */ 2436 return tgsi_helper_tempx_replicate(ctx); 2437} 2438 2439static int cayman_pow(struct r600_shader_ctx *ctx) 2440{ 2441 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2442 int i, r; 2443 struct r600_bytecode_alu alu; 2444 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 2445 2446 for (i = 0; i < 3; i++) { 2447 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2449 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2450 alu.dst.sel = ctx->temp_reg; 2451 alu.dst.chan = i; 2452 alu.dst.write = 1; 2453 if (i == 2) 2454 alu.last = 1; 2455 r = r600_bytecode_add_alu(ctx->bc, &alu); 2456 if (r) 2457 return r; 2458 } 2459 2460 /* b * LOG2(a) */ 2461 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2462 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2463 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2464 alu.src[1].sel = ctx->temp_reg; 2465 alu.dst.sel = ctx->temp_reg; 2466 alu.dst.write = 1; 2467 alu.last = 1; 2468 r = r600_bytecode_add_alu(ctx->bc, &alu); 2469 if (r) 2470 return r; 2471 2472 for (i = 0; i < last_slot; i++) { 2473 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2476 alu.src[0].sel = ctx->temp_reg; 2477 2478 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2479 alu.dst.write = 
(inst->Dst[0].Register.WriteMask >> i) & 1; 2480 if (i == last_slot - 1) 2481 alu.last = 1; 2482 r = r600_bytecode_add_alu(ctx->bc, &alu); 2483 if (r) 2484 return r; 2485 } 2486 return 0; 2487} 2488 2489static int tgsi_pow(struct r600_shader_ctx *ctx) 2490{ 2491 struct r600_bytecode_alu alu; 2492 int r; 2493 2494 /* LOG2(a) */ 2495 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2497 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2498 alu.dst.sel = ctx->temp_reg; 2499 alu.dst.write = 1; 2500 alu.last = 1; 2501 r = r600_bytecode_add_alu(ctx->bc, &alu); 2502 if (r) 2503 return r; 2504 /* b * LOG2(a) */ 2505 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2507 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2508 alu.src[1].sel = ctx->temp_reg; 2509 alu.dst.sel = ctx->temp_reg; 2510 alu.dst.write = 1; 2511 alu.last = 1; 2512 r = r600_bytecode_add_alu(ctx->bc, &alu); 2513 if (r) 2514 return r; 2515 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2516 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2517 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2518 alu.src[0].sel = ctx->temp_reg; 2519 alu.dst.sel = ctx->temp_reg; 2520 alu.dst.write = 1; 2521 alu.last = 1; 2522 r = r600_bytecode_add_alu(ctx->bc, &alu); 2523 if (r) 2524 return r; 2525 return tgsi_helper_tempx_replicate(ctx); 2526} 2527 2528static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 2529{ 2530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2531 struct r600_bytecode_alu alu; 2532 int i, r, j; 2533 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2534 int tmp0 = ctx->temp_reg; 2535 int tmp1 = r600_get_temp(ctx); 2536 int tmp2 = r600_get_temp(ctx); 2537 int tmp3 = r600_get_temp(ctx); 2538 /* Unsigned path: 2539 * 2540 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 2541 * 2542 * 1. 
tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 2543 * 2. tmp0.z = lo (tmp0.x * src2) 2544 * 3. tmp0.w = -tmp0.z 2545 * 4. tmp0.y = hi (tmp0.x * src2) 2546 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 2547 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 2548 * 7. tmp1.x = tmp0.x - tmp0.w 2549 * 8. tmp1.y = tmp0.x + tmp0.w 2550 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 2551 * 10. tmp0.z = hi(tmp0.x * src1) = q 2552 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 2553 * 2554 * 12. tmp0.w = src1 - tmp0.y = r 2555 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 2556 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 2557 * 2558 * if DIV 2559 * 2560 * 15. tmp1.z = tmp0.z + 1 = q + 1 2561 * 16. tmp1.w = tmp0.z - 1 = q - 1 2562 * 2563 * else MOD 2564 * 2565 * 15. tmp1.z = tmp0.w - src2 = r - src2 2566 * 16. tmp1.w = tmp0.w + src2 = r + src2 2567 * 2568 * endif 2569 * 2570 * 17. tmp1.x = tmp1.x & tmp1.y 2571 * 2572 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 2573 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 2574 * 2575 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 2576 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 2577 * 2578 * Signed path: 2579 * 2580 * Same as unsigned, using abs values of the operands, 2581 * and fixing the sign of the result in the end. 
2582 */ 2583 2584 for (i = 0; i < 4; i++) { 2585 if (!(write_mask & (1<<i))) 2586 continue; 2587 2588 if (signed_op) { 2589 2590 /* tmp2.x = -src0 */ 2591 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2592 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2593 2594 alu.dst.sel = tmp2; 2595 alu.dst.chan = 0; 2596 alu.dst.write = 1; 2597 2598 alu.src[0].sel = V_SQ_ALU_SRC_0; 2599 2600 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2601 2602 alu.last = 1; 2603 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2604 return r; 2605 2606 /* tmp2.y = -src1 */ 2607 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2608 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2609 2610 alu.dst.sel = tmp2; 2611 alu.dst.chan = 1; 2612 alu.dst.write = 1; 2613 2614 alu.src[0].sel = V_SQ_ALU_SRC_0; 2615 2616 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2617 2618 alu.last = 1; 2619 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2620 return r; 2621 2622 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 2623 /* it will be a sign of the quotient */ 2624 if (!mod) { 2625 2626 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2627 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT); 2628 2629 alu.dst.sel = tmp2; 2630 alu.dst.chan = 2; 2631 alu.dst.write = 1; 2632 2633 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2634 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2635 2636 alu.last = 1; 2637 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2638 return r; 2639 } 2640 2641 /* tmp2.x = |src0| */ 2642 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2643 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2644 alu.is_op3 = 1; 2645 2646 alu.dst.sel = tmp2; 2647 alu.dst.chan = 0; 2648 alu.dst.write = 1; 2649 2650 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2651 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2652 alu.src[2].sel = tmp2; 2653 alu.src[2].chan = 0; 2654 2655 alu.last = 1; 2656 if ((r = 
r600_bytecode_add_alu(ctx->bc, &alu))) 2657 return r; 2658 2659 /* tmp2.y = |src1| */ 2660 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2661 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2662 alu.is_op3 = 1; 2663 2664 alu.dst.sel = tmp2; 2665 alu.dst.chan = 1; 2666 alu.dst.write = 1; 2667 2668 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2669 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2670 alu.src[2].sel = tmp2; 2671 alu.src[2].chan = 1; 2672 2673 alu.last = 1; 2674 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2675 return r; 2676 2677 } 2678 2679 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 2680 if (ctx->bc->chip_class == CAYMAN) { 2681 /* tmp3.x = u2f(src2) */ 2682 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 2684 2685 alu.dst.sel = tmp3; 2686 alu.dst.chan = 0; 2687 alu.dst.write = 1; 2688 2689 if (signed_op) { 2690 alu.src[0].sel = tmp2; 2691 alu.src[0].chan = 1; 2692 } else { 2693 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2694 } 2695 2696 alu.last = 1; 2697 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2698 return r; 2699 2700 /* tmp0.x = recip(tmp3.x) */ 2701 for (j = 0 ; j < 3; j++) { 2702 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2703 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; 2704 2705 alu.dst.sel = tmp0; 2706 alu.dst.chan = j; 2707 alu.dst.write = (j == 0); 2708 2709 alu.src[0].sel = tmp3; 2710 alu.src[0].chan = 0; 2711 2712 if (j == 2) 2713 alu.last = 1; 2714 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2715 return r; 2716 } 2717 2718 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2719 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2720 2721 alu.src[0].sel = tmp0; 2722 alu.src[0].chan = 0; 2723 2724 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2725 alu.src[1].value = 0x4f800000; 2726 2727 alu.dst.sel = tmp3; 2728 alu.dst.write = 1; 2729 alu.last = 1; 2730 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 2731 if (r) 2732 return r; 2733 2734 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 2736 2737 alu.dst.sel = tmp0; 2738 alu.dst.chan = 0; 2739 alu.dst.write = 1; 2740 2741 alu.src[0].sel = tmp3; 2742 alu.src[0].chan = 0; 2743 2744 alu.last = 1; 2745 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2746 return r; 2747 2748 } else { 2749 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2750 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT); 2751 2752 alu.dst.sel = tmp0; 2753 alu.dst.chan = 0; 2754 alu.dst.write = 1; 2755 2756 if (signed_op) { 2757 alu.src[0].sel = tmp2; 2758 alu.src[0].chan = 1; 2759 } else { 2760 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2761 } 2762 2763 alu.last = 1; 2764 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2765 return r; 2766 } 2767 2768 /* 2. tmp0.z = lo (tmp0.x * src2) */ 2769 if (ctx->bc->chip_class == CAYMAN) { 2770 for (j = 0 ; j < 4; j++) { 2771 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2772 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2773 2774 alu.dst.sel = tmp0; 2775 alu.dst.chan = j; 2776 alu.dst.write = (j == 2); 2777 2778 alu.src[0].sel = tmp0; 2779 alu.src[0].chan = 0; 2780 if (signed_op) { 2781 alu.src[1].sel = tmp2; 2782 alu.src[1].chan = 1; 2783 } else { 2784 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2785 } 2786 2787 alu.last = (j == 3); 2788 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2789 return r; 2790 } 2791 } else { 2792 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2793 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2794 2795 alu.dst.sel = tmp0; 2796 alu.dst.chan = 2; 2797 alu.dst.write = 1; 2798 2799 alu.src[0].sel = tmp0; 2800 alu.src[0].chan = 0; 2801 if (signed_op) { 2802 alu.src[1].sel = tmp2; 2803 alu.src[1].chan = 1; 2804 } else { 2805 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2806 } 2807 2808 alu.last = 1; 
2809 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2810 return r; 2811 } 2812 2813 /* 3. tmp0.w = -tmp0.z */ 2814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2815 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2816 2817 alu.dst.sel = tmp0; 2818 alu.dst.chan = 3; 2819 alu.dst.write = 1; 2820 2821 alu.src[0].sel = V_SQ_ALU_SRC_0; 2822 alu.src[1].sel = tmp0; 2823 alu.src[1].chan = 2; 2824 2825 alu.last = 1; 2826 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2827 return r; 2828 2829 /* 4. tmp0.y = hi (tmp0.x * src2) */ 2830 if (ctx->bc->chip_class == CAYMAN) { 2831 for (j = 0 ; j < 4; j++) { 2832 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2833 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2834 2835 alu.dst.sel = tmp0; 2836 alu.dst.chan = j; 2837 alu.dst.write = (j == 1); 2838 2839 alu.src[0].sel = tmp0; 2840 alu.src[0].chan = 0; 2841 2842 if (signed_op) { 2843 alu.src[1].sel = tmp2; 2844 alu.src[1].chan = 1; 2845 } else { 2846 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2847 } 2848 alu.last = (j == 3); 2849 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2850 return r; 2851 } 2852 } else { 2853 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2854 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2855 2856 alu.dst.sel = tmp0; 2857 alu.dst.chan = 1; 2858 alu.dst.write = 1; 2859 2860 alu.src[0].sel = tmp0; 2861 alu.src[0].chan = 0; 2862 2863 if (signed_op) { 2864 alu.src[1].sel = tmp2; 2865 alu.src[1].chan = 1; 2866 } else { 2867 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2868 } 2869 2870 alu.last = 1; 2871 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2872 return r; 2873 } 2874 2875 /* 5. tmp0.z = (tmp0.y == 0 ? 
tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 2876 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2877 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2878 alu.is_op3 = 1; 2879 2880 alu.dst.sel = tmp0; 2881 alu.dst.chan = 2; 2882 alu.dst.write = 1; 2883 2884 alu.src[0].sel = tmp0; 2885 alu.src[0].chan = 1; 2886 alu.src[1].sel = tmp0; 2887 alu.src[1].chan = 3; 2888 alu.src[2].sel = tmp0; 2889 alu.src[2].chan = 2; 2890 2891 alu.last = 1; 2892 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2893 return r; 2894 2895 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 2896 if (ctx->bc->chip_class == CAYMAN) { 2897 for (j = 0 ; j < 4; j++) { 2898 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2899 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2900 2901 alu.dst.sel = tmp0; 2902 alu.dst.chan = j; 2903 alu.dst.write = (j == 3); 2904 2905 alu.src[0].sel = tmp0; 2906 alu.src[0].chan = 2; 2907 2908 alu.src[1].sel = tmp0; 2909 alu.src[1].chan = 0; 2910 2911 alu.last = (j == 3); 2912 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2913 return r; 2914 } 2915 } else { 2916 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2917 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2918 2919 alu.dst.sel = tmp0; 2920 alu.dst.chan = 3; 2921 alu.dst.write = 1; 2922 2923 alu.src[0].sel = tmp0; 2924 alu.src[0].chan = 2; 2925 2926 alu.src[1].sel = tmp0; 2927 alu.src[1].chan = 0; 2928 2929 alu.last = 1; 2930 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2931 return r; 2932 } 2933 2934 /* 7. tmp1.x = tmp0.x - tmp0.w */ 2935 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2937 2938 alu.dst.sel = tmp1; 2939 alu.dst.chan = 0; 2940 alu.dst.write = 1; 2941 2942 alu.src[0].sel = tmp0; 2943 alu.src[0].chan = 0; 2944 alu.src[1].sel = tmp0; 2945 alu.src[1].chan = 3; 2946 2947 alu.last = 1; 2948 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2949 return r; 2950 2951 /* 8. 
tmp1.y = tmp0.x + tmp0.w */ 2952 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2954 2955 alu.dst.sel = tmp1; 2956 alu.dst.chan = 1; 2957 alu.dst.write = 1; 2958 2959 alu.src[0].sel = tmp0; 2960 alu.src[0].chan = 0; 2961 alu.src[1].sel = tmp0; 2962 alu.src[1].chan = 3; 2963 2964 alu.last = 1; 2965 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2966 return r; 2967 2968 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 2969 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2970 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2971 alu.is_op3 = 1; 2972 2973 alu.dst.sel = tmp0; 2974 alu.dst.chan = 0; 2975 alu.dst.write = 1; 2976 2977 alu.src[0].sel = tmp0; 2978 alu.src[0].chan = 1; 2979 alu.src[1].sel = tmp1; 2980 alu.src[1].chan = 1; 2981 alu.src[2].sel = tmp1; 2982 alu.src[2].chan = 0; 2983 2984 alu.last = 1; 2985 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2986 return r; 2987 2988 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 2989 if (ctx->bc->chip_class == CAYMAN) { 2990 for (j = 0 ; j < 4; j++) { 2991 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2992 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2993 2994 alu.dst.sel = tmp0; 2995 alu.dst.chan = j; 2996 alu.dst.write = (j == 2); 2997 2998 alu.src[0].sel = tmp0; 2999 alu.src[0].chan = 0; 3000 3001 if (signed_op) { 3002 alu.src[1].sel = tmp2; 3003 alu.src[1].chan = 0; 3004 } else { 3005 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3006 } 3007 3008 alu.last = (j == 3); 3009 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3010 return r; 3011 } 3012 } else { 3013 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3014 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3015 3016 alu.dst.sel = tmp0; 3017 alu.dst.chan = 2; 3018 alu.dst.write = 1; 3019 3020 alu.src[0].sel = tmp0; 3021 alu.src[0].chan = 0; 3022 3023 if (signed_op) { 3024 alu.src[1].sel = tmp2; 3025 alu.src[1].chan = 0; 3026 } else { 
3027 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3028 } 3029 3030 alu.last = 1; 3031 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3032 return r; 3033 } 3034 3035 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 3036 if (ctx->bc->chip_class == CAYMAN) { 3037 for (j = 0 ; j < 4; j++) { 3038 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3040 3041 alu.dst.sel = tmp0; 3042 alu.dst.chan = j; 3043 alu.dst.write = (j == 1); 3044 3045 if (signed_op) { 3046 alu.src[0].sel = tmp2; 3047 alu.src[0].chan = 1; 3048 } else { 3049 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3050 } 3051 3052 alu.src[1].sel = tmp0; 3053 alu.src[1].chan = 2; 3054 3055 alu.last = (j == 3); 3056 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3057 return r; 3058 } 3059 } else { 3060 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3061 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3062 3063 alu.dst.sel = tmp0; 3064 alu.dst.chan = 1; 3065 alu.dst.write = 1; 3066 3067 if (signed_op) { 3068 alu.src[0].sel = tmp2; 3069 alu.src[0].chan = 1; 3070 } else { 3071 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3072 } 3073 3074 alu.src[1].sel = tmp0; 3075 alu.src[1].chan = 2; 3076 3077 alu.last = 1; 3078 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3079 return r; 3080 } 3081 3082 /* 12. tmp0.w = src1 - tmp0.y = r */ 3083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3084 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3085 3086 alu.dst.sel = tmp0; 3087 alu.dst.chan = 3; 3088 alu.dst.write = 1; 3089 3090 if (signed_op) { 3091 alu.src[0].sel = tmp2; 3092 alu.src[0].chan = 0; 3093 } else { 3094 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3095 } 3096 3097 alu.src[1].sel = tmp0; 3098 alu.src[1].chan = 1; 3099 3100 alu.last = 1; 3101 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3102 return r; 3103 3104 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 3105 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3106 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 3107 3108 alu.dst.sel = tmp1; 3109 alu.dst.chan = 0; 3110 alu.dst.write = 1; 3111 3112 alu.src[0].sel = tmp0; 3113 alu.src[0].chan = 3; 3114 if (signed_op) { 3115 alu.src[1].sel = tmp2; 3116 alu.src[1].chan = 1; 3117 } else { 3118 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3119 } 3120 3121 alu.last = 1; 3122 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3123 return r; 3124 3125 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 3126 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3127 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 3128 3129 alu.dst.sel = tmp1; 3130 alu.dst.chan = 1; 3131 alu.dst.write = 1; 3132 3133 if (signed_op) { 3134 alu.src[0].sel = tmp2; 3135 alu.src[0].chan = 0; 3136 } else { 3137 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3138 } 3139 3140 alu.src[1].sel = tmp0; 3141 alu.src[1].chan = 1; 3142 3143 alu.last = 1; 3144 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3145 return r; 3146 3147 if (mod) { /* UMOD */ 3148 3149 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 3150 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3151 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3152 3153 alu.dst.sel = tmp1; 3154 alu.dst.chan = 2; 3155 alu.dst.write = 1; 3156 3157 alu.src[0].sel = tmp0; 3158 alu.src[0].chan = 3; 3159 3160 if (signed_op) { 3161 alu.src[1].sel = tmp2; 3162 alu.src[1].chan = 1; 3163 } else { 3164 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3165 } 3166 3167 alu.last = 1; 3168 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3169 return r; 3170 3171 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 3172 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3173 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3174 3175 alu.dst.sel = tmp1; 3176 alu.dst.chan = 3; 3177 alu.dst.write = 1; 3178 3179 alu.src[0].sel = tmp0; 3180 alu.src[0].chan = 3; 3181 if (signed_op) { 3182 alu.src[1].sel = tmp2; 3183 alu.src[1].chan = 1; 3184 } else { 3185 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3186 } 3187 3188 alu.last = 1; 3189 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3190 return r; 3191 3192 } else { /* UDIV */ 3193 3194 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 3195 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3196 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3197 3198 alu.dst.sel = tmp1; 3199 alu.dst.chan = 2; 3200 alu.dst.write = 1; 3201 3202 alu.src[0].sel = tmp0; 3203 alu.src[0].chan = 2; 3204 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 3205 3206 alu.last = 1; 3207 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3208 return r; 3209 3210 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 3211 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3212 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3213 3214 alu.dst.sel = tmp1; 3215 alu.dst.chan = 3; 3216 alu.dst.write = 1; 3217 3218 alu.src[0].sel = tmp0; 3219 alu.src[0].chan = 2; 3220 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 3221 3222 alu.last = 1; 3223 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3224 return r; 3225 3226 } 3227 3228 /* 17. tmp1.x = tmp1.x & tmp1.y */ 3229 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3230 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); 3231 3232 alu.dst.sel = tmp1; 3233 alu.dst.chan = 0; 3234 alu.dst.write = 1; 3235 3236 alu.src[0].sel = tmp1; 3237 alu.src[0].chan = 0; 3238 alu.src[1].sel = tmp1; 3239 alu.src[1].chan = 1; 3240 3241 alu.last = 1; 3242 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3243 return r; 3244 3245 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 3246 /* 18. 
tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 3247 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3248 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3249 alu.is_op3 = 1; 3250 3251 alu.dst.sel = tmp0; 3252 alu.dst.chan = 2; 3253 alu.dst.write = 1; 3254 3255 alu.src[0].sel = tmp1; 3256 alu.src[0].chan = 0; 3257 alu.src[1].sel = tmp0; 3258 alu.src[1].chan = mod ? 3 : 2; 3259 alu.src[2].sel = tmp1; 3260 alu.src[2].chan = 2; 3261 3262 alu.last = 1; 3263 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3264 return r; 3265 3266 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 3267 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3269 alu.is_op3 = 1; 3270 3271 if (signed_op) { 3272 alu.dst.sel = tmp0; 3273 alu.dst.chan = 2; 3274 alu.dst.write = 1; 3275 } else { 3276 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3277 } 3278 3279 alu.src[0].sel = tmp1; 3280 alu.src[0].chan = 1; 3281 alu.src[1].sel = tmp1; 3282 alu.src[1].chan = 3; 3283 alu.src[2].sel = tmp0; 3284 alu.src[2].chan = 2; 3285 3286 alu.last = 1; 3287 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3288 return r; 3289 3290 if (signed_op) { 3291 3292 /* fix the sign of the result */ 3293 3294 if (mod) { 3295 3296 /* tmp0.x = -tmp0.z */ 3297 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3298 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3299 3300 alu.dst.sel = tmp0; 3301 alu.dst.chan = 0; 3302 alu.dst.write = 1; 3303 3304 alu.src[0].sel = V_SQ_ALU_SRC_0; 3305 alu.src[1].sel = tmp0; 3306 alu.src[1].chan = 2; 3307 3308 alu.last = 1; 3309 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3310 return r; 3311 3312 /* sign of the remainder is the same as the sign of src0 */ 3313 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 3314 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3315 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3316 alu.is_op3 = 1; 3317 3318 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3319 3320 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3321 alu.src[1].sel = tmp0; 3322 alu.src[1].chan = 2; 3323 alu.src[2].sel = tmp0; 3324 alu.src[2].chan = 0; 3325 3326 alu.last = 1; 3327 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3328 return r; 3329 3330 } else { 3331 3332 /* tmp0.x = -tmp0.z */ 3333 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3334 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3335 3336 alu.dst.sel = tmp0; 3337 alu.dst.chan = 0; 3338 alu.dst.write = 1; 3339 3340 alu.src[0].sel = V_SQ_ALU_SRC_0; 3341 alu.src[1].sel = tmp0; 3342 alu.src[1].chan = 2; 3343 3344 alu.last = 1; 3345 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3346 return r; 3347 3348 /* fix the quotient sign (same as the sign of src0*src1) */ 3349 /* tmp0.x = tmp2.z>=0 ? 
tmp0.z : tmp0.x */ 3350 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3352 alu.is_op3 = 1; 3353 3354 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3355 3356 alu.src[0].sel = tmp2; 3357 alu.src[0].chan = 2; 3358 alu.src[1].sel = tmp0; 3359 alu.src[1].chan = 2; 3360 alu.src[2].sel = tmp0; 3361 alu.src[2].chan = 0; 3362 3363 alu.last = 1; 3364 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3365 return r; 3366 } 3367 } 3368 } 3369 return 0; 3370} 3371 3372static int tgsi_udiv(struct r600_shader_ctx *ctx) 3373{ 3374 return tgsi_divmod(ctx, 0, 0); 3375} 3376 3377static int tgsi_umod(struct r600_shader_ctx *ctx) 3378{ 3379 return tgsi_divmod(ctx, 1, 0); 3380} 3381 3382static int tgsi_idiv(struct r600_shader_ctx *ctx) 3383{ 3384 return tgsi_divmod(ctx, 0, 1); 3385} 3386 3387static int tgsi_imod(struct r600_shader_ctx *ctx) 3388{ 3389 return tgsi_divmod(ctx, 1, 1); 3390} 3391 3392 3393static int tgsi_f2i(struct r600_shader_ctx *ctx) 3394{ 3395 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3396 struct r600_bytecode_alu alu; 3397 int i, r; 3398 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3399 int last_inst = tgsi_last_instruction(write_mask); 3400 3401 for (i = 0; i < 4; i++) { 3402 if (!(write_mask & (1<<i))) 3403 continue; 3404 3405 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3406 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); 3407 3408 alu.dst.sel = ctx->temp_reg; 3409 alu.dst.chan = i; 3410 alu.dst.write = 1; 3411 3412 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3413 if (i == last_inst) 3414 alu.last = 1; 3415 r = r600_bytecode_add_alu(ctx->bc, &alu); 3416 if (r) 3417 return r; 3418 } 3419 3420 for (i = 0; i < 4; i++) { 3421 if (!(write_mask & (1<<i))) 3422 continue; 3423 3424 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3425 alu.inst = ctx->inst_info->r600_opcode; 3426 3427 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3428 3429 
alu.src[0].sel = ctx->temp_reg; 3430 alu.src[0].chan = i; 3431 3432 if (i == last_inst || alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT) 3433 alu.last = 1; 3434 r = r600_bytecode_add_alu(ctx->bc, &alu); 3435 if (r) 3436 return r; 3437 } 3438 3439 return 0; 3440} 3441 3442static int tgsi_iabs(struct r600_shader_ctx *ctx) 3443{ 3444 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3445 struct r600_bytecode_alu alu; 3446 int i, r; 3447 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3448 int last_inst = tgsi_last_instruction(write_mask); 3449 3450 /* tmp = -src */ 3451 for (i = 0; i < 4; i++) { 3452 if (!(write_mask & (1<<i))) 3453 continue; 3454 3455 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3457 3458 alu.dst.sel = ctx->temp_reg; 3459 alu.dst.chan = i; 3460 alu.dst.write = 1; 3461 3462 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3463 alu.src[0].sel = V_SQ_ALU_SRC_0; 3464 3465 if (i == last_inst) 3466 alu.last = 1; 3467 r = r600_bytecode_add_alu(ctx->bc, &alu); 3468 if (r) 3469 return r; 3470 } 3471 3472 /* dst = (src >= 0 ? 
src : tmp) */ 3473 for (i = 0; i < 4; i++) { 3474 if (!(write_mask & (1<<i))) 3475 continue; 3476 3477 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3478 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3479 alu.is_op3 = 1; 3480 alu.dst.write = 1; 3481 3482 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3483 3484 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3485 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3486 alu.src[2].sel = ctx->temp_reg; 3487 alu.src[2].chan = i; 3488 3489 if (i == last_inst) 3490 alu.last = 1; 3491 r = r600_bytecode_add_alu(ctx->bc, &alu); 3492 if (r) 3493 return r; 3494 } 3495 return 0; 3496} 3497 3498static int tgsi_issg(struct r600_shader_ctx *ctx) 3499{ 3500 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3501 struct r600_bytecode_alu alu; 3502 int i, r; 3503 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3504 int last_inst = tgsi_last_instruction(write_mask); 3505 3506 /* tmp = (src >= 0 ? src : -1) */ 3507 for (i = 0; i < 4; i++) { 3508 if (!(write_mask & (1<<i))) 3509 continue; 3510 3511 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3512 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3513 alu.is_op3 = 1; 3514 3515 alu.dst.sel = ctx->temp_reg; 3516 alu.dst.chan = i; 3517 alu.dst.write = 1; 3518 3519 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3520 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3521 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 3522 3523 if (i == last_inst) 3524 alu.last = 1; 3525 r = r600_bytecode_add_alu(ctx->bc, &alu); 3526 if (r) 3527 return r; 3528 } 3529 3530 /* dst = (tmp > 0 ? 
1 : tmp) */ 3531 for (i = 0; i < 4; i++) { 3532 if (!(write_mask & (1<<i))) 3533 continue; 3534 3535 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT); 3537 alu.is_op3 = 1; 3538 alu.dst.write = 1; 3539 3540 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3541 3542 alu.src[0].sel = ctx->temp_reg; 3543 alu.src[0].chan = i; 3544 3545 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 3546 3547 alu.src[2].sel = ctx->temp_reg; 3548 alu.src[2].chan = i; 3549 3550 if (i == last_inst) 3551 alu.last = 1; 3552 r = r600_bytecode_add_alu(ctx->bc, &alu); 3553 if (r) 3554 return r; 3555 } 3556 return 0; 3557} 3558 3559 3560 3561static int tgsi_ssg(struct r600_shader_ctx *ctx) 3562{ 3563 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3564 struct r600_bytecode_alu alu; 3565 int i, r; 3566 3567 /* tmp = (src > 0 ? 1 : src) */ 3568 for (i = 0; i < 4; i++) { 3569 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3570 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 3571 alu.is_op3 = 1; 3572 3573 alu.dst.sel = ctx->temp_reg; 3574 alu.dst.chan = i; 3575 3576 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3577 alu.src[1].sel = V_SQ_ALU_SRC_1; 3578 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 3579 3580 if (i == 3) 3581 alu.last = 1; 3582 r = r600_bytecode_add_alu(ctx->bc, &alu); 3583 if (r) 3584 return r; 3585 } 3586 3587 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 3588 for (i = 0; i < 4; i++) { 3589 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 3591 alu.is_op3 = 1; 3592 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3593 3594 alu.src[0].sel = ctx->temp_reg; 3595 alu.src[0].chan = i; 3596 alu.src[0].neg = 1; 3597 3598 alu.src[1].sel = V_SQ_ALU_SRC_1; 3599 alu.src[1].neg = 1; 3600 3601 alu.src[2].sel = ctx->temp_reg; 3602 alu.src[2].chan = i; 3603 3604 if (i == 3) 3605 alu.last = 1; 3606 r = r600_bytecode_add_alu(ctx->bc, &alu); 3607 if (r) 3608 return r; 3609 } 3610 return 0; 3611} 3612 3613static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 3614{ 3615 struct r600_bytecode_alu alu; 3616 int i, r; 3617 3618 for (i = 0; i < 4; i++) { 3619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3620 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 3621 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 3622 alu.dst.chan = i; 3623 } else { 3624 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3625 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3626 alu.src[0].sel = ctx->temp_reg; 3627 alu.src[0].chan = i; 3628 } 3629 if (i == 3) { 3630 alu.last = 1; 3631 } 3632 r = r600_bytecode_add_alu(ctx->bc, &alu); 3633 if (r) 3634 return r; 3635 } 3636 return 0; 3637} 3638 3639static int tgsi_op3(struct r600_shader_ctx *ctx) 3640{ 3641 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3642 struct r600_bytecode_alu alu; 3643 int i, j, r; 3644 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3645 3646 for (i = 0; i < lasti + 1; i++) { 3647 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3648 continue; 3649 3650 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3651 alu.inst = ctx->inst_info->r600_opcode; 3652 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3653 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3654 } 3655 3656 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 3657 alu.dst.chan = i; 3658 alu.dst.write = 1; 3659 alu.is_op3 = 1; 3660 if (i == lasti) { 3661 alu.last = 1; 3662 } 3663 r = r600_bytecode_add_alu(ctx->bc, &alu); 3664 if (r) 3665 return r; 3666 } 3667 return 0; 3668} 3669 3670static int tgsi_dp(struct r600_shader_ctx *ctx) 3671{ 3672 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3673 struct r600_bytecode_alu alu; 3674 int i, j, r; 3675 3676 for (i = 0; i < 4; i++) { 3677 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3678 alu.inst = ctx->inst_info->r600_opcode; 3679 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3680 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3681 } 3682 3683 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3684 alu.dst.chan = i; 3685 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3686 /* handle some special cases */ 3687 switch (ctx->inst_info->tgsi_opcode) { 3688 case TGSI_OPCODE_DP2: 3689 if (i > 1) { 3690 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 3691 alu.src[0].chan = alu.src[1].chan = 0; 3692 } 3693 break; 3694 case TGSI_OPCODE_DP3: 3695 if (i > 2) { 3696 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 3697 alu.src[0].chan = alu.src[1].chan = 0; 3698 } 3699 break; 3700 case TGSI_OPCODE_DPH: 3701 if (i == 3) { 3702 alu.src[0].sel = V_SQ_ALU_SRC_1; 3703 alu.src[0].chan = 0; 3704 alu.src[0].neg = 0; 3705 } 3706 break; 3707 default: 3708 break; 3709 } 3710 if (i == 3) { 3711 alu.last = 1; 3712 } 3713 r = r600_bytecode_add_alu(ctx->bc, &alu); 3714 if (r) 3715 return r; 3716 } 3717 return 0; 3718} 3719 3720static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 3721 unsigned index) 3722{ 3723 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3724 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 3725 inst->Src[index].Register.File != TGSI_FILE_INPUT && 3726 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 3727 ctx->src[index].neg || ctx->src[index].abs; 
3728} 3729 3730static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 3731 unsigned index) 3732{ 3733 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3734 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 3735} 3736 3737static int tgsi_tex(struct r600_shader_ctx *ctx) 3738{ 3739 static float one_point_five = 1.5f; 3740 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3741 struct r600_bytecode_tex tex; 3742 struct r600_bytecode_alu alu; 3743 unsigned src_gpr; 3744 int r, i, j; 3745 int opcode; 3746 /* Texture fetch instructions can only use gprs as source. 3747 * Also they cannot negate the source or take the absolute value */ 3748 const boolean src_requires_loading = inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 3749 tgsi_tex_src_requires_loading(ctx, 0); 3750 boolean src_loaded = FALSE; 3751 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 3752 uint8_t offset_x = 0, offset_y = 0, offset_z = 0; 3753 3754 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 3755 3756 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 3757 /* get offset values */ 3758 if (inst->Texture.NumOffsets) { 3759 assert(inst->Texture.NumOffsets == 1); 3760 3761 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 3762 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 3763 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 3764 } 3765 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 3766 /* TGSI moves the sampler to src reg 3 for TXD */ 3767 sampler_src_reg = 3; 3768 3769 for (i = 1; i < 3; i++) { 3770 /* set gradients h/v */ 3771 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 3772 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 3773 SQ_TEX_INST_SET_GRADIENTS_V; 3774 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 3775 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 3776 3777 if (tgsi_tex_src_requires_loading(ctx, i)) { 3778 tex.src_gpr = r600_get_temp(ctx); 3779 tex.src_sel_x = 0; 3780 tex.src_sel_y = 1; 3781 tex.src_sel_z = 2; 3782 tex.src_sel_w = 3; 3783 3784 for (j = 0; j < 4; j++) { 3785 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3786 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3787 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 3788 alu.dst.sel = tex.src_gpr; 3789 alu.dst.chan = j; 3790 if (j == 3) 3791 alu.last = 1; 3792 alu.dst.write = 1; 3793 r = r600_bytecode_add_alu(ctx->bc, &alu); 3794 if (r) 3795 return r; 3796 } 3797 3798 } else { 3799 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 3800 tex.src_sel_x = ctx->src[i].swizzle[0]; 3801 tex.src_sel_y = ctx->src[i].swizzle[1]; 3802 tex.src_sel_z = ctx->src[i].swizzle[2]; 3803 tex.src_sel_w = ctx->src[i].swizzle[3]; 3804 tex.src_rel = ctx->src[i].rel; 3805 } 3806 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 3807 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 3808 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 3809 tex.coord_type_x = 1; 3810 tex.coord_type_y = 1; 3811 tex.coord_type_z = 1; 3812 tex.coord_type_w = 1; 3813 } 3814 r = r600_bytecode_add_tex(ctx->bc, &tex); 3815 if (r) 3816 return r; 3817 } 3818 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 3819 int out_chan; 3820 /* Add perspective divide */ 3821 if (ctx->bc->chip_class == CAYMAN) { 3822 out_chan = 2; 3823 for (i = 0; i < 3; i++) { 3824 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3825 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3826 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3827 3828 alu.dst.sel = ctx->temp_reg; 3829 alu.dst.chan = i; 3830 if (i == 2) 3831 alu.last = 1; 3832 if (out_chan == i) 3833 
alu.dst.write = 1; 3834 r = r600_bytecode_add_alu(ctx->bc, &alu); 3835 if (r) 3836 return r; 3837 } 3838 3839 } else { 3840 out_chan = 3; 3841 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3843 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3844 3845 alu.dst.sel = ctx->temp_reg; 3846 alu.dst.chan = out_chan; 3847 alu.last = 1; 3848 alu.dst.write = 1; 3849 r = r600_bytecode_add_alu(ctx->bc, &alu); 3850 if (r) 3851 return r; 3852 } 3853 3854 for (i = 0; i < 3; i++) { 3855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3857 alu.src[0].sel = ctx->temp_reg; 3858 alu.src[0].chan = out_chan; 3859 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3860 alu.dst.sel = ctx->temp_reg; 3861 alu.dst.chan = i; 3862 alu.dst.write = 1; 3863 r = r600_bytecode_add_alu(ctx->bc, &alu); 3864 if (r) 3865 return r; 3866 } 3867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3869 alu.src[0].sel = V_SQ_ALU_SRC_1; 3870 alu.src[0].chan = 0; 3871 alu.dst.sel = ctx->temp_reg; 3872 alu.dst.chan = 3; 3873 alu.last = 1; 3874 alu.dst.write = 1; 3875 r = r600_bytecode_add_alu(ctx->bc, &alu); 3876 if (r) 3877 return r; 3878 src_loaded = TRUE; 3879 src_gpr = ctx->temp_reg; 3880 } 3881 3882 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 3883 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && 3884 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 3885 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 3886 3887 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 3888 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 3889 3890 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 3891 for (i = 0; i < 4; i++) { 3892 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3893 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 3894 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 3895 
r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 3896 alu.dst.sel = ctx->temp_reg; 3897 alu.dst.chan = i; 3898 if (i == 3) 3899 alu.last = 1; 3900 alu.dst.write = 1; 3901 r = r600_bytecode_add_alu(ctx->bc, &alu); 3902 if (r) 3903 return r; 3904 } 3905 3906 /* tmp1.z = RCP_e(|tmp1.z|) */ 3907 if (ctx->bc->chip_class == CAYMAN) { 3908 for (i = 0; i < 3; i++) { 3909 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3910 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3911 alu.src[0].sel = ctx->temp_reg; 3912 alu.src[0].chan = 2; 3913 alu.src[0].abs = 1; 3914 alu.dst.sel = ctx->temp_reg; 3915 alu.dst.chan = i; 3916 if (i == 2) 3917 alu.dst.write = 1; 3918 if (i == 2) 3919 alu.last = 1; 3920 r = r600_bytecode_add_alu(ctx->bc, &alu); 3921 if (r) 3922 return r; 3923 } 3924 } else { 3925 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3926 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3927 alu.src[0].sel = ctx->temp_reg; 3928 alu.src[0].chan = 2; 3929 alu.src[0].abs = 1; 3930 alu.dst.sel = ctx->temp_reg; 3931 alu.dst.chan = 2; 3932 alu.dst.write = 1; 3933 alu.last = 1; 3934 r = r600_bytecode_add_alu(ctx->bc, &alu); 3935 if (r) 3936 return r; 3937 } 3938 3939 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 3940 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 3941 * muladd has no writemask, have to use another temp 3942 */ 3943 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3944 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3945 alu.is_op3 = 1; 3946 3947 alu.src[0].sel = ctx->temp_reg; 3948 alu.src[0].chan = 0; 3949 alu.src[1].sel = ctx->temp_reg; 3950 alu.src[1].chan = 2; 3951 3952 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3953 alu.src[2].chan = 0; 3954 alu.src[2].value = *(uint32_t *)&one_point_five; 3955 3956 alu.dst.sel = ctx->temp_reg; 3957 alu.dst.chan = 0; 3958 alu.dst.write = 1; 3959 3960 r = r600_bytecode_add_alu(ctx->bc, &alu); 3961 if (r) 3962 return r; 3963 3964 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 3965 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3966 alu.is_op3 = 1; 3967 3968 alu.src[0].sel = ctx->temp_reg; 3969 alu.src[0].chan = 1; 3970 alu.src[1].sel = ctx->temp_reg; 3971 alu.src[1].chan = 2; 3972 3973 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3974 alu.src[2].chan = 0; 3975 alu.src[2].value = *(uint32_t *)&one_point_five; 3976 3977 alu.dst.sel = ctx->temp_reg; 3978 alu.dst.chan = 1; 3979 alu.dst.write = 1; 3980 3981 alu.last = 1; 3982 r = r600_bytecode_add_alu(ctx->bc, &alu); 3983 if (r) 3984 return r; 3985 /* write initial W value into Z component */ 3986 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) { 3987 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3988 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3989 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3990 alu.dst.sel = ctx->temp_reg; 3991 alu.dst.chan = 2; 3992 alu.dst.write = 1; 3993 alu.last = 1; 3994 r = r600_bytecode_add_alu(ctx->bc, &alu); 3995 if (r) 3996 return r; 3997 } 3998 src_loaded = TRUE; 3999 src_gpr = ctx->temp_reg; 4000 } 4001 4002 if (src_requires_loading && !src_loaded) { 4003 for (i = 0; i < 4; i++) { 4004 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4006 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4007 alu.dst.sel = ctx->temp_reg; 4008 alu.dst.chan = i; 4009 if (i == 3) 4010 alu.last = 1; 4011 alu.dst.write = 1; 4012 r = r600_bytecode_add_alu(ctx->bc, &alu); 4013 if (r) 4014 return r; 4015 } 4016 src_loaded = TRUE; 4017 src_gpr = ctx->temp_reg; 4018 } 4019 4020 opcode = ctx->inst_info->r600_opcode; 4021 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 4022 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 4023 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 4024 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4025 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 4026 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 4027 
switch (opcode) { 4028 case SQ_TEX_INST_SAMPLE: 4029 opcode = SQ_TEX_INST_SAMPLE_C; 4030 break; 4031 case SQ_TEX_INST_SAMPLE_L: 4032 opcode = SQ_TEX_INST_SAMPLE_C_L; 4033 break; 4034 case SQ_TEX_INST_SAMPLE_LB: 4035 opcode = SQ_TEX_INST_SAMPLE_C_LB; 4036 break; 4037 case SQ_TEX_INST_SAMPLE_G: 4038 opcode = SQ_TEX_INST_SAMPLE_C_G; 4039 break; 4040 } 4041 } 4042 4043 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4044 tex.inst = opcode; 4045 4046 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4047 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4048 tex.src_gpr = src_gpr; 4049 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 4050 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 4051 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 4052 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 4053 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 4054 4055 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) { 4056 tex.src_sel_x = 4; 4057 tex.src_sel_y = 4; 4058 tex.src_sel_z = 4; 4059 tex.src_sel_w = 4; 4060 } else if (src_loaded) { 4061 tex.src_sel_x = 0; 4062 tex.src_sel_y = 1; 4063 tex.src_sel_z = 2; 4064 tex.src_sel_w = 3; 4065 } else { 4066 tex.src_sel_x = ctx->src[0].swizzle[0]; 4067 tex.src_sel_y = ctx->src[0].swizzle[1]; 4068 tex.src_sel_z = ctx->src[0].swizzle[2]; 4069 tex.src_sel_w = ctx->src[0].swizzle[3]; 4070 tex.src_rel = ctx->src[0].rel; 4071 } 4072 4073 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 4074 tex.src_sel_x = 1; 4075 tex.src_sel_y = 0; 4076 tex.src_sel_z = 3; 4077 tex.src_sel_w = 1; 4078 } 4079 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) { 4080 tex.src_sel_x = 1; 4081 tex.src_sel_y = 0; 4082 tex.src_sel_z = 3; 4083 tex.src_sel_w = 2; /* route Z compare value into W */ 4084 } 4085 4086 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 4087 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 4088 tex.coord_type_x = 
1; 4089 tex.coord_type_y = 1; 4090 } 4091 tex.coord_type_z = 1; 4092 tex.coord_type_w = 1; 4093 4094 tex.offset_x = offset_x; 4095 tex.offset_y = offset_y; 4096 tex.offset_z = offset_z; 4097 4098 /* Put the depth for comparison in W. 4099 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 4100 * Some instructions expect the depth in Z. */ 4101 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 4102 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 4103 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 4104 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 4105 opcode != SQ_TEX_INST_SAMPLE_C_L && 4106 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 4107 tex.src_sel_w = tex.src_sel_z; 4108 } 4109 4110 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 4111 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 4112 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 4113 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 4114 /* the array index is read from Y */ 4115 tex.coord_type_y = 0; 4116 } else { 4117 /* the array index is read from Z */ 4118 tex.coord_type_z = 0; 4119 tex.src_sel_z = tex.src_sel_y; 4120 } 4121 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 4122 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 4123 /* the array index is read from Z */ 4124 tex.coord_type_z = 0; 4125 4126 r = r600_bytecode_add_tex(ctx->bc, &tex); 4127 if (r) 4128 return r; 4129 4130 /* add shadow ambient support - gallium doesn't do it yet */ 4131 return 0; 4132} 4133 4134static int tgsi_lrp(struct r600_shader_ctx *ctx) 4135{ 4136 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4137 struct r600_bytecode_alu alu; 4138 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4139 unsigned i; 4140 int r; 4141 4142 /* optimize if it's just an equal balance */ 4143 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 4144 for (i = 0; i < lasti + 1; i++) { 4145 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4146 continue; 4147 4148 memset(&alu, 
0, sizeof(struct r600_bytecode_alu)); 4149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 4150 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4151 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4152 alu.omod = 3; 4153 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4154 alu.dst.chan = i; 4155 if (i == lasti) { 4156 alu.last = 1; 4157 } 4158 r = r600_bytecode_add_alu(ctx->bc, &alu); 4159 if (r) 4160 return r; 4161 } 4162 return 0; 4163 } 4164 4165 /* 1 - src0 */ 4166 for (i = 0; i < lasti + 1; i++) { 4167 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4168 continue; 4169 4170 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4171 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 4172 alu.src[0].sel = V_SQ_ALU_SRC_1; 4173 alu.src[0].chan = 0; 4174 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4175 r600_bytecode_src_toggle_neg(&alu.src[1]); 4176 alu.dst.sel = ctx->temp_reg; 4177 alu.dst.chan = i; 4178 if (i == lasti) { 4179 alu.last = 1; 4180 } 4181 alu.dst.write = 1; 4182 r = r600_bytecode_add_alu(ctx->bc, &alu); 4183 if (r) 4184 return r; 4185 } 4186 4187 /* (1 - src0) * src2 */ 4188 for (i = 0; i < lasti + 1; i++) { 4189 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4190 continue; 4191 4192 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4194 alu.src[0].sel = ctx->temp_reg; 4195 alu.src[0].chan = i; 4196 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4197 alu.dst.sel = ctx->temp_reg; 4198 alu.dst.chan = i; 4199 if (i == lasti) { 4200 alu.last = 1; 4201 } 4202 alu.dst.write = 1; 4203 r = r600_bytecode_add_alu(ctx->bc, &alu); 4204 if (r) 4205 return r; 4206 } 4207 4208 /* src0 * src1 + (1 - src0) * src2 */ 4209 for (i = 0; i < lasti + 1; i++) { 4210 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4211 continue; 4212 4213 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4214 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4215 alu.is_op3 = 1; 4216 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4217 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4218 alu.src[2].sel = ctx->temp_reg; 4219 alu.src[2].chan = i; 4220 4221 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4222 alu.dst.chan = i; 4223 if (i == lasti) { 4224 alu.last = 1; 4225 } 4226 r = r600_bytecode_add_alu(ctx->bc, &alu); 4227 if (r) 4228 return r; 4229 } 4230 return 0; 4231} 4232 4233static int tgsi_cmp(struct r600_shader_ctx *ctx) 4234{ 4235 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4236 struct r600_bytecode_alu alu; 4237 int i, r; 4238 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4239 4240 for (i = 0; i < lasti + 1; i++) { 4241 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4242 continue; 4243 4244 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4245 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 4246 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4247 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4248 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 4249 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4250 alu.dst.chan = i; 4251 alu.dst.write = 1; 4252 alu.is_op3 = 1; 4253 if (i == lasti) 4254 alu.last = 1; 4255 r = r600_bytecode_add_alu(ctx->bc, &alu); 4256 if (r) 4257 return r; 4258 } 4259 return 0; 4260} 4261 4262static int tgsi_xpd(struct r600_shader_ctx *ctx) 4263{ 4264 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4265 static const unsigned int src0_swizzle[] = {2, 0, 1}; 4266 static const unsigned int src1_swizzle[] = {1, 2, 0}; 4267 struct r600_bytecode_alu alu; 4268 uint32_t use_temp = 0; 4269 int i, r; 4270 4271 if (inst->Dst[0].Register.WriteMask != 0xf) 4272 use_temp = 1; 4273 4274 for (i = 0; i < 4; i++) { 4275 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4277 if (i < 3) { 4278 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 4279 
r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 4280 } else { 4281 alu.src[0].sel = V_SQ_ALU_SRC_0; 4282 alu.src[0].chan = i; 4283 alu.src[1].sel = V_SQ_ALU_SRC_0; 4284 alu.src[1].chan = i; 4285 } 4286 4287 alu.dst.sel = ctx->temp_reg; 4288 alu.dst.chan = i; 4289 alu.dst.write = 1; 4290 4291 if (i == 3) 4292 alu.last = 1; 4293 r = r600_bytecode_add_alu(ctx->bc, &alu); 4294 if (r) 4295 return r; 4296 } 4297 4298 for (i = 0; i < 4; i++) { 4299 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4301 4302 if (i < 3) { 4303 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 4304 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 4305 } else { 4306 alu.src[0].sel = V_SQ_ALU_SRC_0; 4307 alu.src[0].chan = i; 4308 alu.src[1].sel = V_SQ_ALU_SRC_0; 4309 alu.src[1].chan = i; 4310 } 4311 4312 alu.src[2].sel = ctx->temp_reg; 4313 alu.src[2].neg = 1; 4314 alu.src[2].chan = i; 4315 4316 if (use_temp) 4317 alu.dst.sel = ctx->temp_reg; 4318 else 4319 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4320 alu.dst.chan = i; 4321 alu.dst.write = 1; 4322 alu.is_op3 = 1; 4323 if (i == 3) 4324 alu.last = 1; 4325 r = r600_bytecode_add_alu(ctx->bc, &alu); 4326 if (r) 4327 return r; 4328 } 4329 if (use_temp) 4330 return tgsi_helper_copy(ctx, inst); 4331 return 0; 4332} 4333 4334static int tgsi_exp(struct r600_shader_ctx *ctx) 4335{ 4336 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4337 struct r600_bytecode_alu alu; 4338 int r; 4339 int i; 4340 4341 /* result.x = 2^floor(src); */ 4342 if (inst->Dst[0].Register.WriteMask & 1) { 4343 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4344 4345 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 4346 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4347 4348 alu.dst.sel = ctx->temp_reg; 4349 alu.dst.chan = 0; 4350 alu.dst.write = 1; 4351 alu.last = 1; 4352 r = r600_bytecode_add_alu(ctx->bc, &alu); 4353 if (r) 
4354 return r; 4355 4356 if (ctx->bc->chip_class == CAYMAN) { 4357 for (i = 0; i < 3; i++) { 4358 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4359 alu.src[0].sel = ctx->temp_reg; 4360 alu.src[0].chan = 0; 4361 4362 alu.dst.sel = ctx->temp_reg; 4363 alu.dst.chan = i; 4364 if (i == 0) 4365 alu.dst.write = 1; 4366 if (i == 2) 4367 alu.last = 1; 4368 r = r600_bytecode_add_alu(ctx->bc, &alu); 4369 if (r) 4370 return r; 4371 } 4372 } else { 4373 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4374 alu.src[0].sel = ctx->temp_reg; 4375 alu.src[0].chan = 0; 4376 4377 alu.dst.sel = ctx->temp_reg; 4378 alu.dst.chan = 0; 4379 alu.dst.write = 1; 4380 alu.last = 1; 4381 r = r600_bytecode_add_alu(ctx->bc, &alu); 4382 if (r) 4383 return r; 4384 } 4385 } 4386 4387 /* result.y = tmp - floor(tmp); */ 4388 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 4389 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4390 4391 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 4392 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4393 4394 alu.dst.sel = ctx->temp_reg; 4395#if 0 4396 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4397 if (r) 4398 return r; 4399#endif 4400 alu.dst.write = 1; 4401 alu.dst.chan = 1; 4402 4403 alu.last = 1; 4404 4405 r = r600_bytecode_add_alu(ctx->bc, &alu); 4406 if (r) 4407 return r; 4408 } 4409 4410 /* result.z = RoughApprox2ToX(tmp);*/ 4411 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 4412 if (ctx->bc->chip_class == CAYMAN) { 4413 for (i = 0; i < 3; i++) { 4414 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4416 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4417 4418 alu.dst.sel = ctx->temp_reg; 4419 alu.dst.chan = i; 4420 if (i == 2) { 4421 alu.dst.write = 1; 4422 alu.last = 1; 4423 } 4424 4425 r = r600_bytecode_add_alu(ctx->bc, &alu); 4426 if (r) 4427 return r; 4428 } 4429 } else { 4430 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
4431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4432 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4433 4434 alu.dst.sel = ctx->temp_reg; 4435 alu.dst.write = 1; 4436 alu.dst.chan = 2; 4437 4438 alu.last = 1; 4439 4440 r = r600_bytecode_add_alu(ctx->bc, &alu); 4441 if (r) 4442 return r; 4443 } 4444 } 4445 4446 /* result.w = 1.0;*/ 4447 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 4448 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4449 4450 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4451 alu.src[0].sel = V_SQ_ALU_SRC_1; 4452 alu.src[0].chan = 0; 4453 4454 alu.dst.sel = ctx->temp_reg; 4455 alu.dst.chan = 3; 4456 alu.dst.write = 1; 4457 alu.last = 1; 4458 r = r600_bytecode_add_alu(ctx->bc, &alu); 4459 if (r) 4460 return r; 4461 } 4462 return tgsi_helper_copy(ctx, inst); 4463} 4464 4465static int tgsi_log(struct r600_shader_ctx *ctx) 4466{ 4467 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4468 struct r600_bytecode_alu alu; 4469 int r; 4470 int i; 4471 4472 /* result.x = floor(log2(|src|)); */ 4473 if (inst->Dst[0].Register.WriteMask & 1) { 4474 if (ctx->bc->chip_class == CAYMAN) { 4475 for (i = 0; i < 3; i++) { 4476 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4477 4478 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4479 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4480 r600_bytecode_src_set_abs(&alu.src[0]); 4481 4482 alu.dst.sel = ctx->temp_reg; 4483 alu.dst.chan = i; 4484 if (i == 0) 4485 alu.dst.write = 1; 4486 if (i == 2) 4487 alu.last = 1; 4488 r = r600_bytecode_add_alu(ctx->bc, &alu); 4489 if (r) 4490 return r; 4491 } 4492 4493 } else { 4494 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4495 4496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4497 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4498 r600_bytecode_src_set_abs(&alu.src[0]); 4499 4500 alu.dst.sel = ctx->temp_reg; 4501 alu.dst.chan = 0; 4502 alu.dst.write = 1; 4503 alu.last = 
1; 4504 r = r600_bytecode_add_alu(ctx->bc, &alu); 4505 if (r) 4506 return r; 4507 } 4508 4509 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 4510 alu.src[0].sel = ctx->temp_reg; 4511 alu.src[0].chan = 0; 4512 4513 alu.dst.sel = ctx->temp_reg; 4514 alu.dst.chan = 0; 4515 alu.dst.write = 1; 4516 alu.last = 1; 4517 4518 r = r600_bytecode_add_alu(ctx->bc, &alu); 4519 if (r) 4520 return r; 4521 } 4522 4523 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 4524 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 4525 4526 if (ctx->bc->chip_class == CAYMAN) { 4527 for (i = 0; i < 3; i++) { 4528 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4529 4530 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4531 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4532 r600_bytecode_src_set_abs(&alu.src[0]); 4533 4534 alu.dst.sel = ctx->temp_reg; 4535 alu.dst.chan = i; 4536 if (i == 1) 4537 alu.dst.write = 1; 4538 if (i == 2) 4539 alu.last = 1; 4540 4541 r = r600_bytecode_add_alu(ctx->bc, &alu); 4542 if (r) 4543 return r; 4544 } 4545 } else { 4546 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4547 4548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4549 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4550 r600_bytecode_src_set_abs(&alu.src[0]); 4551 4552 alu.dst.sel = ctx->temp_reg; 4553 alu.dst.chan = 1; 4554 alu.dst.write = 1; 4555 alu.last = 1; 4556 4557 r = r600_bytecode_add_alu(ctx->bc, &alu); 4558 if (r) 4559 return r; 4560 } 4561 4562 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4563 4564 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 4565 alu.src[0].sel = ctx->temp_reg; 4566 alu.src[0].chan = 1; 4567 4568 alu.dst.sel = ctx->temp_reg; 4569 alu.dst.chan = 1; 4570 alu.dst.write = 1; 4571 alu.last = 1; 4572 4573 r = r600_bytecode_add_alu(ctx->bc, &alu); 4574 if (r) 4575 return r; 4576 4577 if (ctx->bc->chip_class == CAYMAN) { 4578 for (i = 0; i < 3; i++) { 4579 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
4580 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4581 alu.src[0].sel = ctx->temp_reg; 4582 alu.src[0].chan = 1; 4583 4584 alu.dst.sel = ctx->temp_reg; 4585 alu.dst.chan = i; 4586 if (i == 1) 4587 alu.dst.write = 1; 4588 if (i == 2) 4589 alu.last = 1; 4590 4591 r = r600_bytecode_add_alu(ctx->bc, &alu); 4592 if (r) 4593 return r; 4594 } 4595 } else { 4596 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4597 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4598 alu.src[0].sel = ctx->temp_reg; 4599 alu.src[0].chan = 1; 4600 4601 alu.dst.sel = ctx->temp_reg; 4602 alu.dst.chan = 1; 4603 alu.dst.write = 1; 4604 alu.last = 1; 4605 4606 r = r600_bytecode_add_alu(ctx->bc, &alu); 4607 if (r) 4608 return r; 4609 } 4610 4611 if (ctx->bc->chip_class == CAYMAN) { 4612 for (i = 0; i < 3; i++) { 4613 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4615 alu.src[0].sel = ctx->temp_reg; 4616 alu.src[0].chan = 1; 4617 4618 alu.dst.sel = ctx->temp_reg; 4619 alu.dst.chan = i; 4620 if (i == 1) 4621 alu.dst.write = 1; 4622 if (i == 2) 4623 alu.last = 1; 4624 4625 r = r600_bytecode_add_alu(ctx->bc, &alu); 4626 if (r) 4627 return r; 4628 } 4629 } else { 4630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4631 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4632 alu.src[0].sel = ctx->temp_reg; 4633 alu.src[0].chan = 1; 4634 4635 alu.dst.sel = ctx->temp_reg; 4636 alu.dst.chan = 1; 4637 alu.dst.write = 1; 4638 alu.last = 1; 4639 4640 r = r600_bytecode_add_alu(ctx->bc, &alu); 4641 if (r) 4642 return r; 4643 } 4644 4645 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4646 4647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4648 4649 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4650 r600_bytecode_src_set_abs(&alu.src[0]); 4651 4652 alu.src[1].sel = ctx->temp_reg; 4653 alu.src[1].chan = 1; 4654 4655 alu.dst.sel = ctx->temp_reg; 4656 alu.dst.chan = 1; 4657 
alu.dst.write = 1; 4658 alu.last = 1; 4659 4660 r = r600_bytecode_add_alu(ctx->bc, &alu); 4661 if (r) 4662 return r; 4663 } 4664 4665 /* result.z = log2(|src|);*/ 4666 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 4667 if (ctx->bc->chip_class == CAYMAN) { 4668 for (i = 0; i < 3; i++) { 4669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4670 4671 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4672 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4673 r600_bytecode_src_set_abs(&alu.src[0]); 4674 4675 alu.dst.sel = ctx->temp_reg; 4676 if (i == 2) 4677 alu.dst.write = 1; 4678 alu.dst.chan = i; 4679 if (i == 2) 4680 alu.last = 1; 4681 4682 r = r600_bytecode_add_alu(ctx->bc, &alu); 4683 if (r) 4684 return r; 4685 } 4686 } else { 4687 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4688 4689 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4690 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4691 r600_bytecode_src_set_abs(&alu.src[0]); 4692 4693 alu.dst.sel = ctx->temp_reg; 4694 alu.dst.write = 1; 4695 alu.dst.chan = 2; 4696 alu.last = 1; 4697 4698 r = r600_bytecode_add_alu(ctx->bc, &alu); 4699 if (r) 4700 return r; 4701 } 4702 } 4703 4704 /* result.w = 1.0; */ 4705 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 4706 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4707 4708 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4709 alu.src[0].sel = V_SQ_ALU_SRC_1; 4710 alu.src[0].chan = 0; 4711 4712 alu.dst.sel = ctx->temp_reg; 4713 alu.dst.chan = 3; 4714 alu.dst.write = 1; 4715 alu.last = 1; 4716 4717 r = r600_bytecode_add_alu(ctx->bc, &alu); 4718 if (r) 4719 return r; 4720 } 4721 4722 return tgsi_helper_copy(ctx, inst); 4723} 4724 4725static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 4726{ 4727 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4728 struct r600_bytecode_alu alu; 4729 int r; 4730 4731 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4732 4733 switch (inst->Instruction.Opcode) 
{ 4734 case TGSI_OPCODE_ARL: 4735 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 4736 break; 4737 case TGSI_OPCODE_ARR: 4738 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4739 break; 4740 case TGSI_OPCODE_UARL: 4741 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4742 break; 4743 default: 4744 assert(0); 4745 return -1; 4746 } 4747 4748 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4749 alu.last = 1; 4750 alu.dst.sel = ctx->bc->ar_reg; 4751 alu.dst.write = 1; 4752 r = r600_bytecode_add_alu(ctx->bc, &alu); 4753 if (r) 4754 return r; 4755 4756 ctx->bc->ar_loaded = 0; 4757 return 0; 4758} 4759static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 4760{ 4761 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4762 struct r600_bytecode_alu alu; 4763 int r; 4764 4765 switch (inst->Instruction.Opcode) { 4766 case TGSI_OPCODE_ARL: 4767 memset(&alu, 0, sizeof(alu)); 4768 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 4769 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4770 alu.dst.sel = ctx->bc->ar_reg; 4771 alu.dst.write = 1; 4772 alu.last = 1; 4773 4774 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4775 return r; 4776 4777 memset(&alu, 0, sizeof(alu)); 4778 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4779 alu.src[0].sel = ctx->bc->ar_reg; 4780 alu.dst.sel = ctx->bc->ar_reg; 4781 alu.dst.write = 1; 4782 alu.last = 1; 4783 4784 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4785 return r; 4786 break; 4787 case TGSI_OPCODE_ARR: 4788 memset(&alu, 0, sizeof(alu)); 4789 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4790 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4791 alu.dst.sel = ctx->bc->ar_reg; 4792 alu.dst.write = 1; 4793 alu.last = 1; 4794 4795 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4796 return r; 4797 break; 4798 case TGSI_OPCODE_UARL: 4799 memset(&alu, 0, sizeof(alu)); 4800 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4801 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4802 
alu.dst.sel = ctx->bc->ar_reg; 4803 alu.dst.write = 1; 4804 alu.last = 1; 4805 4806 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4807 return r; 4808 break; 4809 default: 4810 assert(0); 4811 return -1; 4812 } 4813 4814 ctx->bc->ar_loaded = 0; 4815 return 0; 4816} 4817 4818static int tgsi_opdst(struct r600_shader_ctx *ctx) 4819{ 4820 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4821 struct r600_bytecode_alu alu; 4822 int i, r = 0; 4823 4824 for (i = 0; i < 4; i++) { 4825 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4826 4827 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4828 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4829 4830 if (i == 0 || i == 3) { 4831 alu.src[0].sel = V_SQ_ALU_SRC_1; 4832 } else { 4833 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4834 } 4835 4836 if (i == 0 || i == 2) { 4837 alu.src[1].sel = V_SQ_ALU_SRC_1; 4838 } else { 4839 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4840 } 4841 if (i == 3) 4842 alu.last = 1; 4843 r = r600_bytecode_add_alu(ctx->bc, &alu); 4844 if (r) 4845 return r; 4846 } 4847 return 0; 4848} 4849 4850static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 4851{ 4852 struct r600_bytecode_alu alu; 4853 int r; 4854 4855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4856 alu.inst = opcode; 4857 alu.predicate = 1; 4858 4859 alu.dst.sel = ctx->temp_reg; 4860 alu.dst.write = 1; 4861 alu.dst.chan = 0; 4862 4863 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4864 alu.src[1].sel = V_SQ_ALU_SRC_0; 4865 alu.src[1].chan = 0; 4866 4867 alu.last = 1; 4868 4869 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 4870 if (r) 4871 return r; 4872 return 0; 4873} 4874 4875static int pops(struct r600_shader_ctx *ctx, int pops) 4876{ 4877 unsigned force_pop = ctx->bc->force_add_cf; 4878 4879 if (!force_pop) { 4880 int alu_pop = 3; 4881 if (ctx->bc->cf_last) { 4882 if (ctx->bc->cf_last->inst == 
CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 4883 alu_pop = 0; 4884 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 4885 alu_pop = 1; 4886 } 4887 alu_pop += pops; 4888 if (alu_pop == 1) { 4889 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 4890 ctx->bc->force_add_cf = 1; 4891 } else if (alu_pop == 2) { 4892 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 4893 ctx->bc->force_add_cf = 1; 4894 } else { 4895 force_pop = 1; 4896 } 4897 } 4898 4899 if (force_pop) { 4900 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 4901 ctx->bc->cf_last->pop_count = pops; 4902 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 4903 } 4904 4905 return 0; 4906} 4907 4908static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 4909{ 4910 switch(reason) { 4911 case FC_PUSH_VPM: 4912 ctx->bc->callstack[ctx->bc->call_sp].current--; 4913 break; 4914 case FC_PUSH_WQM: 4915 case FC_LOOP: 4916 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 4917 break; 4918 case FC_REP: 4919 /* TOODO : for 16 vp asic should -= 2; */ 4920 ctx->bc->callstack[ctx->bc->call_sp].current --; 4921 break; 4922 } 4923} 4924 4925static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 4926{ 4927 if (check_max_only) { 4928 int diff; 4929 switch (reason) { 4930 case FC_PUSH_VPM: 4931 diff = 1; 4932 break; 4933 case FC_PUSH_WQM: 4934 diff = 4; 4935 break; 4936 default: 4937 assert(0); 4938 diff = 0; 4939 } 4940 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 4941 ctx->bc->callstack[ctx->bc->call_sp].max) { 4942 ctx->bc->callstack[ctx->bc->call_sp].max = 4943 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 4944 } 4945 return; 4946 } 4947 switch (reason) { 4948 case FC_PUSH_VPM: 4949 ctx->bc->callstack[ctx->bc->call_sp].current++; 4950 break; 4951 case FC_PUSH_WQM: 4952 case FC_LOOP: 
4953 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 4954 break; 4955 case FC_REP: 4956 ctx->bc->callstack[ctx->bc->call_sp].current++; 4957 break; 4958 } 4959 4960 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 4961 ctx->bc->callstack[ctx->bc->call_sp].max) { 4962 ctx->bc->callstack[ctx->bc->call_sp].max = 4963 ctx->bc->callstack[ctx->bc->call_sp].current; 4964 } 4965} 4966 4967static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 4968{ 4969 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 4970 4971 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 4972 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 4973 sp->mid[sp->num_mid] = ctx->bc->cf_last; 4974 sp->num_mid++; 4975} 4976 4977static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 4978{ 4979 ctx->bc->fc_sp++; 4980 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 4981 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 4982} 4983 4984static void fc_poplevel(struct r600_shader_ctx *ctx) 4985{ 4986 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 4987 if (sp->mid) { 4988 free(sp->mid); 4989 sp->mid = NULL; 4990 } 4991 sp->num_mid = 0; 4992 sp->start = NULL; 4993 sp->type = 0; 4994 ctx->bc->fc_sp--; 4995} 4996 4997#if 0 4998static int emit_return(struct r600_shader_ctx *ctx) 4999{ 5000 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 5001 return 0; 5002} 5003 5004static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 5005{ 5006 5007 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 5008 ctx->bc->cf_last->pop_count = pops; 5009 /* XXX work out offset */ 5010 return 0; 5011} 5012 5013static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 5014{ 5015 return 0; 5016} 5017 5018static void emit_testflag(struct r600_shader_ctx *ctx) 5019{ 5020 5021} 5022 5023static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned 
ifidx) 5024{ 5025 emit_testflag(ctx); 5026 emit_jump_to_offset(ctx, 1, 4); 5027 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 5028 pops(ctx, ifidx + 1); 5029 emit_return(ctx); 5030} 5031 5032static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 5033{ 5034 emit_testflag(ctx); 5035 5036 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 5037 ctx->bc->cf_last->pop_count = 1; 5038 5039 fc_set_mid(ctx, fc_sp); 5040 5041 pops(ctx, 1); 5042} 5043#endif 5044 5045static int tgsi_if(struct r600_shader_ctx *ctx) 5046{ 5047 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 5048 5049 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 5050 5051 fc_pushlevel(ctx, FC_IF); 5052 5053 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 5054 return 0; 5055} 5056 5057static int tgsi_else(struct r600_shader_ctx *ctx) 5058{ 5059 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 5060 ctx->bc->cf_last->pop_count = 1; 5061 5062 fc_set_mid(ctx, ctx->bc->fc_sp); 5063 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 5064 return 0; 5065} 5066 5067static int tgsi_endif(struct r600_shader_ctx *ctx) 5068{ 5069 pops(ctx, 1); 5070 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 5071 R600_ERR("if/endif unbalanced in shader\n"); 5072 return -1; 5073 } 5074 5075 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 5076 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 5077 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 5078 } else { 5079 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 5080 } 5081 fc_poplevel(ctx); 5082 5083 callstack_decrease_current(ctx, FC_PUSH_VPM); 5084 return 0; 5085} 5086 5087static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 5088{ 5089 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 5090 5091 fc_pushlevel(ctx, FC_LOOP); 5092 5093 /* 
check stack depth */ 5094 callstack_check_depth(ctx, FC_LOOP, 0); 5095 return 0; 5096} 5097 5098static int tgsi_endloop(struct r600_shader_ctx *ctx) 5099{ 5100 int i; 5101 5102 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 5103 5104 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 5105 R600_ERR("loop/endloop in shader code are not paired.\n"); 5106 return -EINVAL; 5107 } 5108 5109 /* fixup loop pointers - from r600isa 5110 LOOP END points to CF after LOOP START, 5111 LOOP START point to CF after LOOP END 5112 BRK/CONT point to LOOP END CF 5113 */ 5114 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 5115 5116 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 5117 5118 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 5119 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 5120 } 5121 /* XXX add LOOPRET support */ 5122 fc_poplevel(ctx); 5123 callstack_decrease_current(ctx, FC_LOOP); 5124 return 0; 5125} 5126 5127static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 5128{ 5129 unsigned int fscp; 5130 5131 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 5132 { 5133 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 5134 break; 5135 } 5136 5137 if (fscp == 0) { 5138 R600_ERR("Break not inside loop/endloop pair\n"); 5139 return -EINVAL; 5140 } 5141 5142 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 5143 5144 fc_set_mid(ctx, fscp); 5145 5146 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 5147 return 0; 5148} 5149 5150static int tgsi_umad(struct r600_shader_ctx *ctx) 5151{ 5152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5153 struct r600_bytecode_alu alu; 5154 int i, j, r; 5155 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5156 5157 /* src0 * src1 */ 5158 for (i = 0; i < lasti + 1; i++) { 5159 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5160 continue; 5161 5162 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5163 5164 alu.dst.chan = i; 5165 alu.dst.sel = ctx->temp_reg; 5166 alu.dst.write = 1; 5167 5168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 5169 for (j = 0; j < 2; j++) { 5170 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 5171 } 5172 5173 alu.last = 1; 5174 r = r600_bytecode_add_alu(ctx->bc, &alu); 5175 if (r) 5176 return r; 5177 } 5178 5179 5180 for (i = 0; i < lasti + 1; i++) { 5181 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5182 continue; 5183 5184 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5185 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5186 5187 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 5188 5189 alu.src[0].sel = ctx->temp_reg; 5190 alu.src[0].chan = i; 5191 5192 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 5193 if (i == lasti) { 5194 alu.last = 1; 5195 } 5196 r = r600_bytecode_add_alu(ctx->bc, &alu); 5197 if (r) 5198 return r; 5199 } 5200 return 0; 5201} 5202 5203static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 5204 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 5205 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5206 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 5207 5208 /* XXX: 5209 * For state trackers other than OpenGL, we'll want to use 5210 * _RECIP_IEEE instead. 
5211 */ 5212 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 5213 5214 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 5215 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 5216 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 5217 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 5218 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5219 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5220 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5221 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 5222 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 5223 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 5224 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 5225 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 5226 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 5227 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5228 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 5229 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5230 /* gap */ 5231 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5232 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5233 /* gap */ 5234 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5235 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5236 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 5237 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5238 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 5239 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 5240 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 
tgsi_trans_srcx_replicate}, 5241 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 5242 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 5243 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 5244 /* gap */ 5245 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5246 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5247 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5248 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5249 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 5250 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 5251 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 5252 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 5253 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5254 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5255 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5256 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5257 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5258 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 5259 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5260 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 5261 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 5262 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 5263 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 5264 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5265 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5266 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 5267 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, 
tgsi_tex}, 5268 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5269 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5270 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5271 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5272 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5273 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5274 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 5275 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5276 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5277 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5278 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 5279 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 5280 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 5281 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 5282 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5283 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5284 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5285 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 5286 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 5287 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 5288 /* gap */ 5289 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5290 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5291 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 5292 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 5293 /* gap */ 5294 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5295 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 5296 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5297 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5298 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 5299 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 5300 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 5301 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 5302 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, 5303 /* gap */ 5304 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5305 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 5306 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 5307 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 5308 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 5309 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5310 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 5311 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5312 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 5313 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5314 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5315 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 5316 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5317 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 5318 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5319 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5320 /* gap */ 5321 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5322 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5323 {106, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5324 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5325 /* gap */ 5326 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5327 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5328 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5329 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5330 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5331 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5332 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5333 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5334 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 5335 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 5336 /* gap */ 5337 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5338 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans}, 5339 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 5340 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 5341 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 5342 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 5343 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 5344 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans}, 5345 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 5346 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, 5347 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 5348 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 5349 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 
5350 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 5351 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 5352 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 5353 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 5354 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 5355 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 5356 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 5357 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, 5358 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 5359 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 5360 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5361 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5362 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5363 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5364 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 5365 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 5366 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 5367 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 5368 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 5369 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 5370 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 5371 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 5372 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 5373 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 5374 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 5375 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5376 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 5377 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 5378 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 5379 {TGSI_OPCODE_ISSG, 0, 0, 
tgsi_issg}, 5380 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5381}; 5382 5383static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 5384 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 5385 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5386 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 5387 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 5388 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 5389 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 5390 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 5391 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 5392 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5393 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5394 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5395 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 5396 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 5397 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 5398 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 5399 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 5400 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 5401 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5402 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 5403 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5404 /* gap */ 5405 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5406 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5407 /* gap */ 5408 {22, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5409 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5410 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 5411 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5412 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 5413 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 5414 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 5415 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 5416 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 5417 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 5418 /* gap */ 5419 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5420 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5421 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5422 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5423 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 5424 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 5425 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 5426 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 5427 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5428 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5429 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5430 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5431 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5432 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 5433 {TGSI_OPCODE_SFL, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5434 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 5435 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 5436 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 5437 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 5438 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5439 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5440 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 5441 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5442 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5443 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5444 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5445 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5446 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5447 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5448 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 5449 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5450 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5451 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5452 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 5453 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 5454 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 5455 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 5456 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5457 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5458 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 
tgsi_dp}, 5459 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 5460 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 5461 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 5462 /* gap */ 5463 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5464 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5465 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 5466 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 5467 /* gap */ 5468 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5469 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5470 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5471 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5472 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 5473 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 5474 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 5475 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 5476 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 5477 /* gap */ 5478 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5479 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 5480 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 5481 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 5482 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 5483 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5484 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 5485 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5486 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 5487 
{TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5488 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5489 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 5490 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5491 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 5492 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5493 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5494 /* gap */ 5495 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5496 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5497 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5498 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5499 /* gap */ 5500 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5501 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5502 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5503 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5504 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5505 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5506 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5507 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5508 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 5509 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 5510 /* gap */ 5511 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5512 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, 5513 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 5514 
{TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 5515 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 5516 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 5517 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 5518 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 5519 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 5520 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, 5521 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 5522 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 5523 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 5524 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 5525 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 5526 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 5527 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 5528 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 5529 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 5530 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 5531 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 5532 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 5533 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 5534 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5535 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5536 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5537 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 5538 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 5539 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 5540 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 5541 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 5542 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 5543 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 5544 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 5545 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 5546 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 5547 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 5548 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 5549 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5550 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 5551 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 5552 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 5553 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 5554 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5555}; 5556 5557static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 5558 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 5559 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5560 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 5561 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 5562 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 5563 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 5564 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 5565 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 5566 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5567 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5568 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5569 {TGSI_OPCODE_DST, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 5570 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 5571 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 5572 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 5573 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 5574 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 5575 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5576 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 5577 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5578 /* gap */ 5579 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5580 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5581 /* gap */ 5582 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5583 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5584 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 5585 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5586 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 5587 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 5588 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 5589 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 5590 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 5591 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 5592 /* gap */ 5593 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5594 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5595 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5596 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 
5597 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 5598 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 5599 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 5600 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 5601 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5602 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5603 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5604 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5605 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5606 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 5607 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5608 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 5609 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 5610 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 5611 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 5612 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5613 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5614 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 5615 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5616 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5617 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5618 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5619 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5620 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5621 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5622 
{TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 5623 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5624 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5625 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5626 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 5627 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 5628 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 5629 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 5630 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5631 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5632 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5633 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 5634 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 5635 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 5636 /* gap */ 5637 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5638 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5639 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 5640 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 5641 /* gap */ 5642 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5643 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5644 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5645 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5646 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 5647 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, 5648 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 5649 {TGSI_OPCODE_TRUNC, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 5650 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 5651 /* gap */ 5652 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5653 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 5654 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 5655 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 5656 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 5657 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5658 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 5659 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5660 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 5661 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5662 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5663 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 5664 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5665 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 5666 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5667 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5668 /* gap */ 5669 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5670 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5671 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5672 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5673 /* gap */ 5674 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5675 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5676 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5677 {111, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5678 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5679 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5680 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5681 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5682 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 5683 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 5684 /* gap */ 5685 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5686 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, 5687 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 5688 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 5689 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 5690 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 5691 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 5692 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 5693 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 5694 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, 5695 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 5696 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 5697 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 5698 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 5699 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 5700 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 5701 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 5702 
{TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr}, 5703 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 5704 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 5705 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 5706 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 5707 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 5708 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5709 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5710 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5711 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5712 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 5713 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 5714 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 5715 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 5716 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 5717 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 5718 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 5719 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 5720 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 5721 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 5722 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 5723 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5724 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 5725 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 5726 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 5727 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 5728 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5729}; 5730