/* r600_shader.c revision 7d532800d8be5ce31731658564691ae9cdaacf7a */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "pipe/p_shader_tokens.h" 31#include "tgsi/tgsi_info.h" 32#include "tgsi/tgsi_parse.h" 33#include "tgsi/tgsi_scan.h" 34#include "tgsi/tgsi_dump.h" 35#include "util/u_memory.h" 36#include <stdio.h> 37#include <errno.h> 38#include <byteswap.h> 39 40/* CAYMAN notes 41Why CAYMAN got loops for lots of instructions is explained here. 42 43-These 8xx t-slot only ops are implemented in all vector slots. 44MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 45These 8xx t-slot only opcodes become vector ops, with all four 46slots expecting the arguments on sources a and b. Result is 47broadcast to all channels. 
48MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 49These 8xx t-slot only opcodes become vector ops in the z, y, and 50x slots. 51EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 52RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 53SQRT_IEEE/_64 54SIN/COS 55The w slot may have an independent co-issued operation, or if the 56result is required to be in the w slot, the opcode above may be 57issued in the w slot as well. 58The compiler must issue the source argument to slots z, y, and x 59*/ 60 61static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 62{ 63 struct r600_context *rctx = (struct r600_context *)ctx; 64 struct r600_shader *rshader = &shader->shader; 65 uint32_t *ptr; 66 int i; 67 68 /* copy new shader */ 69 if (shader->bo == NULL) { 70 shader->bo = (struct r600_resource*) 71 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 72 if (shader->bo == NULL) { 73 return -ENOMEM; 74 } 75 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 76 if (R600_BIG_ENDIAN) { 77 for (i = 0; i < rshader->bc.ndw; ++i) { 78 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 79 } 80 } else { 81 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 82 } 83 rctx->ws->buffer_unmap(shader->bo->cs_buf); 84 } 85 /* build state */ 86 switch (rshader->processor_type) { 87 case TGSI_PROCESSOR_VERTEX: 88 if (rctx->chip_class >= EVERGREEN) { 89 evergreen_pipe_shader_vs(ctx, shader); 90 } else { 91 r600_pipe_shader_vs(ctx, shader); 92 } 93 break; 94 case TGSI_PROCESSOR_FRAGMENT: 95 if (rctx->chip_class >= EVERGREEN) { 96 evergreen_pipe_shader_ps(ctx, shader); 97 } else { 98 r600_pipe_shader_ps(ctx, shader); 99 } 100 break; 101 default: 102 return -EINVAL; 103 } 104 return 0; 105} 106 107static int r600_shader_from_tgsi(struct r600_screen *rscreen, 108 struct r600_pipe_shader *pipeshader, 109 struct r600_shader_key key); 110 111static void 
r600_dump_streamout(struct pipe_stream_output_info *so) 112{ 113 unsigned i; 114 115 fprintf(stderr, "STREAMOUT\n"); 116 for (i = 0; i < so->num_outputs; i++) { 117 unsigned mask = ((1 << so->output[i].num_components) - 1) << 118 so->output[i].start_component; 119 fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 120 i, so->output[i].output_buffer, 121 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 122 so->output[i].register_index, 123 mask & 1 ? "x" : "", 124 mask & 2 ? "y" : "", 125 mask & 4 ? "z" : "", 126 mask & 8 ? "w" : "", 127 so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); 128 } 129} 130 131int r600_pipe_shader_create(struct pipe_context *ctx, 132 struct r600_pipe_shader *shader, 133 struct r600_shader_key key) 134{ 135 static int dump_shaders = -1; 136 struct r600_context *rctx = (struct r600_context *)ctx; 137 struct r600_pipe_shader_selector *sel = shader->selector; 138 int r; 139 140 /* Would like some magic "get_bool_option_once" routine. 
141 */ 142 if (dump_shaders == -1) 143 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 144 145 if (dump_shaders) { 146 fprintf(stderr, "--------------------------------------------------------------\n"); 147 tgsi_dump(sel->tokens, 0); 148 149 if (sel->so.num_outputs) { 150 r600_dump_streamout(&sel->so); 151 } 152 } 153 r = r600_shader_from_tgsi(rctx->screen, shader, key); 154 if (r) { 155 R600_ERR("translation from TGSI failed !\n"); 156 return r; 157 } 158 r = r600_bytecode_build(&shader->shader.bc); 159 if (r) { 160 R600_ERR("building bytecode failed !\n"); 161 return r; 162 } 163 if (dump_shaders) { 164 r600_bytecode_dump(&shader->shader.bc); 165 fprintf(stderr, "______________________________________________________________\n"); 166 } 167 return r600_pipe_shader(ctx, shader); 168} 169 170void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 171{ 172 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 173 r600_bytecode_clear(&shader->shader.bc); 174} 175 176/* 177 * tgsi -> r600 shader 178 */ 179struct r600_shader_tgsi_instruction; 180 181struct r600_shader_src { 182 unsigned sel; 183 unsigned swizzle[4]; 184 unsigned neg; 185 unsigned abs; 186 unsigned rel; 187 unsigned kc_bank; 188 uint32_t value[4]; 189}; 190 191struct r600_shader_ctx { 192 struct tgsi_shader_info info; 193 struct tgsi_parse_context parse; 194 const struct tgsi_token *tokens; 195 unsigned type; 196 unsigned file_offset[TGSI_FILE_COUNT]; 197 unsigned temp_reg; 198 struct r600_shader_tgsi_instruction *inst_info; 199 struct r600_bytecode *bc; 200 struct r600_shader *shader; 201 struct r600_shader_src src[4]; 202 uint32_t *literals; 203 uint32_t nliterals; 204 uint32_t max_driver_temp_used; 205 boolean use_llvm; 206 /* needed for evergreen interpolation */ 207 boolean input_centroid; 208 boolean input_linear; 209 boolean input_perspective; 210 int num_interp_gpr; 211 int face_gpr; 212 int colors_used; 213 boolean 
clip_vertex_write; 214 unsigned cv_output; 215 int fragcoord_input; 216 int native_integers; 217}; 218 219struct r600_shader_tgsi_instruction { 220 unsigned tgsi_opcode; 221 unsigned is_op3; 222 unsigned r600_opcode; 223 int (*process)(struct r600_shader_ctx *ctx); 224}; 225 226static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 227static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 228static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only); 229static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 230static int tgsi_else(struct r600_shader_ctx *ctx); 231static int tgsi_endif(struct r600_shader_ctx *ctx); 232static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 233static int tgsi_endloop(struct r600_shader_ctx *ctx); 234static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 235 236/* 237 * bytestream -> r600 shader 238 * 239 * These functions are used to transform the output of the LLVM backend into 240 * struct r600_bytecode. 
241 */ 242 243static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, 244 unsigned char * bytes, unsigned num_bytes); 245 246#ifdef HAVE_OPENCL 247int r600_compute_shader_create(struct pipe_context * ctx, 248 LLVMModuleRef mod, struct r600_bytecode * bytecode) 249{ 250 struct r600_context *r600_ctx = (struct r600_context *)ctx; 251 unsigned char * bytes; 252 unsigned byte_count; 253 struct r600_shader_ctx shader_ctx; 254 unsigned dump = 0; 255 256 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { 257 dump = 1; 258 } 259 260 r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump); 261 shader_ctx.bc = bytecode; 262 r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family, 263 r600_ctx->screen->msaa_texture_support); 264 shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE; 265 r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count); 266 if (shader_ctx.bc->chip_class == CAYMAN) { 267 cm_bytecode_add_cf_end(shader_ctx.bc); 268 } 269 r600_bytecode_build(shader_ctx.bc); 270 if (dump) { 271 r600_bytecode_dump(shader_ctx.bc); 272 } 273 free(bytes); 274 return 1; 275} 276 277#endif /* HAVE_OPENCL */ 278 279static uint32_t i32_from_byte_stream(unsigned char * bytes, 280 unsigned * bytes_read) 281{ 282 unsigned i; 283 uint32_t out = 0; 284 for (i = 0; i < 4; i++) { 285 out |= bytes[(*bytes_read)++] << (8 * i); 286 } 287 return out; 288} 289 290static unsigned r600_src_from_byte_stream(unsigned char * bytes, 291 unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx) 292{ 293 unsigned i; 294 unsigned sel0, sel1; 295 sel0 = bytes[bytes_read++]; 296 sel1 = bytes[bytes_read++]; 297 alu->src[src_idx].sel = sel0 | (sel1 << 8); 298 alu->src[src_idx].chan = bytes[bytes_read++]; 299 alu->src[src_idx].neg = bytes[bytes_read++]; 300 alu->src[src_idx].abs = bytes[bytes_read++]; 301 alu->src[src_idx].rel = bytes[bytes_read++]; 302 alu->src[src_idx].kc_bank = bytes[bytes_read++]; 303 for (i = 0; i < 4; i++) { 304 
alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8); 305 } 306 return bytes_read; 307} 308 309static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, 310 unsigned char * bytes, unsigned bytes_read) 311{ 312 unsigned src_idx, src_num; 313 struct r600_bytecode_alu alu; 314 unsigned src_use_sel[3]; 315 unsigned src_sel[3] = {}; 316 uint32_t word0, word1; 317 318 src_num = bytes[bytes_read++]; 319 320 memset(&alu, 0, sizeof(alu)); 321 for(src_idx = 0; src_idx < src_num; src_idx++) { 322 unsigned i; 323 src_use_sel[src_idx] = bytes[bytes_read++]; 324 for (i = 0; i < 4; i++) { 325 src_sel[src_idx] |= bytes[bytes_read++] << (i * 8); 326 } 327 for (i = 0; i < 4; i++) { 328 alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8); 329 } 330 } 331 332 word0 = i32_from_byte_stream(bytes, &bytes_read); 333 word1 = i32_from_byte_stream(bytes, &bytes_read); 334 335 switch(ctx->bc->chip_class) { 336 default: 337 case R600: 338 r600_bytecode_alu_read(&alu, word0, word1); 339 break; 340 case R700: 341 case EVERGREEN: 342 case CAYMAN: 343 r700_bytecode_alu_read(&alu, word0, word1); 344 break; 345 } 346 347 for(src_idx = 0; src_idx < src_num; src_idx++) { 348 if (src_use_sel[src_idx]) { 349 unsigned sel = src_sel[src_idx]; 350 351 alu.src[src_idx].chan = sel & 3; 352 sel >>= 2; 353 354 if (sel>=512) { /* constant */ 355 sel -= 512; 356 alu.src[src_idx].kc_bank = sel >> 12; 357 alu.src[src_idx].sel = (sel & 4095) + 512; 358 } 359 else { 360 alu.src[src_idx].sel = sel; 361 } 362 } 363 } 364 365#if HAVE_LLVM < 0x0302 366 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) || 367 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) || 368 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT) || 369 alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)) { 370 alu.update_pred = 1; 371 alu.dst.write = 0; 372 alu.src[1].sel = V_SQ_ALU_SRC_0; 373 alu.src[1].chan = 0; 374 alu.last = 1; 375 } 376#endif 377 378 if 
(alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT)) { 379 ctx->bc->ar_reg = alu.src[0].sel; 380 ctx->bc->ar_loaded = 0; 381 return bytes_read; 382 } 383 384 if (alu.execute_mask) { 385 alu.pred_sel = 0; 386 r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 387 } else { 388 r600_bytecode_add_alu(ctx->bc, &alu); 389 } 390 391 /* XXX: Handle other KILL instructions */ 392 if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) { 393 ctx->shader->uses_kill = 1; 394 /* XXX: This should be enforced in the LLVM backend. */ 395 ctx->bc->force_add_cf = 1; 396 } 397 return bytes_read; 398} 399 400static void llvm_if(struct r600_shader_ctx *ctx) 401{ 402 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 403 fc_pushlevel(ctx, FC_IF); 404 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 405} 406 407static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx) 408{ 409 unsigned opcode = TGSI_OPCODE_BRK; 410 if (ctx->bc->chip_class == CAYMAN) 411 ctx->inst_info = &cm_shader_tgsi_instruction[opcode]; 412 else if (ctx->bc->chip_class >= EVERGREEN) 413 ctx->inst_info = &eg_shader_tgsi_instruction[opcode]; 414 else 415 ctx->inst_info = &r600_shader_tgsi_instruction[opcode]; 416 llvm_if(ctx); 417 tgsi_loop_brk_cont(ctx); 418 tgsi_endif(ctx); 419} 420 421static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, 422 unsigned char * bytes, unsigned bytes_read) 423{ 424 struct r600_bytecode_alu alu; 425 unsigned inst; 426 memset(&alu, 0, sizeof(alu)); 427 bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0); 428 inst = bytes[bytes_read++]; 429 switch (inst) { 430 case 0: /* IF_PREDICATED */ 431 llvm_if(ctx); 432 break; 433 case 1: /* ELSE */ 434 tgsi_else(ctx); 435 break; 436 case 2: /* ENDIF */ 437 tgsi_endif(ctx); 438 break; 439 case 3: /* BGNLOOP */ 440 tgsi_bgnloop(ctx); 441 break; 442 case 4: /* ENDLOOP */ 443 tgsi_endloop(ctx); 444 break; 445 case 5: /* 
PREDICATED_BREAK */ 446 r600_break_from_byte_stream(ctx); 447 break; 448 case 6: /* CONTINUE */ 449 { 450 unsigned opcode = TGSI_OPCODE_CONT; 451 if (ctx->bc->chip_class == CAYMAN) { 452 ctx->inst_info = 453 &cm_shader_tgsi_instruction[opcode]; 454 } else if (ctx->bc->chip_class >= EVERGREEN) { 455 ctx->inst_info = 456 &eg_shader_tgsi_instruction[opcode]; 457 } else { 458 ctx->inst_info = 459 &r600_shader_tgsi_instruction[opcode]; 460 } 461 tgsi_loop_brk_cont(ctx); 462 } 463 break; 464 } 465 466 return bytes_read; 467} 468 469static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx, 470 unsigned char * bytes, unsigned bytes_read) 471{ 472 struct r600_bytecode_tex tex; 473 474 tex.inst = bytes[bytes_read++]; 475 tex.resource_id = bytes[bytes_read++]; 476 tex.src_gpr = bytes[bytes_read++]; 477 tex.src_rel = bytes[bytes_read++]; 478 tex.dst_gpr = bytes[bytes_read++]; 479 tex.dst_rel = bytes[bytes_read++]; 480 tex.dst_sel_x = bytes[bytes_read++]; 481 tex.dst_sel_y = bytes[bytes_read++]; 482 tex.dst_sel_z = bytes[bytes_read++]; 483 tex.dst_sel_w = bytes[bytes_read++]; 484 tex.lod_bias = bytes[bytes_read++]; 485 tex.coord_type_x = bytes[bytes_read++]; 486 tex.coord_type_y = bytes[bytes_read++]; 487 tex.coord_type_z = bytes[bytes_read++]; 488 tex.coord_type_w = bytes[bytes_read++]; 489 tex.offset_x = bytes[bytes_read++]; 490 tex.offset_y = bytes[bytes_read++]; 491 tex.offset_z = bytes[bytes_read++]; 492 tex.sampler_id = bytes[bytes_read++]; 493 tex.src_sel_x = bytes[bytes_read++]; 494 tex.src_sel_y = bytes[bytes_read++]; 495 tex.src_sel_z = bytes[bytes_read++]; 496 tex.src_sel_w = bytes[bytes_read++]; 497 498 tex.inst_mod = 0; 499 500 r600_bytecode_add_tex(ctx->bc, &tex); 501 502 return bytes_read; 503} 504 505static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, 506 unsigned char * bytes, unsigned bytes_read) 507{ 508 struct r600_bytecode_vtx vtx; 509 510 uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); 511 uint32_t word1 = 
i32_from_byte_stream(bytes, &bytes_read); 512 uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read); 513 514 memset(&vtx, 0, sizeof(vtx)); 515 516 /* WORD0 */ 517 vtx.inst = G_SQ_VTX_WORD0_VTX_INST(word0); 518 vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0); 519 vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0); 520 vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0); 521 vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0); 522 vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0); 523 524 /* WORD1 */ 525 vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1); 526 vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1); 527 vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1); 528 vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1); 529 vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1); 530 vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1); 531 vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1); 532 vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1); 533 vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1); 534 vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1); 535 536 /* WORD 2*/ 537 vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2); 538 vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2); 539 540 if (r600_bytecode_add_vtx(ctx->bc, &vtx)) { 541 fprintf(stderr, "Error adding vtx\n"); 542 } 543 544 /* Use the Texture Cache for compute shaders*/ 545 if (ctx->bc->chip_class >= EVERGREEN && 546 ctx->bc->type == TGSI_PROCESSOR_COMPUTE) { 547 ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX; 548 } 549 return bytes_read; 550} 551 552static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, 553 unsigned char * bytes, unsigned bytes_read) 554{ 555 uint32_t word0 = 0, word1 = 0; 556 struct r600_bytecode_output output; 557 memset(&output, 0, sizeof(struct r600_bytecode_output)); 558 word0 = i32_from_byte_stream(bytes, &bytes_read); 559 word1 = i32_from_byte_stream(bytes, &bytes_read); 560 if (ctx->bc->chip_class >= EVERGREEN) 561 eg_bytecode_export_read(&output, 
word0,word1); 562 else 563 r600_bytecode_export_read(&output, word0,word1); 564 r600_bytecode_add_output(ctx->bc, &output); 565 return bytes_read; 566} 567 568static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, 569 unsigned char * bytes, unsigned num_bytes) 570{ 571 unsigned bytes_read = 0; 572 unsigned i, byte; 573 while (bytes_read < num_bytes) { 574 char inst_type = bytes[bytes_read++]; 575 switch (inst_type) { 576 case 0: 577 bytes_read = r600_alu_from_byte_stream(ctx, bytes, 578 bytes_read); 579 break; 580 case 1: 581 bytes_read = r600_tex_from_byte_stream(ctx, bytes, 582 bytes_read); 583 break; 584 case 2: 585 bytes_read = r600_fc_from_byte_stream(ctx, bytes, 586 bytes_read); 587 break; 588 case 3: 589 r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); 590 for (i = 0; i < 2; i++) { 591 for (byte = 0 ; byte < 4; byte++) { 592 ctx->bc->cf_last->isa[i] |= 593 (bytes[bytes_read++] << (byte * 8)); 594 } 595 } 596 break; 597 598 case 4: 599 bytes_read = r600_vtx_from_byte_stream(ctx, bytes, 600 bytes_read); 601 break; 602 case 5: 603 bytes_read = r600_export_from_byte_stream(ctx, bytes, 604 bytes_read); 605 break; 606 default: 607 /* XXX: Error here */ 608 break; 609 } 610 } 611} 612 613/* End bytestream -> r600 shader functions*/ 614 615static int tgsi_is_supported(struct r600_shader_ctx *ctx) 616{ 617 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 618 int j; 619 620 if (i->Instruction.NumDstRegs > 1) { 621 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 622 return -EINVAL; 623 } 624 if (i->Instruction.Predicate) { 625 R600_ERR("predicate unsupported\n"); 626 return -EINVAL; 627 } 628#if 0 629 if (i->Instruction.Label) { 630 R600_ERR("label unsupported\n"); 631 return -EINVAL; 632 } 633#endif 634 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 635 if (i->Src[j].Register.Dimension) { 636 if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) { 637 R600_ERR("unsupported src %d (dimension %d)\n", j, 638 
i->Src[j].Register.Dimension); 639 return -EINVAL; 640 } 641 } 642 } 643 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 644 if (i->Dst[j].Register.Dimension) { 645 R600_ERR("unsupported dst (dimension)\n"); 646 return -EINVAL; 647 } 648 } 649 return 0; 650} 651 652static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 653{ 654 int i, r; 655 struct r600_bytecode_alu alu; 656 int gpr = 0, base_chan = 0; 657 int ij_index = 0; 658 659 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 660 ij_index = 0; 661 if (ctx->shader->input[input].centroid) 662 ij_index++; 663 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 664 ij_index = 0; 665 /* if we have perspective add one */ 666 if (ctx->input_perspective) { 667 ij_index++; 668 /* if we have perspective centroid */ 669 if (ctx->input_centroid) 670 ij_index++; 671 } 672 if (ctx->shader->input[input].centroid) 673 ij_index++; 674 } 675 676 /* work out gpr and base_chan from index */ 677 gpr = ij_index / 2; 678 base_chan = (2 * (ij_index % 2)) + 1; 679 680 for (i = 0; i < 8; i++) { 681 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 682 683 if (i < 4) 684 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW; 685 else 686 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY; 687 688 if ((i > 1) && (i < 6)) { 689 alu.dst.sel = ctx->shader->input[input].gpr; 690 alu.dst.write = 1; 691 } 692 693 alu.dst.chan = i % 4; 694 695 alu.src[0].sel = gpr; 696 alu.src[0].chan = (base_chan - (i % 2)); 697 698 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 699 700 alu.bank_swizzle_force = SQ_ALU_VEC_210; 701 if ((i % 4) == 3) 702 alu.last = 1; 703 r = r600_bytecode_add_alu(ctx->bc, &alu); 704 if (r) 705 return r; 706 } 707 return 0; 708} 709 710static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 711{ 712 int i, r; 713 struct r600_bytecode_alu alu; 714 715 for (i = 0; i < 4; i++) { 716 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 717 718 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0; 719 720 alu.dst.sel = ctx->shader->input[input].gpr; 721 alu.dst.write = 1; 722 723 alu.dst.chan = i; 724 725 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 726 alu.src[0].chan = i; 727 728 if (i == 3) 729 alu.last = 1; 730 r = r600_bytecode_add_alu(ctx->bc, &alu); 731 if (r) 732 return r; 733 } 734 return 0; 735} 736 737/* 738 * Special export handling in shaders 739 * 740 * shader export ARRAY_BASE for EXPORT_POS: 741 * 60 is position 742 * 61 is misc vector 743 * 62, 63 are clip distance vectors 744 * 745 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 746 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 747 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 748 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 749 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 750 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 751 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 752 * exclusive from render target index) 753 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 754 * 755 * 756 * shader export ARRAY_BASE for EXPORT_PIXEL: 757 * 0-7 CB targets 758 * 61 computed Z vector 759 * 760 * The use of the values exported in the computed Z vector are controlled 761 * by DB_SHADER_CONTROL: 762 * Z_EXPORT_ENABLE - Z as a float in RED 763 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 764 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 765 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 766 * DB_SOURCE_FORMAT - export control restrictions 767 * 768 */ 769 770 771/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 772static int r600_spi_sid(struct r600_shader_io * io) 773{ 774 int index, name = io->name; 775 776 /* These params 
are handled differently, they don't need 777 * semantic indices, so we'll use 0 for them. 778 */ 779 if (name == TGSI_SEMANTIC_POSITION || 780 name == TGSI_SEMANTIC_PSIZE || 781 name == TGSI_SEMANTIC_FACE) 782 index = 0; 783 else { 784 if (name == TGSI_SEMANTIC_GENERIC) { 785 /* For generic params simply use sid from tgsi */ 786 index = io->sid; 787 } else { 788 /* For non-generic params - pack name and sid into 8 bits */ 789 index = 0x80 | (name<<3) | (io->sid); 790 } 791 792 /* Make sure that all really used indices have nonzero value, so 793 * we can just compare it to 0 later instead of comparing the name 794 * with different values to detect special cases. */ 795 index++; 796 } 797 798 return index; 799}; 800 801/* turn input into interpolate on EG */ 802static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 803{ 804 int r = 0; 805 806 if (ctx->shader->input[index].spi_sid) { 807 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 808 if (!ctx->use_llvm) { 809 if (ctx->shader->input[index].interpolate > 0) { 810 r = evergreen_interp_alu(ctx, index); 811 } else { 812 r = evergreen_interp_flat(ctx, index); 813 } 814 } 815 } 816 return r; 817} 818 819static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 820{ 821 struct r600_bytecode_alu alu; 822 int i, r; 823 int gpr_front = ctx->shader->input[front].gpr; 824 int gpr_back = ctx->shader->input[back].gpr; 825 826 for (i = 0; i < 4; i++) { 827 memset(&alu, 0, sizeof(alu)); 828 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 829 alu.is_op3 = 1; 830 alu.dst.write = 1; 831 alu.dst.sel = gpr_front; 832 alu.src[0].sel = ctx->face_gpr; 833 alu.src[1].sel = gpr_front; 834 alu.src[2].sel = gpr_back; 835 836 alu.dst.chan = i; 837 alu.src[1].chan = i; 838 alu.src[2].chan = i; 839 alu.last = (i==3); 840 841 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 842 return r; 843 } 844 845 return 0; 846} 847 848static int tgsi_declaration(struct r600_shader_ctx *ctx) 849{ 
850 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 851 unsigned i; 852 int r; 853 854 switch (d->Declaration.File) { 855 case TGSI_FILE_INPUT: 856 i = ctx->shader->ninput++; 857 ctx->shader->input[i].name = d->Semantic.Name; 858 ctx->shader->input[i].sid = d->Semantic.Index; 859 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 860 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 861 ctx->shader->input[i].centroid = d->Interp.Centroid; 862 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 863 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 864 switch (ctx->shader->input[i].name) { 865 case TGSI_SEMANTIC_FACE: 866 ctx->face_gpr = ctx->shader->input[i].gpr; 867 break; 868 case TGSI_SEMANTIC_COLOR: 869 ctx->colors_used++; 870 break; 871 case TGSI_SEMANTIC_POSITION: 872 ctx->fragcoord_input = i; 873 break; 874 } 875 if (ctx->bc->chip_class >= EVERGREEN) { 876 if ((r = evergreen_interp_input(ctx, i))) 877 return r; 878 } 879 } 880 break; 881 case TGSI_FILE_OUTPUT: 882 i = ctx->shader->noutput++; 883 ctx->shader->output[i].name = d->Semantic.Name; 884 ctx->shader->output[i].sid = d->Semantic.Index; 885 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 886 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 887 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 888 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 889 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 890 switch (d->Semantic.Name) { 891 case TGSI_SEMANTIC_CLIPDIST: 892 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); 893 break; 894 case TGSI_SEMANTIC_PSIZE: 895 ctx->shader->vs_out_misc_write = 1; 896 ctx->shader->vs_out_point_size = 1; 897 break; 898 case TGSI_SEMANTIC_CLIPVERTEX: 899 ctx->clip_vertex_write = TRUE; 900 ctx->cv_output = i; 901 break; 902 } 903 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 904 switch 
(d->Semantic.Name) { 905 case TGSI_SEMANTIC_COLOR: 906 ctx->shader->nr_ps_max_color_exports++; 907 break; 908 } 909 } 910 break; 911 case TGSI_FILE_CONSTANT: 912 case TGSI_FILE_TEMPORARY: 913 case TGSI_FILE_SAMPLER: 914 case TGSI_FILE_ADDRESS: 915 break; 916 917 case TGSI_FILE_SYSTEM_VALUE: 918 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 919 if (!ctx->native_integers) { 920 struct r600_bytecode_alu alu; 921 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 922 923 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 924 alu.src[0].sel = 0; 925 alu.src[0].chan = 3; 926 927 alu.dst.sel = 0; 928 alu.dst.chan = 3; 929 alu.dst.write = 1; 930 alu.last = 1; 931 932 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 933 return r; 934 } 935 break; 936 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 937 break; 938 default: 939 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 940 return -EINVAL; 941 } 942 return 0; 943} 944 945static int r600_get_temp(struct r600_shader_ctx *ctx) 946{ 947 return ctx->temp_reg + ctx->max_driver_temp_used++; 948} 949 950/* 951 * for evergreen we need to scan the shader to find the number of GPRs we need to 952 * reserve for interpolation. 
953 * 954 * we need to know if we are going to emit 955 * any centroid inputs 956 * if perspective and linear are required 957*/ 958static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 959{ 960 int i; 961 int num_baryc; 962 963 ctx->input_linear = FALSE; 964 ctx->input_perspective = FALSE; 965 ctx->input_centroid = FALSE; 966 ctx->num_interp_gpr = 1; 967 968 /* any centroid inputs */ 969 for (i = 0; i < ctx->info.num_inputs; i++) { 970 /* skip position/face */ 971 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 972 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 973 continue; 974 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 975 ctx->input_linear = TRUE; 976 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 977 ctx->input_perspective = TRUE; 978 if (ctx->info.input_centroid[i]) 979 ctx->input_centroid = TRUE; 980 } 981 982 num_baryc = 0; 983 /* ignoring sample for now */ 984 if (ctx->input_perspective) 985 num_baryc++; 986 if (ctx->input_linear) 987 num_baryc++; 988 if (ctx->input_centroid) 989 num_baryc *= 2; 990 991 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 992 993 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */ 994 return ctx->num_interp_gpr; 995} 996 997static void tgsi_src(struct r600_shader_ctx *ctx, 998 const struct tgsi_full_src_register *tgsi_src, 999 struct r600_shader_src *r600_src) 1000{ 1001 memset(r600_src, 0, sizeof(*r600_src)); 1002 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1003 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1004 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1005 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1006 r600_src->neg = tgsi_src->Register.Negate; 1007 r600_src->abs = tgsi_src->Register.Absolute; 1008 1009 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1010 int index; 1011 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1012 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1013 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1014 1015 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1016 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 1017 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1018 return; 1019 } 1020 index = tgsi_src->Register.Index; 1021 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1022 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1023 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1024 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1025 r600_src->swizzle[0] = 3; 1026 r600_src->swizzle[1] = 3; 1027 r600_src->swizzle[2] = 3; 1028 r600_src->swizzle[3] = 3; 1029 r600_src->sel = 0; 1030 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1031 r600_src->swizzle[0] = 0; 1032 r600_src->swizzle[1] = 0; 1033 r600_src->swizzle[2] = 0; 1034 r600_src->swizzle[3] = 0; 1035 r600_src->sel = 0; 1036 } 1037 } else { 1038 if (tgsi_src->Register.Indirect) 1039 r600_src->rel = V_SQ_REL_RELATIVE; 1040 r600_src->sel = tgsi_src->Register.Index; 1041 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1042 } 1043 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1044 if (tgsi_src->Register.Dimension) { 1045 r600_src->kc_bank = tgsi_src->Dimension.Index; 1046 } 1047 } 1048} 1049 1050static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx, unsigned int offset, unsigned int dst_reg) 1051{ 1052 struct r600_bytecode_vtx vtx; 1053 unsigned int ar_reg; 1054 int r; 1055 1056 if (offset) { 1057 struct r600_bytecode_alu alu; 1058 1059 memset(&alu, 0, sizeof(alu)); 1060 1061 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 1062 alu.src[0].sel = ctx->bc->ar_reg; 1063 1064 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1065 alu.src[1].value = offset; 1066 1067 alu.dst.sel = dst_reg; 1068 alu.dst.write = 1; 1069 
alu.last = 1; 1070 1071 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1072 return r; 1073 1074 ar_reg = dst_reg; 1075 } else { 1076 ar_reg = ctx->bc->ar_reg; 1077 } 1078 1079 memset(&vtx, 0, sizeof(vtx)); 1080 vtx.buffer_id = cb_idx; 1081 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 1082 vtx.src_gpr = ar_reg; 1083 vtx.mega_fetch_count = 16; 1084 vtx.dst_gpr = dst_reg; 1085 vtx.dst_sel_x = 0; /* SEL_X */ 1086 vtx.dst_sel_y = 1; /* SEL_Y */ 1087 vtx.dst_sel_z = 2; /* SEL_Z */ 1088 vtx.dst_sel_w = 3; /* SEL_W */ 1089 vtx.data_format = FMT_32_32_32_32_FLOAT; 1090 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1091 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1092 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1093 vtx.endian = r600_endian_swap(32); 1094 1095 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1096 return r; 1097 1098 return 0; 1099} 1100 1101static int tgsi_split_constant(struct r600_shader_ctx *ctx) 1102{ 1103 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1104 struct r600_bytecode_alu alu; 1105 int i, j, k, nconst, r; 1106 1107 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1108 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1109 nconst++; 1110 } 1111 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1112 } 1113 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1114 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1115 continue; 1116 } 1117 1118 if (ctx->src[i].rel) { 1119 int treg = r600_get_temp(ctx); 1120 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, treg))) 1121 return r; 1122 1123 ctx->src[i].kc_bank = 0; 1124 ctx->src[i].sel = treg; 1125 ctx->src[i].rel = 0; 1126 j--; 1127 } else if (j > 0) { 1128 int treg = r600_get_temp(ctx); 1129 for (k = 0; k < 4; k++) { 1130 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1131 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1132 alu.src[0].sel = ctx->src[i].sel; 1133 
alu.src[0].chan = k; 1134 alu.src[0].rel = ctx->src[i].rel; 1135 alu.dst.sel = treg; 1136 alu.dst.chan = k; 1137 alu.dst.write = 1; 1138 if (k == 3) 1139 alu.last = 1; 1140 r = r600_bytecode_add_alu(ctx->bc, &alu); 1141 if (r) 1142 return r; 1143 } 1144 ctx->src[i].sel = treg; 1145 ctx->src[i].rel =0; 1146 j--; 1147 } 1148 } 1149 return 0; 1150} 1151 1152/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1153static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1154{ 1155 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1156 struct r600_bytecode_alu alu; 1157 int i, j, k, nliteral, r; 1158 1159 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1160 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1161 nliteral++; 1162 } 1163 } 1164 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1165 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1166 int treg = r600_get_temp(ctx); 1167 for (k = 0; k < 4; k++) { 1168 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1169 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1170 alu.src[0].sel = ctx->src[i].sel; 1171 alu.src[0].chan = k; 1172 alu.src[0].value = ctx->src[i].value[k]; 1173 alu.dst.sel = treg; 1174 alu.dst.chan = k; 1175 alu.dst.write = 1; 1176 if (k == 3) 1177 alu.last = 1; 1178 r = r600_bytecode_add_alu(ctx->bc, &alu); 1179 if (r) 1180 return r; 1181 } 1182 ctx->src[i].sel = treg; 1183 j--; 1184 } 1185 } 1186 return 0; 1187} 1188 1189static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1190{ 1191 int i, r, count = ctx->shader->ninput; 1192 1193 for (i = 0; i < count; i++) { 1194 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1195 unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg; 1196 if (ctx->bc->chip_class >= EVERGREEN) { 1197 if ((r = evergreen_interp_input(ctx, back_facing_reg))) 1198 return r; 1199 } 1200 1201 if 
(!ctx->use_llvm) { 1202 r = select_twoside_color(ctx, i, back_facing_reg); 1203 if (r) 1204 return r; 1205 } 1206 } 1207 } 1208 return 0; 1209} 1210 1211static int r600_shader_from_tgsi(struct r600_screen *rscreen, 1212 struct r600_pipe_shader *pipeshader, 1213 struct r600_shader_key key) 1214{ 1215 struct r600_shader *shader = &pipeshader->shader; 1216 struct tgsi_token *tokens = pipeshader->selector->tokens; 1217 struct pipe_stream_output_info so = pipeshader->selector->so; 1218 struct tgsi_full_immediate *immediate; 1219 struct tgsi_full_property *property; 1220 struct r600_shader_ctx ctx; 1221 struct r600_bytecode_output output[32]; 1222 unsigned output_done, noutput; 1223 unsigned opcode; 1224 int i, j, k, r = 0; 1225 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0; 1226 /* Declarations used by llvm code */ 1227 bool use_llvm = false; 1228 unsigned char * inst_bytes = NULL; 1229 unsigned inst_byte_count = 0; 1230 1231#ifdef R600_USE_LLVM 1232 use_llvm = debug_get_bool_option("R600_LLVM", TRUE); 1233#endif 1234 ctx.bc = &shader->bc; 1235 ctx.shader = shader; 1236 ctx.native_integers = true; 1237 1238 r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family, 1239 rscreen->msaa_texture_support); 1240 ctx.tokens = tokens; 1241 tgsi_scan_shader(tokens, &ctx.info); 1242 tgsi_parse_init(&ctx.parse, tokens); 1243 ctx.type = ctx.parse.FullHeader.Processor.Processor; 1244 shader->processor_type = ctx.type; 1245 ctx.bc->type = shader->processor_type; 1246 1247 ctx.face_gpr = -1; 1248 ctx.fragcoord_input = -1; 1249 ctx.colors_used = 0; 1250 ctx.clip_vertex_write = 0; 1251 1252 shader->nr_ps_color_exports = 0; 1253 shader->nr_ps_max_color_exports = 0; 1254 1255 shader->two_side = key.color_two_side; 1256 1257 /* register allocations */ 1258 /* Values [0,127] correspond to GPR[0..127]. 
1259 * Values [128,159] correspond to constant buffer bank 0 1260 * Values [160,191] correspond to constant buffer bank 1 1261 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 1262 * Values [256,287] correspond to constant buffer bank 2 (EG) 1263 * Values [288,319] correspond to constant buffer bank 3 (EG) 1264 * Other special values are shown in the list below. 1265 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 1266 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 1267 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 1268 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 1269 * 248 SQ_ALU_SRC_0: special constant 0.0. 1270 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 1271 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 1272 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 1273 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 1274 * 253 SQ_ALU_SRC_LITERAL: literal constant. 1275 * 254 SQ_ALU_SRC_PV: previous vector result. 1276 * 255 SQ_ALU_SRC_PS: previous scalar result. 1277 */ 1278 for (i = 0; i < TGSI_FILE_COUNT; i++) { 1279 ctx.file_offset[i] = 0; 1280 } 1281 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1282 ctx.file_offset[TGSI_FILE_INPUT] = 1; 1283 if (ctx.bc->chip_class >= EVERGREEN) { 1284 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 1285 } else { 1286 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 1287 } 1288 } 1289 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 1290 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 1291 } 1292 1293#ifdef R600_USE_LLVM 1294 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 1295 fprintf(stderr, "Warning: R600 LLVM backend does not support " 1296 "indirect adressing. 
Falling back to TGSI " 1297 "backend.\n"); 1298 use_llvm = 0; 1299 } 1300#endif 1301 ctx.use_llvm = use_llvm; 1302 1303 if (use_llvm) { 1304 ctx.file_offset[TGSI_FILE_OUTPUT] = 1305 ctx.file_offset[TGSI_FILE_INPUT]; 1306 } else { 1307 ctx.file_offset[TGSI_FILE_OUTPUT] = 1308 ctx.file_offset[TGSI_FILE_INPUT] + 1309 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 1310 } 1311 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 1312 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 1313 1314 /* Outside the GPR range. This will be translated to one of the 1315 * kcache banks later. */ 1316 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 1317 1318 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 1319 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 1320 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 1321 ctx.temp_reg = ctx.bc->ar_reg + 1; 1322 1323 ctx.nliterals = 0; 1324 ctx.literals = NULL; 1325 shader->fs_write_all = FALSE; 1326 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1327 tgsi_parse_token(&ctx.parse); 1328 switch (ctx.parse.FullToken.Token.Type) { 1329 case TGSI_TOKEN_TYPE_IMMEDIATE: 1330 immediate = &ctx.parse.FullToken.FullImmediate; 1331 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 1332 if(ctx.literals == NULL) { 1333 r = -ENOMEM; 1334 goto out_err; 1335 } 1336 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 1337 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 1338 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 1339 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 1340 ctx.nliterals++; 1341 break; 1342 case TGSI_TOKEN_TYPE_DECLARATION: 1343 r = tgsi_declaration(&ctx); 1344 if (r) 1345 goto out_err; 1346 break; 1347 case TGSI_TOKEN_TYPE_INSTRUCTION: 1348 break; 1349 case TGSI_TOKEN_TYPE_PROPERTY: 1350 property = &ctx.parse.FullToken.FullProperty; 1351 switch (property->Property.PropertyName) { 1352 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 1353 if (property->u[0].Data 
== 1) 1354 shader->fs_write_all = TRUE; 1355 break; 1356 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 1357 /* we don't need this one */ 1358 break; 1359 } 1360 break; 1361 default: 1362 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 1363 r = -EINVAL; 1364 goto out_err; 1365 } 1366 } 1367 1368 /* Process two side if needed */ 1369 if (shader->two_side && ctx.colors_used) { 1370 int i, count = ctx.shader->ninput; 1371 unsigned next_lds_loc = ctx.shader->nlds; 1372 1373 /* additional inputs will be allocated right after the existing inputs, 1374 * we won't need them after the color selection, so we don't need to 1375 * reserve these gprs for the rest of the shader code and to adjust 1376 * output offsets etc. */ 1377 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 1378 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 1379 1380 if (ctx.face_gpr == -1) { 1381 i = ctx.shader->ninput++; 1382 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 1383 ctx.shader->input[i].spi_sid = 0; 1384 ctx.shader->input[i].gpr = gpr++; 1385 ctx.face_gpr = ctx.shader->input[i].gpr; 1386 } 1387 1388 for (i = 0; i < count; i++) { 1389 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1390 int ni = ctx.shader->ninput++; 1391 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 1392 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 1393 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 1394 ctx.shader->input[ni].gpr = gpr++; 1395 // TGSI to LLVM needs to know the lds position of inputs. 
1396 // Non LLVM path computes it later (in process_twoside_color) 1397 ctx.shader->input[ni].lds_pos = next_lds_loc++; 1398 ctx.shader->input[i].potential_back_facing_reg = ni; 1399 } 1400 } 1401 } 1402 1403/* LLVM backend setup */ 1404#ifdef R600_USE_LLVM 1405 if (use_llvm) { 1406 struct radeon_llvm_context radeon_llvm_ctx; 1407 LLVMModuleRef mod; 1408 unsigned dump = 0; 1409 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 1410 radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT]; 1411 radeon_llvm_ctx.type = ctx.type; 1412 radeon_llvm_ctx.two_side = shader->two_side; 1413 radeon_llvm_ctx.face_input = ctx.face_gpr; 1414 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 1415 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 1416 radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); 1417 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 1418 radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); 1419 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 1420 if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { 1421 dump = 1; 1422 } 1423 if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count, 1424 rscreen->family, dump)) { 1425 FREE(inst_bytes); 1426 radeon_llvm_dispose(&radeon_llvm_ctx); 1427 use_llvm = 0; 1428 fprintf(stderr, "R600 LLVM backend failed to compile " 1429 "shader. 
Falling back to TGSI\n"); 1430 } else { 1431 ctx.file_offset[TGSI_FILE_OUTPUT] = 1432 ctx.file_offset[TGSI_FILE_INPUT]; 1433 } 1434 radeon_llvm_dispose(&radeon_llvm_ctx); 1435 } 1436#endif 1437/* End of LLVM backend setup */ 1438 1439 if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN) 1440 shader->nr_ps_max_color_exports = 8; 1441 1442 if (ctx.fragcoord_input >= 0 && !use_llvm) { 1443 if (ctx.bc->chip_class == CAYMAN) { 1444 for (j = 0 ; j < 4; j++) { 1445 struct r600_bytecode_alu alu; 1446 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1447 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1448 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1449 alu.src[0].chan = 3; 1450 1451 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1452 alu.dst.chan = j; 1453 alu.dst.write = (j == 3); 1454 alu.last = 1; 1455 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 1456 return r; 1457 } 1458 } else { 1459 struct r600_bytecode_alu alu; 1460 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1461 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1462 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1463 alu.src[0].chan = 3; 1464 1465 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1466 alu.dst.chan = 3; 1467 alu.dst.write = 1; 1468 alu.last = 1; 1469 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 1470 return r; 1471 } 1472 } 1473 1474 if (shader->two_side && ctx.colors_used) { 1475 if ((r = process_twoside_color_inputs(&ctx))) 1476 return r; 1477 } 1478 1479 tgsi_parse_init(&ctx.parse, tokens); 1480 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1481 tgsi_parse_token(&ctx.parse); 1482 switch (ctx.parse.FullToken.Token.Type) { 1483 case TGSI_TOKEN_TYPE_INSTRUCTION: 1484 if (use_llvm) { 1485 continue; 1486 } 1487 r = tgsi_is_supported(&ctx); 1488 if (r) 1489 goto out_err; 1490 ctx.max_driver_temp_used = 0; 1491 /* reserve first tmp for everyone */ 1492 r600_get_temp(&ctx); 1493 1494 opcode = 
ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 1495 if ((r = tgsi_split_constant(&ctx))) 1496 goto out_err; 1497 if ((r = tgsi_split_literal_constant(&ctx))) 1498 goto out_err; 1499 if (ctx.bc->chip_class == CAYMAN) 1500 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 1501 else if (ctx.bc->chip_class >= EVERGREEN) 1502 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 1503 else 1504 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 1505 r = ctx.inst_info->process(&ctx); 1506 if (r) 1507 goto out_err; 1508 break; 1509 default: 1510 break; 1511 } 1512 } 1513 1514 /* Reset the temporary register counter. */ 1515 ctx.max_driver_temp_used = 0; 1516 1517 /* Get instructions if we are using the LLVM backend. */ 1518 if (use_llvm) { 1519 r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count); 1520 FREE(inst_bytes); 1521 } 1522 1523 noutput = shader->noutput; 1524 1525 if (ctx.clip_vertex_write) { 1526 unsigned clipdist_temp[2]; 1527 1528 clipdist_temp[0] = r600_get_temp(&ctx); 1529 clipdist_temp[1] = r600_get_temp(&ctx); 1530 1531 /* need to convert a clipvertex write into clipdistance writes and not export 1532 the clip vertex anymore */ 1533 1534 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 1535 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1536 shader->output[noutput].gpr = clipdist_temp[0]; 1537 noutput++; 1538 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1539 shader->output[noutput].gpr = clipdist_temp[1]; 1540 noutput++; 1541 1542 /* reset spi_sid for clipvertex output to avoid confusing spi */ 1543 shader->output[ctx.cv_output].spi_sid = 0; 1544 1545 shader->clip_dist_write = 0xFF; 1546 1547 for (i = 0; i < 8; i++) { 1548 int oreg = i >> 2; 1549 int ochan = i & 3; 1550 1551 for (j = 0; j < 4; j++) { 1552 struct r600_bytecode_alu alu; 1553 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1554 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4); 1555 alu.src[0].sel = 
shader->output[ctx.cv_output].gpr; 1556 alu.src[0].chan = j; 1557 1558 alu.src[1].sel = 512 + i; 1559 alu.src[1].kc_bank = R600_UCP_CONST_BUFFER; 1560 alu.src[1].chan = j; 1561 1562 alu.dst.sel = clipdist_temp[oreg]; 1563 alu.dst.chan = j; 1564 alu.dst.write = (j == ochan); 1565 if (j == 3) 1566 alu.last = 1; 1567 r = r600_bytecode_add_alu(ctx.bc, &alu); 1568 if (r) 1569 return r; 1570 } 1571 } 1572 } 1573 1574 /* Add stream outputs. */ 1575 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 1576 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1577 1578 /* Sanity checking. */ 1579 if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) { 1580 R600_ERR("Too many stream outputs: %d\n", so.num_outputs); 1581 r = -EINVAL; 1582 goto out_err; 1583 } 1584 for (i = 0; i < so.num_outputs; i++) { 1585 if (so.output[i].output_buffer >= 4) { 1586 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1587 so.output[i].output_buffer); 1588 r = -EINVAL; 1589 goto out_err; 1590 } 1591 } 1592 1593 /* Initialize locations where the outputs are stored. */ 1594 for (i = 0; i < so.num_outputs; i++) { 1595 so_gpr[i] = shader->output[so.output[i].register_index].gpr; 1596 1597 /* Lower outputs with dst_offset < start_component. 1598 * 1599 * We can only output 4D vectors with a write mask, e.g. we can 1600 * only output the W component at offset 3, etc. If we want 1601 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1602 * to move it to X and output X. 
*/ 1603 if (so.output[i].dst_offset < so.output[i].start_component) { 1604 unsigned tmp = r600_get_temp(&ctx); 1605 1606 for (j = 0; j < so.output[i].num_components; j++) { 1607 struct r600_bytecode_alu alu; 1608 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1609 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1610 alu.src[0].sel = so_gpr[i]; 1611 alu.src[0].chan = so.output[i].start_component + j; 1612 1613 alu.dst.sel = tmp; 1614 alu.dst.chan = j; 1615 alu.dst.write = 1; 1616 if (j == so.output[i].num_components - 1) 1617 alu.last = 1; 1618 r = r600_bytecode_add_alu(ctx.bc, &alu); 1619 if (r) 1620 return r; 1621 } 1622 so.output[i].start_component = 0; 1623 so_gpr[i] = tmp; 1624 } 1625 } 1626 1627 /* Write outputs to buffers. */ 1628 for (i = 0; i < so.num_outputs; i++) { 1629 struct r600_bytecode_output output; 1630 1631 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1632 output.gpr = so_gpr[i]; 1633 output.elem_size = so.output[i].num_components; 1634 output.array_base = so.output[i].dst_offset - so.output[i].start_component; 1635 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1636 output.burst_count = 1; 1637 output.barrier = 1; 1638 /* array_size is an upper limit for the burst_count 1639 * with MEM_STREAM instructions */ 1640 output.array_size = 0xFFF; 1641 output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component; 1642 if (ctx.bc->chip_class >= EVERGREEN) { 1643 switch (so.output[i].output_buffer) { 1644 case 0: 1645 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 1646 break; 1647 case 1: 1648 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 1649 break; 1650 case 2: 1651 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 1652 break; 1653 case 3: 1654 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 1655 break; 1656 } 1657 } else { 1658 switch (so.output[i].output_buffer) { 1659 case 0: 
1660 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 1661 break; 1662 case 1: 1663 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 1664 break; 1665 case 2: 1666 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 1667 break; 1668 case 3: 1669 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 1670 break; 1671 } 1672 } 1673 r = r600_bytecode_add_output(ctx.bc, &output); 1674 if (r) 1675 goto out_err; 1676 } 1677 } 1678 1679 /* export output */ 1680 for (i = 0, j = 0; i < noutput; i++, j++) { 1681 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1682 output[j].gpr = shader->output[i].gpr; 1683 output[j].elem_size = 3; 1684 output[j].swizzle_x = 0; 1685 output[j].swizzle_y = 1; 1686 output[j].swizzle_z = 2; 1687 output[j].swizzle_w = 3; 1688 output[j].burst_count = 1; 1689 output[j].barrier = 1; 1690 output[j].type = -1; 1691 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1692 switch (ctx.type) { 1693 case TGSI_PROCESSOR_VERTEX: 1694 switch (shader->output[i].name) { 1695 case TGSI_SEMANTIC_POSITION: 1696 output[j].array_base = next_pos_base++; 1697 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1698 break; 1699 1700 case TGSI_SEMANTIC_PSIZE: 1701 output[j].array_base = next_pos_base++; 1702 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1703 break; 1704 case TGSI_SEMANTIC_CLIPVERTEX: 1705 j--; 1706 break; 1707 case TGSI_SEMANTIC_CLIPDIST: 1708 output[j].array_base = next_pos_base++; 1709 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1710 /* spi_sid is 0 for clipdistance outputs that were generated 1711 * for clipvertex - we don't need to pass them to PS */ 1712 if (shader->output[i].spi_sid) { 1713 j++; 1714 /* duplicate it as PARAM to pass to the pixel shader */ 1715 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 1716 output[j].array_base = next_param_base++; 1717 output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1718 } 1719 break; 1720 case TGSI_SEMANTIC_FOG: 1721 output[j].swizzle_y = 4; /* 0 */ 1722 output[j].swizzle_z = 4; /* 0 */ 1723 output[j].swizzle_w = 5; /* 1 */ 1724 break; 1725 } 1726 break; 1727 case TGSI_PROCESSOR_FRAGMENT: 1728 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 1729 /* never export more colors than the number of CBs */ 1730 if (next_pixel_base && next_pixel_base >= key.nr_cbufs) { 1731 /* skip export */ 1732 j--; 1733 continue; 1734 } 1735 output[j].swizzle_w = key.alpha_to_one ? 5 : 3; 1736 output[j].array_base = next_pixel_base++; 1737 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1738 shader->nr_ps_color_exports++; 1739 if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) { 1740 for (k = 1; k < key.nr_cbufs; k++) { 1741 j++; 1742 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1743 output[j].gpr = shader->output[i].gpr; 1744 output[j].elem_size = 3; 1745 output[j].swizzle_x = 0; 1746 output[j].swizzle_y = 1; 1747 output[j].swizzle_z = 2; 1748 output[j].swizzle_w = key.alpha_to_one ? 
5 : 3; 1749 output[j].burst_count = 1; 1750 output[j].barrier = 1; 1751 output[j].array_base = next_pixel_base++; 1752 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1753 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1754 shader->nr_ps_color_exports++; 1755 } 1756 } 1757 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 1758 output[j].array_base = 61; 1759 output[j].swizzle_x = 2; 1760 output[j].swizzle_y = 7; 1761 output[j].swizzle_z = output[j].swizzle_w = 7; 1762 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1763 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 1764 output[j].array_base = 61; 1765 output[j].swizzle_x = 7; 1766 output[j].swizzle_y = 1; 1767 output[j].swizzle_z = output[j].swizzle_w = 7; 1768 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1769 } else { 1770 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 1771 r = -EINVAL; 1772 goto out_err; 1773 } 1774 break; 1775 default: 1776 R600_ERR("unsupported processor type %d\n", ctx.type); 1777 r = -EINVAL; 1778 goto out_err; 1779 } 1780 1781 if (output[j].type==-1) { 1782 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1783 output[j].array_base = next_param_base++; 1784 } 1785 } 1786 1787 /* add fake position export */ 1788 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { 1789 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1790 output[j].gpr = 0; 1791 output[j].elem_size = 3; 1792 output[j].swizzle_x = 7; 1793 output[j].swizzle_y = 7; 1794 output[j].swizzle_z = 7; 1795 output[j].swizzle_w = 7; 1796 output[j].burst_count = 1; 1797 output[j].barrier = 1; 1798 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1799 output[j].array_base = next_pos_base; 1800 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1801 j++; 1802 } 1803 1804 /* add fake param output for vertex shader if no param is exported */ 
1805 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 1806 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1807 output[j].gpr = 0; 1808 output[j].elem_size = 3; 1809 output[j].swizzle_x = 7; 1810 output[j].swizzle_y = 7; 1811 output[j].swizzle_z = 7; 1812 output[j].swizzle_w = 7; 1813 output[j].burst_count = 1; 1814 output[j].barrier = 1; 1815 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1816 output[j].array_base = 0; 1817 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1818 j++; 1819 } 1820 1821 /* add fake pixel export */ 1822 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) { 1823 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1824 output[j].gpr = 0; 1825 output[j].elem_size = 3; 1826 output[j].swizzle_x = 7; 1827 output[j].swizzle_y = 7; 1828 output[j].swizzle_z = 7; 1829 output[j].swizzle_w = 7; 1830 output[j].burst_count = 1; 1831 output[j].barrier = 1; 1832 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1833 output[j].array_base = 0; 1834 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1835 j++; 1836 } 1837 1838 noutput = j; 1839 1840 /* set export done on last export of each type */ 1841 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1842 if (ctx.bc->chip_class < CAYMAN) { 1843 if (i == (noutput - 1)) { 1844 output[i].end_of_program = 1; 1845 } 1846 } 1847 if (!(output_done & (1 << output[i].type))) { 1848 output_done |= (1 << output[i].type); 1849 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1850 } 1851 } 1852 /* add output to bytecode */ 1853 if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) { 1854 for (i = 0; i < noutput; i++) { 1855 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1856 if (r) 1857 goto out_err; 1858 } 1859 } 1860 /* add program end */ 1861 if (ctx.bc->chip_class == CAYMAN) 1862 cm_bytecode_add_cf_end(ctx.bc); 1863 1864 /* check GPR limit 
- we have 124 = 128 - 4 1865 * (4 are reserved as alu clause temporary registers) */ 1866 if (ctx.bc->ngpr > 124) { 1867 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 1868 r = -ENOMEM; 1869 goto out_err; 1870 } 1871 1872 free(ctx.literals); 1873 tgsi_parse_free(&ctx.parse); 1874 return 0; 1875out_err: 1876 free(ctx.literals); 1877 tgsi_parse_free(&ctx.parse); 1878 return r; 1879} 1880 1881static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1882{ 1883 R600_ERR("%s tgsi opcode unsupported\n", 1884 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1885 return -EINVAL; 1886} 1887 1888static int tgsi_end(struct r600_shader_ctx *ctx) 1889{ 1890 return 0; 1891} 1892 1893static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1894 const struct r600_shader_src *shader_src, 1895 unsigned chan) 1896{ 1897 bc_src->sel = shader_src->sel; 1898 bc_src->chan = shader_src->swizzle[chan]; 1899 bc_src->neg = shader_src->neg; 1900 bc_src->abs = shader_src->abs; 1901 bc_src->rel = shader_src->rel; 1902 bc_src->value = shader_src->value[bc_src->chan]; 1903 bc_src->kc_bank = shader_src->kc_bank; 1904} 1905 1906static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1907{ 1908 bc_src->abs = 1; 1909 bc_src->neg = 0; 1910} 1911 1912static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1913{ 1914 bc_src->neg = !bc_src->neg; 1915} 1916 1917static void tgsi_dst(struct r600_shader_ctx *ctx, 1918 const struct tgsi_full_dst_register *tgsi_dst, 1919 unsigned swizzle, 1920 struct r600_bytecode_alu_dst *r600_dst) 1921{ 1922 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1923 1924 r600_dst->sel = tgsi_dst->Register.Index; 1925 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1926 r600_dst->chan = swizzle; 1927 r600_dst->write = 1; 1928 if (tgsi_dst->Register.Indirect) 1929 r600_dst->rel = V_SQ_REL_RELATIVE; 1930 if (inst->Instruction.Saturate) { 1931 r600_dst->clamp 
= 1; 1932 } 1933} 1934 1935static int tgsi_last_instruction(unsigned writemask) 1936{ 1937 int i, lasti = 0; 1938 1939 for (i = 0; i < 4; i++) { 1940 if (writemask & (1 << i)) { 1941 lasti = i; 1942 } 1943 } 1944 return lasti; 1945} 1946 1947static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1948{ 1949 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1950 struct r600_bytecode_alu alu; 1951 int i, j, r; 1952 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1953 1954 for (i = 0; i < lasti + 1; i++) { 1955 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1956 continue; 1957 1958 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1959 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1960 1961 alu.inst = ctx->inst_info->r600_opcode; 1962 if (!swap) { 1963 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1964 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1965 } 1966 } else { 1967 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1968 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1969 } 1970 /* handle some special cases */ 1971 switch (ctx->inst_info->tgsi_opcode) { 1972 case TGSI_OPCODE_SUB: 1973 r600_bytecode_src_toggle_neg(&alu.src[1]); 1974 break; 1975 case TGSI_OPCODE_ABS: 1976 r600_bytecode_src_set_abs(&alu.src[0]); 1977 break; 1978 default: 1979 break; 1980 } 1981 if (i == lasti || trans_only) { 1982 alu.last = 1; 1983 } 1984 r = r600_bytecode_add_alu(ctx->bc, &alu); 1985 if (r) 1986 return r; 1987 } 1988 return 0; 1989} 1990 1991static int tgsi_op2(struct r600_shader_ctx *ctx) 1992{ 1993 return tgsi_op2_s(ctx, 0, 0); 1994} 1995 1996static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1997{ 1998 return tgsi_op2_s(ctx, 1, 0); 1999} 2000 2001static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 2002{ 2003 return tgsi_op2_s(ctx, 0, 1); 2004} 2005 2006static int tgsi_ineg(struct r600_shader_ctx *ctx) 2007{ 2008 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2009 
struct r600_bytecode_alu alu; 2010 int i, r; 2011 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2012 2013 for (i = 0; i < lasti + 1; i++) { 2014 2015 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2016 continue; 2017 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2018 alu.inst = ctx->inst_info->r600_opcode; 2019 2020 alu.src[0].sel = V_SQ_ALU_SRC_0; 2021 2022 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2023 2024 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2025 2026 if (i == lasti) { 2027 alu.last = 1; 2028 } 2029 r = r600_bytecode_add_alu(ctx->bc, &alu); 2030 if (r) 2031 return r; 2032 } 2033 return 0; 2034 2035} 2036 2037static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 2038{ 2039 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2040 int i, j, r; 2041 struct r600_bytecode_alu alu; 2042 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 2043 2044 for (i = 0 ; i < last_slot; i++) { 2045 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2046 alu.inst = ctx->inst_info->r600_opcode; 2047 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2048 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 2049 2050 /* RSQ should take the absolute value of src */ 2051 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) { 2052 r600_bytecode_src_set_abs(&alu.src[j]); 2053 } 2054 } 2055 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2056 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2057 2058 if (i == last_slot - 1) 2059 alu.last = 1; 2060 r = r600_bytecode_add_alu(ctx->bc, &alu); 2061 if (r) 2062 return r; 2063 } 2064 return 0; 2065} 2066 2067static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 2068{ 2069 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2070 int i, j, k, r; 2071 struct r600_bytecode_alu alu; 2072 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 2073 for (k = 0; k < last_slot; k++) { 2074 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 2075 continue; 2076 2077 for (i = 0 ; i < 4; i++) { 2078 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2079 alu.inst = ctx->inst_info->r600_opcode; 2080 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2081 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 2082 } 2083 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2084 alu.dst.write = (i == k); 2085 if (i == 3) 2086 alu.last = 1; 2087 r = r600_bytecode_add_alu(ctx->bc, &alu); 2088 if (r) 2089 return r; 2090 } 2091 } 2092 return 0; 2093} 2094 2095/* 2096 * r600 - trunc to -PI..PI range 2097 * r700 - normalize by dividing by 2PI 2098 * see fdo bug 27901 2099 */ 2100static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 2101{ 2102 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 2103 static float double_pi = 3.1415926535 * 2; 2104 static float neg_pi = -3.1415926535; 2105 2106 int r; 2107 struct r600_bytecode_alu alu; 2108 2109 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2110 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2111 alu.is_op3 = 1; 2112 2113 alu.dst.chan = 0; 2114 alu.dst.sel = ctx->temp_reg; 2115 alu.dst.write = 1; 2116 2117 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2118 2119 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2120 alu.src[1].chan = 0; 2121 alu.src[1].value = *(uint32_t *)&half_inv_pi; 2122 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 2123 alu.src[2].chan = 0; 2124 alu.last = 1; 2125 r = r600_bytecode_add_alu(ctx->bc, &alu); 2126 if (r) 2127 return r; 2128 2129 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2131 2132 alu.dst.chan = 0; 2133 alu.dst.sel = ctx->temp_reg; 2134 alu.dst.write = 1; 2135 2136 alu.src[0].sel = ctx->temp_reg; 2137 alu.src[0].chan = 0; 2138 alu.last = 1; 2139 r = r600_bytecode_add_alu(ctx->bc, &alu); 2140 if (r) 2141 return r; 2142 2143 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
2144 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2145 alu.is_op3 = 1; 2146 2147 alu.dst.chan = 0; 2148 alu.dst.sel = ctx->temp_reg; 2149 alu.dst.write = 1; 2150 2151 alu.src[0].sel = ctx->temp_reg; 2152 alu.src[0].chan = 0; 2153 2154 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2155 alu.src[1].chan = 0; 2156 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2157 alu.src[2].chan = 0; 2158 2159 if (ctx->bc->chip_class == R600) { 2160 alu.src[1].value = *(uint32_t *)&double_pi; 2161 alu.src[2].value = *(uint32_t *)&neg_pi; 2162 } else { 2163 alu.src[1].sel = V_SQ_ALU_SRC_1; 2164 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 2165 alu.src[2].neg = 1; 2166 } 2167 2168 alu.last = 1; 2169 r = r600_bytecode_add_alu(ctx->bc, &alu); 2170 if (r) 2171 return r; 2172 return 0; 2173} 2174 2175static int cayman_trig(struct r600_shader_ctx *ctx) 2176{ 2177 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2178 struct r600_bytecode_alu alu; 2179 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 2180 int i, r; 2181 2182 r = tgsi_setup_trig(ctx); 2183 if (r) 2184 return r; 2185 2186 2187 for (i = 0; i < last_slot; i++) { 2188 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2189 alu.inst = ctx->inst_info->r600_opcode; 2190 alu.dst.chan = i; 2191 2192 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2193 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2194 2195 alu.src[0].sel = ctx->temp_reg; 2196 alu.src[0].chan = 0; 2197 if (i == last_slot - 1) 2198 alu.last = 1; 2199 r = r600_bytecode_add_alu(ctx->bc, &alu); 2200 if (r) 2201 return r; 2202 } 2203 return 0; 2204} 2205 2206static int tgsi_trig(struct r600_shader_ctx *ctx) 2207{ 2208 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2209 struct r600_bytecode_alu alu; 2210 int i, r; 2211 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2212 2213 r = tgsi_setup_trig(ctx); 2214 if (r) 2215 return r; 2216 2217 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2218 alu.inst = ctx->inst_info->r600_opcode; 2219 alu.dst.chan = 0; 2220 alu.dst.sel = ctx->temp_reg; 2221 alu.dst.write = 1; 2222 2223 alu.src[0].sel = ctx->temp_reg; 2224 alu.src[0].chan = 0; 2225 alu.last = 1; 2226 r = r600_bytecode_add_alu(ctx->bc, &alu); 2227 if (r) 2228 return r; 2229 2230 /* replicate result */ 2231 for (i = 0; i < lasti + 1; i++) { 2232 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2233 continue; 2234 2235 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2236 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2237 2238 alu.src[0].sel = ctx->temp_reg; 2239 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2240 if (i == lasti) 2241 alu.last = 1; 2242 r = r600_bytecode_add_alu(ctx->bc, &alu); 2243 if (r) 2244 return r; 2245 } 2246 return 0; 2247} 2248 2249static int tgsi_scs(struct r600_shader_ctx *ctx) 2250{ 2251 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2252 struct r600_bytecode_alu alu; 2253 int i, r; 2254 2255 /* We'll only need the 
trig stuff if we are going to write to the 2256 * X or Y components of the destination vector. 2257 */ 2258 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 2259 r = tgsi_setup_trig(ctx); 2260 if (r) 2261 return r; 2262 } 2263 2264 /* dst.x = COS */ 2265 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2266 if (ctx->bc->chip_class == CAYMAN) { 2267 for (i = 0 ; i < 3; i++) { 2268 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2269 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 2270 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2271 2272 if (i == 0) 2273 alu.dst.write = 1; 2274 else 2275 alu.dst.write = 0; 2276 alu.src[0].sel = ctx->temp_reg; 2277 alu.src[0].chan = 0; 2278 if (i == 2) 2279 alu.last = 1; 2280 r = r600_bytecode_add_alu(ctx->bc, &alu); 2281 if (r) 2282 return r; 2283 } 2284 } else { 2285 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2286 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 2287 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2288 2289 alu.src[0].sel = ctx->temp_reg; 2290 alu.src[0].chan = 0; 2291 alu.last = 1; 2292 r = r600_bytecode_add_alu(ctx->bc, &alu); 2293 if (r) 2294 return r; 2295 } 2296 } 2297 2298 /* dst.y = SIN */ 2299 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2300 if (ctx->bc->chip_class == CAYMAN) { 2301 for (i = 0 ; i < 3; i++) { 2302 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 2304 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2305 if (i == 1) 2306 alu.dst.write = 1; 2307 else 2308 alu.dst.write = 0; 2309 alu.src[0].sel = ctx->temp_reg; 2310 alu.src[0].chan = 0; 2311 if (i == 2) 2312 alu.last = 1; 2313 r = r600_bytecode_add_alu(ctx->bc, &alu); 2314 if (r) 2315 return r; 2316 } 2317 } else { 2318 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2319 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 2320 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2321 2322 alu.src[0].sel = ctx->temp_reg; 2323 
alu.src[0].chan = 0; 2324 alu.last = 1; 2325 r = r600_bytecode_add_alu(ctx->bc, &alu); 2326 if (r) 2327 return r; 2328 } 2329 } 2330 2331 /* dst.z = 0.0; */ 2332 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2333 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2334 2335 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2336 2337 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2338 2339 alu.src[0].sel = V_SQ_ALU_SRC_0; 2340 alu.src[0].chan = 0; 2341 2342 alu.last = 1; 2343 2344 r = r600_bytecode_add_alu(ctx->bc, &alu); 2345 if (r) 2346 return r; 2347 } 2348 2349 /* dst.w = 1.0; */ 2350 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2351 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2352 2353 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2354 2355 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2356 2357 alu.src[0].sel = V_SQ_ALU_SRC_1; 2358 alu.src[0].chan = 0; 2359 2360 alu.last = 1; 2361 2362 r = r600_bytecode_add_alu(ctx->bc, &alu); 2363 if (r) 2364 return r; 2365 } 2366 2367 return 0; 2368} 2369 2370static int tgsi_kill(struct r600_shader_ctx *ctx) 2371{ 2372 struct r600_bytecode_alu alu; 2373 int i, r; 2374 2375 for (i = 0; i < 4; i++) { 2376 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2377 alu.inst = ctx->inst_info->r600_opcode; 2378 2379 alu.dst.chan = i; 2380 2381 alu.src[0].sel = V_SQ_ALU_SRC_0; 2382 2383 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 2384 alu.src[1].sel = V_SQ_ALU_SRC_1; 2385 alu.src[1].neg = 1; 2386 } else { 2387 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2388 } 2389 if (i == 3) { 2390 alu.last = 1; 2391 } 2392 r = r600_bytecode_add_alu(ctx->bc, &alu); 2393 if (r) 2394 return r; 2395 } 2396 2397 /* kill must be last in ALU */ 2398 ctx->bc->force_add_cf = 1; 2399 ctx->shader->uses_kill = TRUE; 2400 return 0; 2401} 2402 2403static int tgsi_lit(struct r600_shader_ctx *ctx) 2404{ 2405 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2406 struct 
r600_bytecode_alu alu; 2407 int r; 2408 2409 /* tmp.x = max(src.y, 0.0) */ 2410 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2411 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 2412 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 2413 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2414 alu.src[1].chan = 1; 2415 2416 alu.dst.sel = ctx->temp_reg; 2417 alu.dst.chan = 0; 2418 alu.dst.write = 1; 2419 2420 alu.last = 1; 2421 r = r600_bytecode_add_alu(ctx->bc, &alu); 2422 if (r) 2423 return r; 2424 2425 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 2426 { 2427 int chan; 2428 int sel; 2429 int i; 2430 2431 if (ctx->bc->chip_class == CAYMAN) { 2432 for (i = 0; i < 3; i++) { 2433 /* tmp.z = log(tmp.x) */ 2434 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2435 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 2436 alu.src[0].sel = ctx->temp_reg; 2437 alu.src[0].chan = 0; 2438 alu.dst.sel = ctx->temp_reg; 2439 alu.dst.chan = i; 2440 if (i == 2) { 2441 alu.dst.write = 1; 2442 alu.last = 1; 2443 } else 2444 alu.dst.write = 0; 2445 2446 r = r600_bytecode_add_alu(ctx->bc, &alu); 2447 if (r) 2448 return r; 2449 } 2450 } else { 2451 /* tmp.z = log(tmp.x) */ 2452 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2453 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 2454 alu.src[0].sel = ctx->temp_reg; 2455 alu.src[0].chan = 0; 2456 alu.dst.sel = ctx->temp_reg; 2457 alu.dst.chan = 2; 2458 alu.dst.write = 1; 2459 alu.last = 1; 2460 r = r600_bytecode_add_alu(ctx->bc, &alu); 2461 if (r) 2462 return r; 2463 } 2464 2465 chan = alu.dst.chan; 2466 sel = alu.dst.sel; 2467 2468 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 2469 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 2471 alu.src[0].sel = sel; 2472 alu.src[0].chan = chan; 2473 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 2474 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 2475 alu.dst.sel = ctx->temp_reg; 2476 
alu.dst.chan = 0; 2477 alu.dst.write = 1; 2478 alu.is_op3 = 1; 2479 alu.last = 1; 2480 r = r600_bytecode_add_alu(ctx->bc, &alu); 2481 if (r) 2482 return r; 2483 2484 if (ctx->bc->chip_class == CAYMAN) { 2485 for (i = 0; i < 3; i++) { 2486 /* dst.z = exp(tmp.x) */ 2487 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2488 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2489 alu.src[0].sel = ctx->temp_reg; 2490 alu.src[0].chan = 0; 2491 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2492 if (i == 2) { 2493 alu.dst.write = 1; 2494 alu.last = 1; 2495 } else 2496 alu.dst.write = 0; 2497 r = r600_bytecode_add_alu(ctx->bc, &alu); 2498 if (r) 2499 return r; 2500 } 2501 } else { 2502 /* dst.z = exp(tmp.x) */ 2503 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2504 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2505 alu.src[0].sel = ctx->temp_reg; 2506 alu.src[0].chan = 0; 2507 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2508 alu.last = 1; 2509 r = r600_bytecode_add_alu(ctx->bc, &alu); 2510 if (r) 2511 return r; 2512 } 2513 } 2514 2515 /* dst.x, <- 1.0 */ 2516 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2517 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2518 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 2519 alu.src[0].chan = 0; 2520 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2521 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 2522 r = r600_bytecode_add_alu(ctx->bc, &alu); 2523 if (r) 2524 return r; 2525 2526 /* dst.y = max(src.x, 0.0) */ 2527 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2528 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 2529 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2530 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2531 alu.src[1].chan = 0; 2532 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2533 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 2534 r = r600_bytecode_add_alu(ctx->bc, &alu); 2535 if (r) 2536 return r; 2537 2538 /* dst.w, <- 1.0 */ 2539 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 2540 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2541 alu.src[0].sel = V_SQ_ALU_SRC_1; 2542 alu.src[0].chan = 0; 2543 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2544 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 2545 alu.last = 1; 2546 r = r600_bytecode_add_alu(ctx->bc, &alu); 2547 if (r) 2548 return r; 2549 2550 return 0; 2551} 2552 2553static int tgsi_rsq(struct r600_shader_ctx *ctx) 2554{ 2555 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2556 struct r600_bytecode_alu alu; 2557 int i, r; 2558 2559 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2560 2561 /* XXX: 2562 * For state trackers other than OpenGL, we'll want to use 2563 * _RECIPSQRT_IEEE instead. 2564 */ 2565 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 2566 2567 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2568 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2569 r600_bytecode_src_set_abs(&alu.src[i]); 2570 } 2571 alu.dst.sel = ctx->temp_reg; 2572 alu.dst.write = 1; 2573 alu.last = 1; 2574 r = r600_bytecode_add_alu(ctx->bc, &alu); 2575 if (r) 2576 return r; 2577 /* replicate result */ 2578 return tgsi_helper_tempx_replicate(ctx); 2579} 2580 2581static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 2582{ 2583 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2584 struct r600_bytecode_alu alu; 2585 int i, r; 2586 2587 for (i = 0; i < 4; i++) { 2588 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2589 alu.src[0].sel = ctx->temp_reg; 2590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2591 alu.dst.chan = i; 2592 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2593 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2594 if (i == 3) 2595 alu.last = 1; 2596 r = r600_bytecode_add_alu(ctx->bc, &alu); 2597 if (r) 2598 return r; 2599 } 2600 return 0; 2601} 2602 2603static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 
2604{ 2605 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2606 struct r600_bytecode_alu alu; 2607 int i, r; 2608 2609 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2610 alu.inst = ctx->inst_info->r600_opcode; 2611 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2612 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2613 } 2614 alu.dst.sel = ctx->temp_reg; 2615 alu.dst.write = 1; 2616 alu.last = 1; 2617 r = r600_bytecode_add_alu(ctx->bc, &alu); 2618 if (r) 2619 return r; 2620 /* replicate result */ 2621 return tgsi_helper_tempx_replicate(ctx); 2622} 2623 2624static int cayman_pow(struct r600_shader_ctx *ctx) 2625{ 2626 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2627 int i, r; 2628 struct r600_bytecode_alu alu; 2629 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 2630 2631 for (i = 0; i < 3; i++) { 2632 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2633 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2634 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2635 alu.dst.sel = ctx->temp_reg; 2636 alu.dst.chan = i; 2637 alu.dst.write = 1; 2638 if (i == 2) 2639 alu.last = 1; 2640 r = r600_bytecode_add_alu(ctx->bc, &alu); 2641 if (r) 2642 return r; 2643 } 2644 2645 /* b * LOG2(a) */ 2646 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2648 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2649 alu.src[1].sel = ctx->temp_reg; 2650 alu.dst.sel = ctx->temp_reg; 2651 alu.dst.write = 1; 2652 alu.last = 1; 2653 r = r600_bytecode_add_alu(ctx->bc, &alu); 2654 if (r) 2655 return r; 2656 2657 for (i = 0; i < last_slot; i++) { 2658 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2659 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2660 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2661 alu.src[0].sel = ctx->temp_reg; 2662 2663 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2664 alu.dst.write = 
(inst->Dst[0].Register.WriteMask >> i) & 1; 2665 if (i == last_slot - 1) 2666 alu.last = 1; 2667 r = r600_bytecode_add_alu(ctx->bc, &alu); 2668 if (r) 2669 return r; 2670 } 2671 return 0; 2672} 2673 2674static int tgsi_pow(struct r600_shader_ctx *ctx) 2675{ 2676 struct r600_bytecode_alu alu; 2677 int r; 2678 2679 /* LOG2(a) */ 2680 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2681 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2682 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2683 alu.dst.sel = ctx->temp_reg; 2684 alu.dst.write = 1; 2685 alu.last = 1; 2686 r = r600_bytecode_add_alu(ctx->bc, &alu); 2687 if (r) 2688 return r; 2689 /* b * LOG2(a) */ 2690 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2691 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2692 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2693 alu.src[1].sel = ctx->temp_reg; 2694 alu.dst.sel = ctx->temp_reg; 2695 alu.dst.write = 1; 2696 alu.last = 1; 2697 r = r600_bytecode_add_alu(ctx->bc, &alu); 2698 if (r) 2699 return r; 2700 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2701 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2702 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2703 alu.src[0].sel = ctx->temp_reg; 2704 alu.dst.sel = ctx->temp_reg; 2705 alu.dst.write = 1; 2706 alu.last = 1; 2707 r = r600_bytecode_add_alu(ctx->bc, &alu); 2708 if (r) 2709 return r; 2710 return tgsi_helper_tempx_replicate(ctx); 2711} 2712 2713static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 2714{ 2715 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2716 struct r600_bytecode_alu alu; 2717 int i, r, j; 2718 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2719 int tmp0 = ctx->temp_reg; 2720 int tmp1 = r600_get_temp(ctx); 2721 int tmp2 = r600_get_temp(ctx); 2722 int tmp3 = r600_get_temp(ctx); 2723 /* Unsigned path: 2724 * 2725 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 2726 * 2727 * 1. 
tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 2728 * 2. tmp0.z = lo (tmp0.x * src2) 2729 * 3. tmp0.w = -tmp0.z 2730 * 4. tmp0.y = hi (tmp0.x * src2) 2731 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 2732 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 2733 * 7. tmp1.x = tmp0.x - tmp0.w 2734 * 8. tmp1.y = tmp0.x + tmp0.w 2735 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 2736 * 10. tmp0.z = hi(tmp0.x * src1) = q 2737 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 2738 * 2739 * 12. tmp0.w = src1 - tmp0.y = r 2740 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 2741 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 2742 * 2743 * if DIV 2744 * 2745 * 15. tmp1.z = tmp0.z + 1 = q + 1 2746 * 16. tmp1.w = tmp0.z - 1 = q - 1 2747 * 2748 * else MOD 2749 * 2750 * 15. tmp1.z = tmp0.w - src2 = r - src2 2751 * 16. tmp1.w = tmp0.w + src2 = r + src2 2752 * 2753 * endif 2754 * 2755 * 17. tmp1.x = tmp1.x & tmp1.y 2756 * 2757 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 2758 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 2759 * 2760 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 2761 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 2762 * 2763 * Signed path: 2764 * 2765 * Same as unsigned, using abs values of the operands, 2766 * and fixing the sign of the result in the end. 
2767 */ 2768 2769 for (i = 0; i < 4; i++) { 2770 if (!(write_mask & (1<<i))) 2771 continue; 2772 2773 if (signed_op) { 2774 2775 /* tmp2.x = -src0 */ 2776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2777 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2778 2779 alu.dst.sel = tmp2; 2780 alu.dst.chan = 0; 2781 alu.dst.write = 1; 2782 2783 alu.src[0].sel = V_SQ_ALU_SRC_0; 2784 2785 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2786 2787 alu.last = 1; 2788 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2789 return r; 2790 2791 /* tmp2.y = -src1 */ 2792 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2793 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2794 2795 alu.dst.sel = tmp2; 2796 alu.dst.chan = 1; 2797 alu.dst.write = 1; 2798 2799 alu.src[0].sel = V_SQ_ALU_SRC_0; 2800 2801 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2802 2803 alu.last = 1; 2804 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2805 return r; 2806 2807 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 2808 /* it will be a sign of the quotient */ 2809 if (!mod) { 2810 2811 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2812 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT); 2813 2814 alu.dst.sel = tmp2; 2815 alu.dst.chan = 2; 2816 alu.dst.write = 1; 2817 2818 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2819 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2820 2821 alu.last = 1; 2822 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2823 return r; 2824 } 2825 2826 /* tmp2.x = |src0| */ 2827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2828 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2829 alu.is_op3 = 1; 2830 2831 alu.dst.sel = tmp2; 2832 alu.dst.chan = 0; 2833 alu.dst.write = 1; 2834 2835 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2836 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2837 alu.src[2].sel = tmp2; 2838 alu.src[2].chan = 0; 2839 2840 alu.last = 1; 2841 if ((r = 
r600_bytecode_add_alu(ctx->bc, &alu))) 2842 return r; 2843 2844 /* tmp2.y = |src1| */ 2845 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2847 alu.is_op3 = 1; 2848 2849 alu.dst.sel = tmp2; 2850 alu.dst.chan = 1; 2851 alu.dst.write = 1; 2852 2853 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2854 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2855 alu.src[2].sel = tmp2; 2856 alu.src[2].chan = 1; 2857 2858 alu.last = 1; 2859 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2860 return r; 2861 2862 } 2863 2864 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 2865 if (ctx->bc->chip_class == CAYMAN) { 2866 /* tmp3.x = u2f(src2) */ 2867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 2869 2870 alu.dst.sel = tmp3; 2871 alu.dst.chan = 0; 2872 alu.dst.write = 1; 2873 2874 if (signed_op) { 2875 alu.src[0].sel = tmp2; 2876 alu.src[0].chan = 1; 2877 } else { 2878 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2879 } 2880 2881 alu.last = 1; 2882 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2883 return r; 2884 2885 /* tmp0.x = recip(tmp3.x) */ 2886 for (j = 0 ; j < 3; j++) { 2887 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2888 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; 2889 2890 alu.dst.sel = tmp0; 2891 alu.dst.chan = j; 2892 alu.dst.write = (j == 0); 2893 2894 alu.src[0].sel = tmp3; 2895 alu.src[0].chan = 0; 2896 2897 if (j == 2) 2898 alu.last = 1; 2899 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2900 return r; 2901 } 2902 2903 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2904 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2905 2906 alu.src[0].sel = tmp0; 2907 alu.src[0].chan = 0; 2908 2909 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2910 alu.src[1].value = 0x4f800000; 2911 2912 alu.dst.sel = tmp3; 2913 alu.dst.write = 1; 2914 alu.last = 1; 2915 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 2916 if (r) 2917 return r; 2918 2919 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2920 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 2921 2922 alu.dst.sel = tmp0; 2923 alu.dst.chan = 0; 2924 alu.dst.write = 1; 2925 2926 alu.src[0].sel = tmp3; 2927 alu.src[0].chan = 0; 2928 2929 alu.last = 1; 2930 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2931 return r; 2932 2933 } else { 2934 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2935 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT); 2936 2937 alu.dst.sel = tmp0; 2938 alu.dst.chan = 0; 2939 alu.dst.write = 1; 2940 2941 if (signed_op) { 2942 alu.src[0].sel = tmp2; 2943 alu.src[0].chan = 1; 2944 } else { 2945 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2946 } 2947 2948 alu.last = 1; 2949 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2950 return r; 2951 } 2952 2953 /* 2. tmp0.z = lo (tmp0.x * src2) */ 2954 if (ctx->bc->chip_class == CAYMAN) { 2955 for (j = 0 ; j < 4; j++) { 2956 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2957 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2958 2959 alu.dst.sel = tmp0; 2960 alu.dst.chan = j; 2961 alu.dst.write = (j == 2); 2962 2963 alu.src[0].sel = tmp0; 2964 alu.src[0].chan = 0; 2965 if (signed_op) { 2966 alu.src[1].sel = tmp2; 2967 alu.src[1].chan = 1; 2968 } else { 2969 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2970 } 2971 2972 alu.last = (j == 3); 2973 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2974 return r; 2975 } 2976 } else { 2977 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2978 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2979 2980 alu.dst.sel = tmp0; 2981 alu.dst.chan = 2; 2982 alu.dst.write = 1; 2983 2984 alu.src[0].sel = tmp0; 2985 alu.src[0].chan = 0; 2986 if (signed_op) { 2987 alu.src[1].sel = tmp2; 2988 alu.src[1].chan = 1; 2989 } else { 2990 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2991 } 2992 2993 alu.last = 1; 
2994 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2995 return r; 2996 } 2997 2998 /* 3. tmp0.w = -tmp0.z */ 2999 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3001 3002 alu.dst.sel = tmp0; 3003 alu.dst.chan = 3; 3004 alu.dst.write = 1; 3005 3006 alu.src[0].sel = V_SQ_ALU_SRC_0; 3007 alu.src[1].sel = tmp0; 3008 alu.src[1].chan = 2; 3009 3010 alu.last = 1; 3011 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3012 return r; 3013 3014 /* 4. tmp0.y = hi (tmp0.x * src2) */ 3015 if (ctx->bc->chip_class == CAYMAN) { 3016 for (j = 0 ; j < 4; j++) { 3017 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3018 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3019 3020 alu.dst.sel = tmp0; 3021 alu.dst.chan = j; 3022 alu.dst.write = (j == 1); 3023 3024 alu.src[0].sel = tmp0; 3025 alu.src[0].chan = 0; 3026 3027 if (signed_op) { 3028 alu.src[1].sel = tmp2; 3029 alu.src[1].chan = 1; 3030 } else { 3031 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3032 } 3033 alu.last = (j == 3); 3034 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3035 return r; 3036 } 3037 } else { 3038 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3039 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3040 3041 alu.dst.sel = tmp0; 3042 alu.dst.chan = 1; 3043 alu.dst.write = 1; 3044 3045 alu.src[0].sel = tmp0; 3046 alu.src[0].chan = 0; 3047 3048 if (signed_op) { 3049 alu.src[1].sel = tmp2; 3050 alu.src[1].chan = 1; 3051 } else { 3052 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3053 } 3054 3055 alu.last = 1; 3056 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3057 return r; 3058 } 3059 3060 /* 5. tmp0.z = (tmp0.y == 0 ? 
tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 3061 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3062 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3063 alu.is_op3 = 1; 3064 3065 alu.dst.sel = tmp0; 3066 alu.dst.chan = 2; 3067 alu.dst.write = 1; 3068 3069 alu.src[0].sel = tmp0; 3070 alu.src[0].chan = 1; 3071 alu.src[1].sel = tmp0; 3072 alu.src[1].chan = 3; 3073 alu.src[2].sel = tmp0; 3074 alu.src[2].chan = 2; 3075 3076 alu.last = 1; 3077 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3078 return r; 3079 3080 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 3081 if (ctx->bc->chip_class == CAYMAN) { 3082 for (j = 0 ; j < 4; j++) { 3083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3084 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3085 3086 alu.dst.sel = tmp0; 3087 alu.dst.chan = j; 3088 alu.dst.write = (j == 3); 3089 3090 alu.src[0].sel = tmp0; 3091 alu.src[0].chan = 2; 3092 3093 alu.src[1].sel = tmp0; 3094 alu.src[1].chan = 0; 3095 3096 alu.last = (j == 3); 3097 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3098 return r; 3099 } 3100 } else { 3101 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3103 3104 alu.dst.sel = tmp0; 3105 alu.dst.chan = 3; 3106 alu.dst.write = 1; 3107 3108 alu.src[0].sel = tmp0; 3109 alu.src[0].chan = 2; 3110 3111 alu.src[1].sel = tmp0; 3112 alu.src[1].chan = 0; 3113 3114 alu.last = 1; 3115 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3116 return r; 3117 } 3118 3119 /* 7. tmp1.x = tmp0.x - tmp0.w */ 3120 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3121 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3122 3123 alu.dst.sel = tmp1; 3124 alu.dst.chan = 0; 3125 alu.dst.write = 1; 3126 3127 alu.src[0].sel = tmp0; 3128 alu.src[0].chan = 0; 3129 alu.src[1].sel = tmp0; 3130 alu.src[1].chan = 3; 3131 3132 alu.last = 1; 3133 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3134 return r; 3135 3136 /* 8. 
tmp1.y = tmp0.x + tmp0.w */ 3137 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3138 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3139 3140 alu.dst.sel = tmp1; 3141 alu.dst.chan = 1; 3142 alu.dst.write = 1; 3143 3144 alu.src[0].sel = tmp0; 3145 alu.src[0].chan = 0; 3146 alu.src[1].sel = tmp0; 3147 alu.src[1].chan = 3; 3148 3149 alu.last = 1; 3150 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3151 return r; 3152 3153 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 3154 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3156 alu.is_op3 = 1; 3157 3158 alu.dst.sel = tmp0; 3159 alu.dst.chan = 0; 3160 alu.dst.write = 1; 3161 3162 alu.src[0].sel = tmp0; 3163 alu.src[0].chan = 1; 3164 alu.src[1].sel = tmp1; 3165 alu.src[1].chan = 1; 3166 alu.src[2].sel = tmp1; 3167 alu.src[2].chan = 0; 3168 3169 alu.last = 1; 3170 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3171 return r; 3172 3173 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 3174 if (ctx->bc->chip_class == CAYMAN) { 3175 for (j = 0 ; j < 4; j++) { 3176 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3177 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3178 3179 alu.dst.sel = tmp0; 3180 alu.dst.chan = j; 3181 alu.dst.write = (j == 2); 3182 3183 alu.src[0].sel = tmp0; 3184 alu.src[0].chan = 0; 3185 3186 if (signed_op) { 3187 alu.src[1].sel = tmp2; 3188 alu.src[1].chan = 0; 3189 } else { 3190 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3191 } 3192 3193 alu.last = (j == 3); 3194 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3195 return r; 3196 } 3197 } else { 3198 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 3200 3201 alu.dst.sel = tmp0; 3202 alu.dst.chan = 2; 3203 alu.dst.write = 1; 3204 3205 alu.src[0].sel = tmp0; 3206 alu.src[0].chan = 0; 3207 3208 if (signed_op) { 3209 alu.src[1].sel = tmp2; 3210 alu.src[1].chan = 0; 3211 } else { 
3212 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3213 } 3214 3215 alu.last = 1; 3216 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3217 return r; 3218 } 3219 3220 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 3221 if (ctx->bc->chip_class == CAYMAN) { 3222 for (j = 0 ; j < 4; j++) { 3223 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3224 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3225 3226 alu.dst.sel = tmp0; 3227 alu.dst.chan = j; 3228 alu.dst.write = (j == 1); 3229 3230 if (signed_op) { 3231 alu.src[0].sel = tmp2; 3232 alu.src[0].chan = 1; 3233 } else { 3234 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3235 } 3236 3237 alu.src[1].sel = tmp0; 3238 alu.src[1].chan = 2; 3239 3240 alu.last = (j == 3); 3241 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3242 return r; 3243 } 3244 } else { 3245 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3246 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3247 3248 alu.dst.sel = tmp0; 3249 alu.dst.chan = 1; 3250 alu.dst.write = 1; 3251 3252 if (signed_op) { 3253 alu.src[0].sel = tmp2; 3254 alu.src[0].chan = 1; 3255 } else { 3256 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3257 } 3258 3259 alu.src[1].sel = tmp0; 3260 alu.src[1].chan = 2; 3261 3262 alu.last = 1; 3263 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3264 return r; 3265 } 3266 3267 /* 12. tmp0.w = src1 - tmp0.y = r */ 3268 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3269 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3270 3271 alu.dst.sel = tmp0; 3272 alu.dst.chan = 3; 3273 alu.dst.write = 1; 3274 3275 if (signed_op) { 3276 alu.src[0].sel = tmp2; 3277 alu.src[0].chan = 0; 3278 } else { 3279 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3280 } 3281 3282 alu.src[1].sel = tmp0; 3283 alu.src[1].chan = 1; 3284 3285 alu.last = 1; 3286 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3287 return r; 3288 3289 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 3290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3291 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 3292 3293 alu.dst.sel = tmp1; 3294 alu.dst.chan = 0; 3295 alu.dst.write = 1; 3296 3297 alu.src[0].sel = tmp0; 3298 alu.src[0].chan = 3; 3299 if (signed_op) { 3300 alu.src[1].sel = tmp2; 3301 alu.src[1].chan = 1; 3302 } else { 3303 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3304 } 3305 3306 alu.last = 1; 3307 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3308 return r; 3309 3310 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 3311 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 3313 3314 alu.dst.sel = tmp1; 3315 alu.dst.chan = 1; 3316 alu.dst.write = 1; 3317 3318 if (signed_op) { 3319 alu.src[0].sel = tmp2; 3320 alu.src[0].chan = 0; 3321 } else { 3322 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3323 } 3324 3325 alu.src[1].sel = tmp0; 3326 alu.src[1].chan = 1; 3327 3328 alu.last = 1; 3329 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3330 return r; 3331 3332 if (mod) { /* UMOD */ 3333 3334 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 3335 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3337 3338 alu.dst.sel = tmp1; 3339 alu.dst.chan = 2; 3340 alu.dst.write = 1; 3341 3342 alu.src[0].sel = tmp0; 3343 alu.src[0].chan = 3; 3344 3345 if (signed_op) { 3346 alu.src[1].sel = tmp2; 3347 alu.src[1].chan = 1; 3348 } else { 3349 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3350 } 3351 3352 alu.last = 1; 3353 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3354 return r; 3355 3356 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 3357 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3358 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3359 3360 alu.dst.sel = tmp1; 3361 alu.dst.chan = 3; 3362 alu.dst.write = 1; 3363 3364 alu.src[0].sel = tmp0; 3365 alu.src[0].chan = 3; 3366 if (signed_op) { 3367 alu.src[1].sel = tmp2; 3368 alu.src[1].chan = 1; 3369 } else { 3370 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3371 } 3372 3373 alu.last = 1; 3374 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3375 return r; 3376 3377 } else { /* UDIV */ 3378 3379 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 3380 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3381 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3382 3383 alu.dst.sel = tmp1; 3384 alu.dst.chan = 2; 3385 alu.dst.write = 1; 3386 3387 alu.src[0].sel = tmp0; 3388 alu.src[0].chan = 2; 3389 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 3390 3391 alu.last = 1; 3392 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3393 return r; 3394 3395 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 3396 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3397 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3398 3399 alu.dst.sel = tmp1; 3400 alu.dst.chan = 3; 3401 alu.dst.write = 1; 3402 3403 alu.src[0].sel = tmp0; 3404 alu.src[0].chan = 2; 3405 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 3406 3407 alu.last = 1; 3408 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3409 return r; 3410 3411 } 3412 3413 /* 17. tmp1.x = tmp1.x & tmp1.y */ 3414 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); 3416 3417 alu.dst.sel = tmp1; 3418 alu.dst.chan = 0; 3419 alu.dst.write = 1; 3420 3421 alu.src[0].sel = tmp1; 3422 alu.src[0].chan = 0; 3423 alu.src[1].sel = tmp1; 3424 alu.src[1].chan = 1; 3425 3426 alu.last = 1; 3427 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3428 return r; 3429 3430 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 3431 /* 18. 
tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 3432 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3434 alu.is_op3 = 1; 3435 3436 alu.dst.sel = tmp0; 3437 alu.dst.chan = 2; 3438 alu.dst.write = 1; 3439 3440 alu.src[0].sel = tmp1; 3441 alu.src[0].chan = 0; 3442 alu.src[1].sel = tmp0; 3443 alu.src[1].chan = mod ? 3 : 2; 3444 alu.src[2].sel = tmp1; 3445 alu.src[2].chan = 2; 3446 3447 alu.last = 1; 3448 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3449 return r; 3450 3451 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 3452 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3453 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 3454 alu.is_op3 = 1; 3455 3456 if (signed_op) { 3457 alu.dst.sel = tmp0; 3458 alu.dst.chan = 2; 3459 alu.dst.write = 1; 3460 } else { 3461 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3462 } 3463 3464 alu.src[0].sel = tmp1; 3465 alu.src[0].chan = 1; 3466 alu.src[1].sel = tmp1; 3467 alu.src[1].chan = 3; 3468 alu.src[2].sel = tmp0; 3469 alu.src[2].chan = 2; 3470 3471 alu.last = 1; 3472 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3473 return r; 3474 3475 if (signed_op) { 3476 3477 /* fix the sign of the result */ 3478 3479 if (mod) { 3480 3481 /* tmp0.x = -tmp0.z */ 3482 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3484 3485 alu.dst.sel = tmp0; 3486 alu.dst.chan = 0; 3487 alu.dst.write = 1; 3488 3489 alu.src[0].sel = V_SQ_ALU_SRC_0; 3490 alu.src[1].sel = tmp0; 3491 alu.src[1].chan = 2; 3492 3493 alu.last = 1; 3494 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3495 return r; 3496 3497 /* sign of the remainder is the same as the sign of src0 */ 3498 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 3499 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3500 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3501 alu.is_op3 = 1; 3502 3503 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3504 3505 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3506 alu.src[1].sel = tmp0; 3507 alu.src[1].chan = 2; 3508 alu.src[2].sel = tmp0; 3509 alu.src[2].chan = 0; 3510 3511 alu.last = 1; 3512 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3513 return r; 3514 3515 } else { 3516 3517 /* tmp0.x = -tmp0.z */ 3518 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3519 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 3520 3521 alu.dst.sel = tmp0; 3522 alu.dst.chan = 0; 3523 alu.dst.write = 1; 3524 3525 alu.src[0].sel = V_SQ_ALU_SRC_0; 3526 alu.src[1].sel = tmp0; 3527 alu.src[1].chan = 2; 3528 3529 alu.last = 1; 3530 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3531 return r; 3532 3533 /* fix the quotient sign (same as the sign of src0*src1) */ 3534 /* tmp0.x = tmp2.z>=0 ? 
tmp0.z : tmp0.x */ 3535 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 3537 alu.is_op3 = 1; 3538 3539 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3540 3541 alu.src[0].sel = tmp2; 3542 alu.src[0].chan = 2; 3543 alu.src[1].sel = tmp0; 3544 alu.src[1].chan = 2; 3545 alu.src[2].sel = tmp0; 3546 alu.src[2].chan = 0; 3547 3548 alu.last = 1; 3549 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3550 return r; 3551 } 3552 } 3553 } 3554 return 0; 3555}

/*
 * Integer divide / modulo entry points.
 *
 * Each one forwards to tgsi_divmod() with a pair of 0/1 flags.  Going by the
 * wrapper names the first flag selects "modulo" and the second "signed";
 * NOTE(review): tgsi_divmod()'s signature is defined earlier in the file,
 * confirm the flag order there.
 */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	int status = tgsi_divmod(ctx, 0, 0);

	return status;
}

static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	int status = tgsi_divmod(ctx, 1, 0);

	return status;
}

static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	int status = tgsi_divmod(ctx, 0, 1);

	return status;
}

static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	int status = tgsi_divmod(ctx, 1, 1);

	return status;
}


/*
 * Two-pass conversion over the channels enabled in the destination write mask:
 *   pass 1: temp_reg.c = TRUNC(src0.c)
 *   pass 2: dst.c      = <opcode from ctx->inst_info>(temp_reg.c)
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	const int last_chan = tgsi_last_instruction(mask);
	struct r600_bytecode_alu alu;
	int chan, err;

	/* pass 1: truncate into the temporary */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = chan;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}

	/* pass 2: convert the truncated value into the real destination */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = ctx->inst_info->r600_opcode;

			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);

			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = chan;

			/* FLT_TO_UINT always closes its instruction group */
			alu.last = (chan == last_chan ||
				    alu.inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}

	return 0;
}

/*
 * Integer absolute value for every enabled destination channel:
 *   temp_reg.c = 0 - src0.c
 *   dst.c      = (src0.c >= 0 ? src0.c : temp_reg.c)
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	const int last_chan = tgsi_last_instruction(mask);
	struct r600_bytecode_alu alu;
	int chan, err;

	/* negate the source into the temporary */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = chan;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[1], &ctx->src[0], chan);
			alu.src[0].sel = V_SQ_ALU_SRC_0;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}

	/* pick the original or the negated value */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
			alu.is_op3 = 1;
			alu.dst.write = 1;

			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);

			r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], chan);
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = chan;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}
	return 0;
}

/*
 * Integer sign for every enabled destination channel:
 *   temp_reg.c = (src0.c >= 0 ? src0.c : -1)
 *   dst.c      = (temp_reg.c > 0 ? 1 : temp_reg.c)
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned mask = inst->Dst[0].Register.WriteMask;
	const int last_chan = tgsi_last_instruction(mask);
	struct r600_bytecode_alu alu;
	int chan, err;

	/* clamp negatives to -1 */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
			alu.is_op3 = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = chan;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], chan);
			alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}

	/* clamp positives to 1 */
	for (chan = 0; chan < 4; chan++) {
		if (mask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
			alu.is_op3 = 1;
			alu.dst.write = 1;

			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);

			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = chan;

			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = chan;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}
	return 0;
}



/*
 * Float sign, emitted for all four channels regardless of the write mask:
 *   temp_reg.c = (src0.c > 0 ? 1 : src0.c)
 *   dst.c      = (-temp_reg.c > 0 ? -1 : temp_reg.c)
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int chan, err;

	/* clamp positives to 1.0 */
	for (chan = 0; chan < 4; chan++) {
		memset(&alu, 0, sizeof(alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = chan;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], chan);

		alu.last = (chan == 3);

		err = r600_bytecode_add_alu(ctx->bc, &alu);
		if (err)
			return err;
	}

	/* clamp negatives to -1.0 (source negate modifiers on src0 and src1) */
	for (chan = 0; chan < 4; chan++) {
		memset(&alu, 0, sizeof(alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = chan;
		alu.src[0].neg = 1;

		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = chan;

		alu.last = (chan == 3);

		err = r600_bytecode_add_alu(ctx->bc, &alu);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Copy ctx->temp_reg into the destination of 'inst'.  Four ALU slots are
 * always emitted: a MOV for each channel enabled in the write mask and a NOP
 * for each disabled one; the slot for channel 3 closes the group.
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct r600_bytecode_alu alu;
	int chan, err;

	for (chan = 0; chan < 4; chan++) {
		memset(&alu, 0, sizeof(alu));

		if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = chan;
		} else {
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
			alu.dst.chan = chan;
		}

		alu.last = (chan == 3);

		err = r600_bytecode_add_alu(ctx->bc, &alu);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Generic three-operand translation: for every enabled destination channel
 * emit the opcode from ctx->inst_info with all TGSI sources of that channel.
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const int last_chan = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	struct r600_bytecode_alu alu;
	int chan, s, err;

	for (chan = 0; chan <= last_chan; chan++) {
		if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = ctx->inst_info->r600_opcode;

			for (s = 0; s < inst->Instruction.NumSrcRegs; s++)
				r600_bytecode_src(&alu.src[s], &ctx->src[s], chan);

			/* chan/write are forced after tgsi_dst() filled in the rest */
			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
			alu.dst.chan = chan;
			alu.dst.write = 1;
			alu.is_op3 = 1;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}
	return 0;
}

/*
 * Dot-product family.  The opcode from ctx->inst_info is emitted in all four
 * slots; only slots enabled in the write mask have dst.write set.  Operand
 * fix-ups per TGSI opcode:
 *   DP2: slots 2 and 3 multiply 0 * 0
 *   DP3: slot 3 multiplies 0 * 0
 *   DPH: slot 3 uses constant 1 (not negated) as its first operand
 * Returns 0 on success or the first non-zero value reported by
 * r600_bytecode_add_alu().
 */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int chan, s, err;

	for (chan = 0; chan < 4; chan++) {
		memset(&alu, 0, sizeof(alu));
		alu.inst = ctx->inst_info->r600_opcode;

		for (s = 0; s < inst->Instruction.NumSrcRegs; s++)
			r600_bytecode_src(&alu.src[s], &ctx->src[s], chan);

		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
		alu.dst.chan = chan;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> chan) & 1;

		/* operand overrides must come after r600_bytecode_src() above */
		if ((ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DP2 && chan > 1) ||
		    (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DP3 && chan > 2)) {
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = 0;
			alu.src[0].chan = 0;
		} else if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DPH && chan == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
			alu.src[0].chan = 0;
			alu.src[0].neg = 0;
		}

		alu.last = (chan == 3);

		err = r600_bytecode_add_alu(ctx->bc, &alu);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Non-zero when TGSI source 'index' has to be copied before a fetch can use
 * it: its register file is none of TEMPORARY / INPUT / OUTPUT, or the
 * translated source carries a neg or abs modifier.
 */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const int in_gpr_file =
		inst->Src[index].Register.File == TGSI_FILE_TEMPORARY ||
		inst->Src[index].Register.File == TGSI_FILE_INPUT ||
		inst->Src[index].Register.File == TGSI_FILE_OUTPUT;

	return !in_gpr_file || ctx->src[index].neg || ctx->src[index].abs;
}

/*
 * Register number of TGSI source 'index': the base offset of its register
 * file plus its index within that file.
 */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

	return ctx->file_offset[inst->Src[index].Register.File] +
	       inst->Src[index].Register.Index;
}

/*
 * Emit a vertex fetch whose address comes from TGSI source 0 and whose buffer
 * id is derived from TGSI source 1; the result goes to the TGSI destination
 * register with disabled channels masked out (dst_sel 7).
 *
 * When src_requires_loading is set, source 0 is first copied channel by
 * channel into ctx->temp_reg and the fetch reads from there.
 *
 * On chips older than EVERGREEN the fetched value is post-processed with
 * constants from kcache bank R600_BUFFER_INFO_CONST_BUFFER: every enabled
 * channel is ANDed with constant 512 + id * 2, then channel 3 is ORed with
 * channel 0 of constant 512 + id * 2 + 1.
 *
 * Returns 0 on success or the first non-zero value reported by the bytecode
 * helpers.
 */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	int id = tgsi_tex_get_src_gpr(ctx, 1);
	int src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	int chan, err;

	if (src_requires_loading) {
		for (chan = 0; chan < 4; chan++) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
			r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = chan;
			alu.dst.write = 1;
			alu.last = (chan == 3);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.inst = 0;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
	vtx.use_const_fields = 1;
	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */

	err = r600_bytecode_add_vtx(ctx->bc, &vtx);
	if (err)
		return err;

	/* the constant-based fix-up below is only emitted before EVERGREEN */
	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	for (chan = 0; chan < 4; chan++) {
		int last_chan = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

		if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
			memset(&alu, 0, sizeof(alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT);

			alu.dst.chan = chan;
			alu.dst.sel = vtx.dst_gpr;
			alu.dst.write = 1;

			alu.src[0].sel = vtx.dst_gpr;
			alu.src[0].chan = chan;

			alu.src[1].sel = 512 + (id * 2);
			alu.src[1].chan = chan % 4;
			alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

			alu.last = (chan == last_chan);

			err = r600_bytecode_add_alu(ctx->bc, &alu);
			if (err)
				return err;
		}
	}

	/*
	 * NOTE(review): the guard tests write-mask bits 0 and 1 (mask & 3) while
	 * the instruction reads and writes channel 3 -- confirm this is intended.
	 */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT);

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = 512 + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;

		err = r600_bytecode_add_alu(ctx->bc, &alu);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Size query on a buffer texture: a single MOV from kcache bank
 * R600_BUFFER_INFO_CONST_BUFFER into channel 0 of the destination.
 *   EVERGREEN and later: constant 512 + id / 4, channel id % 4
 *   older chips:         constant 512 + id * 2 + 1, channel 1
 * where id is derived from TGSI source 1.
 * Returns the result of r600_bytecode_add_alu() (0 on success).
 */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(alu));
	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);

	if (ctx->bc->chip_class < EVERGREEN) {
		/*
		 * NOTE(review): the comment that used to sit here said "channel 2
		 * of the second dword", yet the code selects chan 1 -- confirm
		 * which one is meant.
		 */
		alu.src[0].sel = 512 + (id * 2) + 1;
		alu.src[0].chan = 1;
	} else {
		alu.src[0].sel = 512 + (id / 4);
		alu.src[0].chan = id % 4;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;

	return r600_bytecode_add_alu(ctx->bc, &alu);
}

4044 static int tgsi_tex(struct r600_shader_ctx *ctx) 4045{ 4046 static float one_point_five = 1.5f; 4047 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4048 struct r600_bytecode_tex tex; 4049 struct r600_bytecode_alu alu; 4050 unsigned src_gpr; 4051 int r, i, j; 4052 int opcode; 4053 bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED && 4054 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 4055 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 4056 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 4057 /* Texture fetch instructions can only use gprs as source. 4058 * Also they cannot negate the source or take the absolute value */ 4059 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 4060 tgsi_tex_src_requires_loading(ctx, 0)) || 4061 read_compressed_msaa; 4062 boolean src_loaded = FALSE; 4063 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 
0 : 1; 4064 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 4065 boolean has_txq_cube_array_z = false; 4066 4067 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 4068 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4069 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 4070 if (inst->Dst[0].Register.WriteMask & 4) { 4071 ctx->shader->has_txq_cube_array_z_comp = true; 4072 has_txq_cube_array_z = true; 4073 } 4074 4075 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 4076 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4077 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 4078 sampler_src_reg = 2; 4079 4080 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 4081 4082 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 4083 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 4084 ctx->shader->uses_tex_buffers = true; 4085 return r600_do_buffer_txq(ctx); 4086 } 4087 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 4088 if (ctx->bc->chip_class < EVERGREEN) 4089 ctx->shader->uses_tex_buffers = true; 4090 return do_vtx_fetch_inst(ctx, src_requires_loading); 4091 } 4092 } 4093 4094 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 4095 /* get offset values */ 4096 if (inst->Texture.NumOffsets) { 4097 assert(inst->Texture.NumOffsets == 1); 4098 4099 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 4100 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 4101 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 4102 } 4103 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 4104 /* TGSI moves the sampler to src reg 3 for TXD */ 4105 sampler_src_reg = 3; 4106 4107 for (i = 1; i < 3; i++) { 4108 /* set gradients h/v */ 4109 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4110 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 4111 SQ_TEX_INST_SET_GRADIENTS_V; 4112 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4113 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4114 4115 if (tgsi_tex_src_requires_loading(ctx, i)) { 4116 tex.src_gpr = r600_get_temp(ctx); 4117 tex.src_sel_x = 0; 4118 tex.src_sel_y = 1; 4119 tex.src_sel_z = 2; 4120 tex.src_sel_w = 3; 4121 4122 for (j = 0; j < 4; j++) { 4123 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4124 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4125 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 4126 alu.dst.sel = tex.src_gpr; 4127 alu.dst.chan = j; 4128 if (j == 3) 4129 alu.last = 1; 4130 alu.dst.write = 1; 4131 r = r600_bytecode_add_alu(ctx->bc, &alu); 4132 if (r) 4133 return r; 4134 } 4135 4136 } else { 4137 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 4138 tex.src_sel_x = ctx->src[i].swizzle[0]; 4139 tex.src_sel_y = ctx->src[i].swizzle[1]; 4140 tex.src_sel_z = ctx->src[i].swizzle[2]; 4141 tex.src_sel_w = ctx->src[i].swizzle[3]; 4142 tex.src_rel = ctx->src[i].rel; 4143 } 4144 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 4145 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 4146 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 4147 tex.coord_type_x = 1; 4148 tex.coord_type_y = 1; 4149 tex.coord_type_z = 1; 4150 tex.coord_type_w = 1; 4151 } 4152 r = r600_bytecode_add_tex(ctx->bc, &tex); 4153 if (r) 4154 return r; 4155 } 4156 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 4157 int out_chan; 4158 /* Add perspective divide */ 4159 if (ctx->bc->chip_class == CAYMAN) { 4160 out_chan = 2; 4161 for (i = 0; i < 3; i++) { 4162 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4163 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4164 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4165 4166 alu.dst.sel = ctx->temp_reg; 4167 alu.dst.chan = i; 4168 if (i == 2) 4169 alu.last = 1; 4170 if (out_chan == i) 4171 
alu.dst.write = 1; 4172 r = r600_bytecode_add_alu(ctx->bc, &alu); 4173 if (r) 4174 return r; 4175 } 4176 4177 } else { 4178 out_chan = 3; 4179 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4180 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4181 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4182 4183 alu.dst.sel = ctx->temp_reg; 4184 alu.dst.chan = out_chan; 4185 alu.last = 1; 4186 alu.dst.write = 1; 4187 r = r600_bytecode_add_alu(ctx->bc, &alu); 4188 if (r) 4189 return r; 4190 } 4191 4192 for (i = 0; i < 3; i++) { 4193 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4194 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4195 alu.src[0].sel = ctx->temp_reg; 4196 alu.src[0].chan = out_chan; 4197 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4198 alu.dst.sel = ctx->temp_reg; 4199 alu.dst.chan = i; 4200 alu.dst.write = 1; 4201 r = r600_bytecode_add_alu(ctx->bc, &alu); 4202 if (r) 4203 return r; 4204 } 4205 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4206 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4207 alu.src[0].sel = V_SQ_ALU_SRC_1; 4208 alu.src[0].chan = 0; 4209 alu.dst.sel = ctx->temp_reg; 4210 alu.dst.chan = 3; 4211 alu.last = 1; 4212 alu.dst.write = 1; 4213 r = r600_bytecode_add_alu(ctx->bc, &alu); 4214 if (r) 4215 return r; 4216 src_loaded = TRUE; 4217 src_gpr = ctx->temp_reg; 4218 } 4219 4220 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 4221 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4222 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4223 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 4224 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 4225 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 4226 4227 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 4228 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 4229 4230 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 4231 for (i = 0; i < 4; i++) { 4232 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4233 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 4234 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 4235 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 4236 alu.dst.sel = ctx->temp_reg; 4237 alu.dst.chan = i; 4238 if (i == 3) 4239 alu.last = 1; 4240 alu.dst.write = 1; 4241 r = r600_bytecode_add_alu(ctx->bc, &alu); 4242 if (r) 4243 return r; 4244 } 4245 4246 /* tmp1.z = RCP_e(|tmp1.z|) */ 4247 if (ctx->bc->chip_class == CAYMAN) { 4248 for (i = 0; i < 3; i++) { 4249 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4251 alu.src[0].sel = ctx->temp_reg; 4252 alu.src[0].chan = 2; 4253 alu.src[0].abs = 1; 4254 alu.dst.sel = ctx->temp_reg; 4255 alu.dst.chan = i; 4256 if (i == 2) 4257 alu.dst.write = 1; 4258 if (i == 2) 4259 alu.last = 1; 4260 r = r600_bytecode_add_alu(ctx->bc, &alu); 4261 if (r) 4262 return r; 4263 } 4264 } else { 4265 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4266 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 4267 alu.src[0].sel = ctx->temp_reg; 4268 alu.src[0].chan = 2; 4269 alu.src[0].abs = 1; 4270 alu.dst.sel = ctx->temp_reg; 4271 alu.dst.chan = 2; 4272 alu.dst.write = 1; 4273 alu.last = 1; 4274 r = r600_bytecode_add_alu(ctx->bc, &alu); 4275 if (r) 4276 return r; 4277 } 4278 4279 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 4280 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 4281 * muladd has no writemask, have to use another temp 4282 */ 4283 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4284 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4285 alu.is_op3 = 1; 4286 4287 alu.src[0].sel = ctx->temp_reg; 4288 alu.src[0].chan = 0; 4289 alu.src[1].sel = ctx->temp_reg; 4290 alu.src[1].chan = 2; 4291 4292 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4293 alu.src[2].chan = 0; 4294 alu.src[2].value = *(uint32_t *)&one_point_five; 4295 4296 alu.dst.sel = ctx->temp_reg; 4297 alu.dst.chan = 0; 4298 alu.dst.write = 1; 4299 
4300 r = r600_bytecode_add_alu(ctx->bc, &alu); 4301 if (r) 4302 return r; 4303 4304 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4306 alu.is_op3 = 1; 4307 4308 alu.src[0].sel = ctx->temp_reg; 4309 alu.src[0].chan = 1; 4310 alu.src[1].sel = ctx->temp_reg; 4311 alu.src[1].chan = 2; 4312 4313 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4314 alu.src[2].chan = 0; 4315 alu.src[2].value = *(uint32_t *)&one_point_five; 4316 4317 alu.dst.sel = ctx->temp_reg; 4318 alu.dst.chan = 1; 4319 alu.dst.write = 1; 4320 4321 alu.last = 1; 4322 r = r600_bytecode_add_alu(ctx->bc, &alu); 4323 if (r) 4324 return r; 4325 /* write initial compare value into Z component 4326 - W src 0 for shadow cube 4327 - X src 1 for shadow cube array */ 4328 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4329 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4330 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4331 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4332 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 4333 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4334 else 4335 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4336 alu.dst.sel = ctx->temp_reg; 4337 alu.dst.chan = 2; 4338 alu.dst.write = 1; 4339 alu.last = 1; 4340 r = r600_bytecode_add_alu(ctx->bc, &alu); 4341 if (r) 4342 return r; 4343 } 4344 4345 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4346 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4347 if (ctx->bc->chip_class >= EVERGREEN) { 4348 int mytmp = r600_get_temp(ctx); 4349 static const float eight = 8.0f; 4350 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4352 alu.src[0].sel = ctx->temp_reg; 4353 alu.src[0].chan = 3; 4354 alu.dst.sel = mytmp; 4355 alu.dst.chan = 0; 4356 alu.dst.write = 1; 4357 alu.last = 1; 4358 r = r600_bytecode_add_alu(ctx->bc, &alu); 4359 if (r) 4360 return r; 4361 
4362 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 4363 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4364 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4365 alu.is_op3 = 1; 4366 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4367 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4368 alu.src[1].chan = 0; 4369 alu.src[1].value = *(uint32_t *)&eight; 4370 alu.src[2].sel = mytmp; 4371 alu.src[2].chan = 0; 4372 alu.dst.sel = ctx->temp_reg; 4373 alu.dst.chan = 3; 4374 alu.dst.write = 1; 4375 alu.last = 1; 4376 r = r600_bytecode_add_alu(ctx->bc, &alu); 4377 if (r) 4378 return r; 4379 } else if (ctx->bc->chip_class < EVERGREEN) { 4380 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4381 tex.inst = SQ_TEX_INST_SET_CUBEMAP_INDEX; 4382 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4383 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4384 tex.src_gpr = r600_get_temp(ctx); 4385 tex.src_sel_x = 0; 4386 tex.src_sel_y = 0; 4387 tex.src_sel_z = 0; 4388 tex.src_sel_w = 0; 4389 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 4390 tex.coord_type_x = 1; 4391 tex.coord_type_y = 1; 4392 tex.coord_type_z = 1; 4393 tex.coord_type_w = 1; 4394 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4395 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4396 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4397 alu.dst.sel = tex.src_gpr; 4398 alu.dst.chan = 0; 4399 alu.last = 1; 4400 alu.dst.write = 1; 4401 r = r600_bytecode_add_alu(ctx->bc, &alu); 4402 if (r) 4403 return r; 4404 4405 r = r600_bytecode_add_tex(ctx->bc, &tex); 4406 if (r) 4407 return r; 4408 } 4409 4410 } 4411 4412 /* for cube forms of lod and bias we need to route things */ 4413 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 4414 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 4415 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4416 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 4417 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4419 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4420 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 4421 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4422 else 4423 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4424 alu.dst.sel = ctx->temp_reg; 4425 alu.dst.chan = 2; 4426 alu.last = 1; 4427 alu.dst.write = 1; 4428 r = r600_bytecode_add_alu(ctx->bc, &alu); 4429 if (r) 4430 return r; 4431 } 4432 4433 src_loaded = TRUE; 4434 src_gpr = ctx->temp_reg; 4435 } 4436 4437 if (src_requires_loading && !src_loaded) { 4438 for (i = 0; i < 4; i++) { 4439 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4440 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4441 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4442 alu.dst.sel = ctx->temp_reg; 4443 alu.dst.chan = i; 4444 if (i == 3) 4445 alu.last = 1; 4446 alu.dst.write = 1; 4447 r = r600_bytecode_add_alu(ctx->bc, &alu); 4448 if (r) 4449 return r; 4450 } 4451 src_loaded = TRUE; 4452 src_gpr = ctx->temp_reg; 4453 } 4454 4455 /* Obtain the sample index for reading a compressed MSAA color texture. 4456 * To read the FMASK, we use the ldfptr instruction, which tells us 4457 * where the samples are stored. 4458 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 4459 * which is the identity mapping. Each nibble says which physical sample 4460 * should be fetched to get that sample. 4461 * 4462 * Assume src.z contains the sample index. It should be modified like this: 4463 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 4464 * Then fetch the texel with src. 4465 */ 4466 if (read_compressed_msaa) { 4467 unsigned sample_chan = inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ? 
3 : 4; 4468 unsigned temp = r600_get_temp(ctx); 4469 assert(src_loaded); 4470 4471 /* temp.w = ldfptr() */ 4472 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4473 tex.inst = SQ_TEX_INST_LD; 4474 tex.inst_mod = 1; /* to indicate this is ldfptr */ 4475 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4476 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4477 tex.src_gpr = src_gpr; 4478 tex.dst_gpr = temp; 4479 tex.dst_sel_x = 7; /* mask out these components */ 4480 tex.dst_sel_y = 7; 4481 tex.dst_sel_z = 7; 4482 tex.dst_sel_w = 0; /* store X */ 4483 tex.src_sel_x = 0; 4484 tex.src_sel_y = 1; 4485 tex.src_sel_z = 2; 4486 tex.src_sel_w = 3; 4487 tex.offset_x = offset_x; 4488 tex.offset_y = offset_y; 4489 tex.offset_z = offset_z; 4490 r = r600_bytecode_add_tex(ctx->bc, &tex); 4491 if (r) 4492 return r; 4493 4494 /* temp.x = sample_index*4 */ 4495 if (ctx->bc->chip_class == CAYMAN) { 4496 for (i = 0 ; i < 4; i++) { 4497 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4498 alu.inst = ctx->inst_info->r600_opcode; 4499 alu.src[0].sel = src_gpr; 4500 alu.src[0].chan = sample_chan; 4501 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4502 alu.src[1].value = 4; 4503 alu.dst.sel = temp; 4504 alu.dst.chan = i; 4505 alu.dst.write = i == 0; 4506 if (i == 3) 4507 alu.last = 1; 4508 r = r600_bytecode_add_alu(ctx->bc, &alu); 4509 if (r) 4510 return r; 4511 } 4512 } else { 4513 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4514 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT); 4515 alu.src[0].sel = src_gpr; 4516 alu.src[0].chan = sample_chan; 4517 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4518 alu.src[1].value = 4; 4519 alu.dst.sel = temp; 4520 alu.dst.chan = 0; 4521 alu.dst.write = 1; 4522 alu.last = 1; 4523 r = r600_bytecode_add_alu(ctx->bc, &alu); 4524 if (r) 4525 return r; 4526 } 4527 4528 /* sample_index = temp.w >> temp.x */ 4529 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4530 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT); 4531 alu.src[0].sel = temp; 4532 alu.src[0].chan = 3; 4533 alu.src[1].sel = temp; 4534 alu.src[1].chan = 0; 4535 alu.dst.sel = src_gpr; 4536 alu.dst.chan = sample_chan; 4537 alu.dst.write = 1; 4538 alu.last = 1; 4539 r = r600_bytecode_add_alu(ctx->bc, &alu); 4540 if (r) 4541 return r; 4542 4543 /* sample_index & 0xF */ 4544 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4545 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); 4546 alu.src[0].sel = src_gpr; 4547 alu.src[0].chan = sample_chan; 4548 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4549 alu.src[1].value = 0xF; 4550 alu.dst.sel = src_gpr; 4551 alu.dst.chan = sample_chan; 4552 alu.dst.write = 1; 4553 alu.last = 1; 4554 r = r600_bytecode_add_alu(ctx->bc, &alu); 4555 if (r) 4556 return r; 4557#if 0 4558 /* visualize the FMASK */ 4559 for (i = 0; i < 4; i++) { 4560 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 4562 alu.src[0].sel = src_gpr; 4563 alu.src[0].chan = sample_chan; 4564 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 4565 alu.dst.chan = i; 4566 alu.dst.write = 1; 4567 alu.last = 1; 4568 r = r600_bytecode_add_alu(ctx->bc, &alu); 4569 if (r) 4570 return r; 4571 } 4572 return 0; 4573#endif 4574 } 4575 4576 /* does this shader want a num layers from TXQ for a cube array? 
*/ 4577 if (has_txq_cube_array_z) { 4578 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4579 4580 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4581 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4582 4583 alu.src[0].sel = 512 + (id / 4); 4584 alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER; 4585 alu.src[0].chan = id % 4; 4586 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4587 alu.last = 1; 4588 r = r600_bytecode_add_alu(ctx->bc, &alu); 4589 if (r) 4590 return r; 4591 /* disable writemask from texture instruction */ 4592 inst->Dst[0].Register.WriteMask &= ~4; 4593 } 4594 4595 opcode = ctx->inst_info->r600_opcode; 4596 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 4597 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 4598 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 4599 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4600 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 4601 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 4602 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4603 switch (opcode) { 4604 case SQ_TEX_INST_SAMPLE: 4605 opcode = SQ_TEX_INST_SAMPLE_C; 4606 break; 4607 case SQ_TEX_INST_SAMPLE_L: 4608 opcode = SQ_TEX_INST_SAMPLE_C_L; 4609 break; 4610 case SQ_TEX_INST_SAMPLE_LB: 4611 opcode = SQ_TEX_INST_SAMPLE_C_LB; 4612 break; 4613 case SQ_TEX_INST_SAMPLE_G: 4614 opcode = SQ_TEX_INST_SAMPLE_C_G; 4615 break; 4616 } 4617 } 4618 4619 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4620 tex.inst = opcode; 4621 4622 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4623 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4624 tex.src_gpr = src_gpr; 4625 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 4626 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 4627 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 4628 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 4629 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 4630 4631 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) { 4632 tex.src_sel_x = 4; 4633 tex.src_sel_y = 4; 4634 tex.src_sel_z = 4; 4635 tex.src_sel_w = 4; 4636 } else if (src_loaded) { 4637 tex.src_sel_x = 0; 4638 tex.src_sel_y = 1; 4639 tex.src_sel_z = 2; 4640 tex.src_sel_w = 3; 4641 } else { 4642 tex.src_sel_x = ctx->src[0].swizzle[0]; 4643 tex.src_sel_y = ctx->src[0].swizzle[1]; 4644 tex.src_sel_z = ctx->src[0].swizzle[2]; 4645 tex.src_sel_w = ctx->src[0].swizzle[3]; 4646 tex.src_rel = ctx->src[0].rel; 4647 } 4648 4649 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 4650 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4651 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4652 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4653 tex.src_sel_x = 1; 4654 tex.src_sel_y = 0; 4655 tex.src_sel_z = 3; 4656 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 4657 } 4658 4659 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 4660 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 4661 tex.coord_type_x = 1; 4662 tex.coord_type_y = 1; 4663 } 4664 tex.coord_type_z = 1; 4665 tex.coord_type_w = 1; 4666 4667 tex.offset_x = offset_x; 4668 tex.offset_y = offset_y; 4669 tex.offset_z = offset_z; 4670 4671 /* Put the depth for comparison in W. 4672 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 4673 * Some instructions expect the depth in Z. 
*/ 4674 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 4675 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 4676 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 4677 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 4678 opcode != SQ_TEX_INST_SAMPLE_C_L && 4679 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 4680 tex.src_sel_w = tex.src_sel_z; 4681 } 4682 4683 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 4684 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 4685 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 4686 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 4687 /* the array index is read from Y */ 4688 tex.coord_type_y = 0; 4689 } else { 4690 /* the array index is read from Z */ 4691 tex.coord_type_z = 0; 4692 tex.src_sel_z = tex.src_sel_y; 4693 } 4694 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 4695 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 4696 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4697 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 4698 (ctx->bc->chip_class >= EVERGREEN))) 4699 /* the array index is read from Z */ 4700 tex.coord_type_z = 0; 4701 4702 r = r600_bytecode_add_tex(ctx->bc, &tex); 4703 if (r) 4704 return r; 4705 4706 /* add shadow ambient support - gallium doesn't do it yet */ 4707 return 0; 4708} 4709 4710static int tgsi_lrp(struct r600_shader_ctx *ctx) 4711{ 4712 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4713 struct r600_bytecode_alu alu; 4714 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4715 unsigned i; 4716 int r; 4717 4718 /* optimize if it's just an equal balance */ 4719 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 4720 for (i = 0; i < lasti + 1; i++) { 4721 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4722 continue; 4723 4724 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4725 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 4726 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4727 
r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4728 alu.omod = 3; 4729 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4730 alu.dst.chan = i; 4731 if (i == lasti) { 4732 alu.last = 1; 4733 } 4734 r = r600_bytecode_add_alu(ctx->bc, &alu); 4735 if (r) 4736 return r; 4737 } 4738 return 0; 4739 } 4740 4741 /* 1 - src0 */ 4742 for (i = 0; i < lasti + 1; i++) { 4743 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4744 continue; 4745 4746 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4747 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 4748 alu.src[0].sel = V_SQ_ALU_SRC_1; 4749 alu.src[0].chan = 0; 4750 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4751 r600_bytecode_src_toggle_neg(&alu.src[1]); 4752 alu.dst.sel = ctx->temp_reg; 4753 alu.dst.chan = i; 4754 if (i == lasti) { 4755 alu.last = 1; 4756 } 4757 alu.dst.write = 1; 4758 r = r600_bytecode_add_alu(ctx->bc, &alu); 4759 if (r) 4760 return r; 4761 } 4762 4763 /* (1 - src0) * src2 */ 4764 for (i = 0; i < lasti + 1; i++) { 4765 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4766 continue; 4767 4768 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4769 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4770 alu.src[0].sel = ctx->temp_reg; 4771 alu.src[0].chan = i; 4772 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4773 alu.dst.sel = ctx->temp_reg; 4774 alu.dst.chan = i; 4775 if (i == lasti) { 4776 alu.last = 1; 4777 } 4778 alu.dst.write = 1; 4779 r = r600_bytecode_add_alu(ctx->bc, &alu); 4780 if (r) 4781 return r; 4782 } 4783 4784 /* src0 * src1 + (1 - src0) * src2 */ 4785 for (i = 0; i < lasti + 1; i++) { 4786 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4787 continue; 4788 4789 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4790 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4791 alu.is_op3 = 1; 4792 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4793 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4794 alu.src[2].sel = ctx->temp_reg; 4795 alu.src[2].chan = i; 
4796 4797 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4798 alu.dst.chan = i; 4799 if (i == lasti) { 4800 alu.last = 1; 4801 } 4802 r = r600_bytecode_add_alu(ctx->bc, &alu); 4803 if (r) 4804 return r; 4805 } 4806 return 0; 4807} 4808 4809static int tgsi_cmp(struct r600_shader_ctx *ctx) 4810{ 4811 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4812 struct r600_bytecode_alu alu; 4813 int i, r; 4814 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4815 4816 for (i = 0; i < lasti + 1; i++) { 4817 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4818 continue; 4819 4820 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 4822 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4823 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4824 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 4825 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4826 alu.dst.chan = i; 4827 alu.dst.write = 1; 4828 alu.is_op3 = 1; 4829 if (i == lasti) 4830 alu.last = 1; 4831 r = r600_bytecode_add_alu(ctx->bc, &alu); 4832 if (r) 4833 return r; 4834 } 4835 return 0; 4836} 4837 4838static int tgsi_ucmp(struct r600_shader_ctx *ctx) 4839{ 4840 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4841 struct r600_bytecode_alu alu; 4842 int i, r; 4843 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4844 4845 for (i = 0; i < lasti + 1; i++) { 4846 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4847 continue; 4848 4849 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4850 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 4851 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4852 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4853 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 4854 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4855 alu.dst.chan = i; 4856 alu.dst.write = 1; 4857 alu.is_op3 = 1; 4858 if (i == lasti) 4859 alu.last = 1; 4860 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 4861 if (r) 4862 return r; 4863 } 4864 return 0; 4865} 4866 4867static int tgsi_xpd(struct r600_shader_ctx *ctx) 4868{ 4869 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4870 static const unsigned int src0_swizzle[] = {2, 0, 1}; 4871 static const unsigned int src1_swizzle[] = {1, 2, 0}; 4872 struct r600_bytecode_alu alu; 4873 uint32_t use_temp = 0; 4874 int i, r; 4875 4876 if (inst->Dst[0].Register.WriteMask != 0xf) 4877 use_temp = 1; 4878 4879 for (i = 0; i < 4; i++) { 4880 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4881 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4882 if (i < 3) { 4883 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 4884 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 4885 } else { 4886 alu.src[0].sel = V_SQ_ALU_SRC_0; 4887 alu.src[0].chan = i; 4888 alu.src[1].sel = V_SQ_ALU_SRC_0; 4889 alu.src[1].chan = i; 4890 } 4891 4892 alu.dst.sel = ctx->temp_reg; 4893 alu.dst.chan = i; 4894 alu.dst.write = 1; 4895 4896 if (i == 3) 4897 alu.last = 1; 4898 r = r600_bytecode_add_alu(ctx->bc, &alu); 4899 if (r) 4900 return r; 4901 } 4902 4903 for (i = 0; i < 4; i++) { 4904 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4905 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 4906 4907 if (i < 3) { 4908 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 4909 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 4910 } else { 4911 alu.src[0].sel = V_SQ_ALU_SRC_0; 4912 alu.src[0].chan = i; 4913 alu.src[1].sel = V_SQ_ALU_SRC_0; 4914 alu.src[1].chan = i; 4915 } 4916 4917 alu.src[2].sel = ctx->temp_reg; 4918 alu.src[2].neg = 1; 4919 alu.src[2].chan = i; 4920 4921 if (use_temp) 4922 alu.dst.sel = ctx->temp_reg; 4923 else 4924 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4925 alu.dst.chan = i; 4926 alu.dst.write = 1; 4927 alu.is_op3 = 1; 4928 if (i == 3) 4929 alu.last = 1; 4930 r = r600_bytecode_add_alu(ctx->bc, &alu); 
4931 if (r) 4932 return r; 4933 } 4934 if (use_temp) 4935 return tgsi_helper_copy(ctx, inst); 4936 return 0; 4937} 4938 4939static int tgsi_exp(struct r600_shader_ctx *ctx) 4940{ 4941 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4942 struct r600_bytecode_alu alu; 4943 int r; 4944 int i; 4945 4946 /* result.x = 2^floor(src); */ 4947 if (inst->Dst[0].Register.WriteMask & 1) { 4948 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4949 4950 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 4951 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4952 4953 alu.dst.sel = ctx->temp_reg; 4954 alu.dst.chan = 0; 4955 alu.dst.write = 1; 4956 alu.last = 1; 4957 r = r600_bytecode_add_alu(ctx->bc, &alu); 4958 if (r) 4959 return r; 4960 4961 if (ctx->bc->chip_class == CAYMAN) { 4962 for (i = 0; i < 3; i++) { 4963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4964 alu.src[0].sel = ctx->temp_reg; 4965 alu.src[0].chan = 0; 4966 4967 alu.dst.sel = ctx->temp_reg; 4968 alu.dst.chan = i; 4969 alu.dst.write = i == 0; 4970 alu.last = i == 2; 4971 r = r600_bytecode_add_alu(ctx->bc, &alu); 4972 if (r) 4973 return r; 4974 } 4975 } else { 4976 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 4977 alu.src[0].sel = ctx->temp_reg; 4978 alu.src[0].chan = 0; 4979 4980 alu.dst.sel = ctx->temp_reg; 4981 alu.dst.chan = 0; 4982 alu.dst.write = 1; 4983 alu.last = 1; 4984 r = r600_bytecode_add_alu(ctx->bc, &alu); 4985 if (r) 4986 return r; 4987 } 4988 } 4989 4990 /* result.y = tmp - floor(tmp); */ 4991 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 4992 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4993 4994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 4995 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4996 4997 alu.dst.sel = ctx->temp_reg; 4998#if 0 4999 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5000 if (r) 5001 return r; 5002#endif 5003 alu.dst.write = 1; 5004 alu.dst.chan = 1; 5005 5006 alu.last = 1; 
5007 5008 r = r600_bytecode_add_alu(ctx->bc, &alu); 5009 if (r) 5010 return r; 5011 } 5012 5013 /* result.z = RoughApprox2ToX(tmp);*/ 5014 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 5015 if (ctx->bc->chip_class == CAYMAN) { 5016 for (i = 0; i < 3; i++) { 5017 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5018 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 5019 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5020 5021 alu.dst.sel = ctx->temp_reg; 5022 alu.dst.chan = i; 5023 if (i == 2) { 5024 alu.dst.write = 1; 5025 alu.last = 1; 5026 } 5027 5028 r = r600_bytecode_add_alu(ctx->bc, &alu); 5029 if (r) 5030 return r; 5031 } 5032 } else { 5033 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 5035 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5036 5037 alu.dst.sel = ctx->temp_reg; 5038 alu.dst.write = 1; 5039 alu.dst.chan = 2; 5040 5041 alu.last = 1; 5042 5043 r = r600_bytecode_add_alu(ctx->bc, &alu); 5044 if (r) 5045 return r; 5046 } 5047 } 5048 5049 /* result.w = 1.0;*/ 5050 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 5051 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5052 5053 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 5054 alu.src[0].sel = V_SQ_ALU_SRC_1; 5055 alu.src[0].chan = 0; 5056 5057 alu.dst.sel = ctx->temp_reg; 5058 alu.dst.chan = 3; 5059 alu.dst.write = 1; 5060 alu.last = 1; 5061 r = r600_bytecode_add_alu(ctx->bc, &alu); 5062 if (r) 5063 return r; 5064 } 5065 return tgsi_helper_copy(ctx, inst); 5066} 5067 5068static int tgsi_log(struct r600_shader_ctx *ctx) 5069{ 5070 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5071 struct r600_bytecode_alu alu; 5072 int r; 5073 int i; 5074 5075 /* result.x = floor(log2(|src|)); */ 5076 if (inst->Dst[0].Register.WriteMask & 1) { 5077 if (ctx->bc->chip_class == CAYMAN) { 5078 for (i = 0; i < 3; i++) { 5079 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5080 5081 
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5082 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5083 r600_bytecode_src_set_abs(&alu.src[0]); 5084 5085 alu.dst.sel = ctx->temp_reg; 5086 alu.dst.chan = i; 5087 if (i == 0) 5088 alu.dst.write = 1; 5089 if (i == 2) 5090 alu.last = 1; 5091 r = r600_bytecode_add_alu(ctx->bc, &alu); 5092 if (r) 5093 return r; 5094 } 5095 5096 } else { 5097 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5098 5099 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5100 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5101 r600_bytecode_src_set_abs(&alu.src[0]); 5102 5103 alu.dst.sel = ctx->temp_reg; 5104 alu.dst.chan = 0; 5105 alu.dst.write = 1; 5106 alu.last = 1; 5107 r = r600_bytecode_add_alu(ctx->bc, &alu); 5108 if (r) 5109 return r; 5110 } 5111 5112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 5113 alu.src[0].sel = ctx->temp_reg; 5114 alu.src[0].chan = 0; 5115 5116 alu.dst.sel = ctx->temp_reg; 5117 alu.dst.chan = 0; 5118 alu.dst.write = 1; 5119 alu.last = 1; 5120 5121 r = r600_bytecode_add_alu(ctx->bc, &alu); 5122 if (r) 5123 return r; 5124 } 5125 5126 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 5127 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 5128 5129 if (ctx->bc->chip_class == CAYMAN) { 5130 for (i = 0; i < 3; i++) { 5131 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5132 5133 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5134 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5135 r600_bytecode_src_set_abs(&alu.src[0]); 5136 5137 alu.dst.sel = ctx->temp_reg; 5138 alu.dst.chan = i; 5139 if (i == 1) 5140 alu.dst.write = 1; 5141 if (i == 2) 5142 alu.last = 1; 5143 5144 r = r600_bytecode_add_alu(ctx->bc, &alu); 5145 if (r) 5146 return r; 5147 } 5148 } else { 5149 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5150 5151 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5152 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5153 
r600_bytecode_src_set_abs(&alu.src[0]); 5154 5155 alu.dst.sel = ctx->temp_reg; 5156 alu.dst.chan = 1; 5157 alu.dst.write = 1; 5158 alu.last = 1; 5159 5160 r = r600_bytecode_add_alu(ctx->bc, &alu); 5161 if (r) 5162 return r; 5163 } 5164 5165 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5166 5167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 5168 alu.src[0].sel = ctx->temp_reg; 5169 alu.src[0].chan = 1; 5170 5171 alu.dst.sel = ctx->temp_reg; 5172 alu.dst.chan = 1; 5173 alu.dst.write = 1; 5174 alu.last = 1; 5175 5176 r = r600_bytecode_add_alu(ctx->bc, &alu); 5177 if (r) 5178 return r; 5179 5180 if (ctx->bc->chip_class == CAYMAN) { 5181 for (i = 0; i < 3; i++) { 5182 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5183 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 5184 alu.src[0].sel = ctx->temp_reg; 5185 alu.src[0].chan = 1; 5186 5187 alu.dst.sel = ctx->temp_reg; 5188 alu.dst.chan = i; 5189 if (i == 1) 5190 alu.dst.write = 1; 5191 if (i == 2) 5192 alu.last = 1; 5193 5194 r = r600_bytecode_add_alu(ctx->bc, &alu); 5195 if (r) 5196 return r; 5197 } 5198 } else { 5199 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 5201 alu.src[0].sel = ctx->temp_reg; 5202 alu.src[0].chan = 1; 5203 5204 alu.dst.sel = ctx->temp_reg; 5205 alu.dst.chan = 1; 5206 alu.dst.write = 1; 5207 alu.last = 1; 5208 5209 r = r600_bytecode_add_alu(ctx->bc, &alu); 5210 if (r) 5211 return r; 5212 } 5213 5214 if (ctx->bc->chip_class == CAYMAN) { 5215 for (i = 0; i < 3; i++) { 5216 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 5218 alu.src[0].sel = ctx->temp_reg; 5219 alu.src[0].chan = 1; 5220 5221 alu.dst.sel = ctx->temp_reg; 5222 alu.dst.chan = i; 5223 if (i == 1) 5224 alu.dst.write = 1; 5225 if (i == 2) 5226 alu.last = 1; 5227 5228 r = r600_bytecode_add_alu(ctx->bc, &alu); 5229 if (r) 5230 return r; 5231 } 5232 } else { 5233 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5234 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 5235 alu.src[0].sel = ctx->temp_reg; 5236 alu.src[0].chan = 1; 5237 5238 alu.dst.sel = ctx->temp_reg; 5239 alu.dst.chan = 1; 5240 alu.dst.write = 1; 5241 alu.last = 1; 5242 5243 r = r600_bytecode_add_alu(ctx->bc, &alu); 5244 if (r) 5245 return r; 5246 } 5247 5248 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5249 5250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 5251 5252 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5253 r600_bytecode_src_set_abs(&alu.src[0]); 5254 5255 alu.src[1].sel = ctx->temp_reg; 5256 alu.src[1].chan = 1; 5257 5258 alu.dst.sel = ctx->temp_reg; 5259 alu.dst.chan = 1; 5260 alu.dst.write = 1; 5261 alu.last = 1; 5262 5263 r = r600_bytecode_add_alu(ctx->bc, &alu); 5264 if (r) 5265 return r; 5266 } 5267 5268 /* result.z = log2(|src|);*/ 5269 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 5270 if (ctx->bc->chip_class == CAYMAN) { 5271 for (i = 0; i < 3; i++) { 5272 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5273 5274 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5275 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5276 r600_bytecode_src_set_abs(&alu.src[0]); 5277 5278 alu.dst.sel = ctx->temp_reg; 5279 if (i == 2) 5280 alu.dst.write = 1; 5281 alu.dst.chan = i; 5282 if (i == 2) 5283 alu.last = 1; 5284 5285 r = r600_bytecode_add_alu(ctx->bc, &alu); 5286 if (r) 5287 return r; 5288 } 5289 } else { 5290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5291 5292 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 5293 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5294 r600_bytecode_src_set_abs(&alu.src[0]); 5295 5296 alu.dst.sel = ctx->temp_reg; 5297 alu.dst.write = 1; 5298 alu.dst.chan = 2; 5299 alu.last = 1; 5300 5301 r = r600_bytecode_add_alu(ctx->bc, &alu); 5302 if (r) 5303 return r; 5304 } 5305 } 5306 5307 /* result.w = 1.0; */ 5308 if ((inst->Dst[0].Register.WriteMask >> 
3) & 1) { 5309 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5310 5311 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 5312 alu.src[0].sel = V_SQ_ALU_SRC_1; 5313 alu.src[0].chan = 0; 5314 5315 alu.dst.sel = ctx->temp_reg; 5316 alu.dst.chan = 3; 5317 alu.dst.write = 1; 5318 alu.last = 1; 5319 5320 r = r600_bytecode_add_alu(ctx->bc, &alu); 5321 if (r) 5322 return r; 5323 } 5324 5325 return tgsi_helper_copy(ctx, inst); 5326} 5327 5328static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 5329{ 5330 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5331 struct r600_bytecode_alu alu; 5332 int r; 5333 5334 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5335 5336 switch (inst->Instruction.Opcode) { 5337 case TGSI_OPCODE_ARL: 5338 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 5339 break; 5340 case TGSI_OPCODE_ARR: 5341 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 5342 break; 5343 case TGSI_OPCODE_UARL: 5344 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 5345 break; 5346 default: 5347 assert(0); 5348 return -1; 5349 } 5350 5351 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5352 alu.last = 1; 5353 alu.dst.sel = ctx->bc->ar_reg; 5354 alu.dst.write = 1; 5355 r = r600_bytecode_add_alu(ctx->bc, &alu); 5356 if (r) 5357 return r; 5358 5359 ctx->bc->ar_loaded = 0; 5360 return 0; 5361} 5362static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 5363{ 5364 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5365 struct r600_bytecode_alu alu; 5366 int r; 5367 5368 switch (inst->Instruction.Opcode) { 5369 case TGSI_OPCODE_ARL: 5370 memset(&alu, 0, sizeof(alu)); 5371 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 5372 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5373 alu.dst.sel = ctx->bc->ar_reg; 5374 alu.dst.write = 1; 5375 alu.last = 1; 5376 5377 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5378 return r; 5379 5380 memset(&alu, 0, sizeof(alu)); 5381 alu.inst = 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 5382 alu.src[0].sel = ctx->bc->ar_reg; 5383 alu.dst.sel = ctx->bc->ar_reg; 5384 alu.dst.write = 1; 5385 alu.last = 1; 5386 5387 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5388 return r; 5389 break; 5390 case TGSI_OPCODE_ARR: 5391 memset(&alu, 0, sizeof(alu)); 5392 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 5393 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5394 alu.dst.sel = ctx->bc->ar_reg; 5395 alu.dst.write = 1; 5396 alu.last = 1; 5397 5398 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5399 return r; 5400 break; 5401 case TGSI_OPCODE_UARL: 5402 memset(&alu, 0, sizeof(alu)); 5403 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 5404 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5405 alu.dst.sel = ctx->bc->ar_reg; 5406 alu.dst.write = 1; 5407 alu.last = 1; 5408 5409 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5410 return r; 5411 break; 5412 default: 5413 assert(0); 5414 return -1; 5415 } 5416 5417 ctx->bc->ar_loaded = 0; 5418 return 0; 5419} 5420 5421static int tgsi_opdst(struct r600_shader_ctx *ctx) 5422{ 5423 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5424 struct r600_bytecode_alu alu; 5425 int i, r = 0; 5426 5427 for (i = 0; i < 4; i++) { 5428 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5429 5430 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 5431 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5432 5433 if (i == 0 || i == 3) { 5434 alu.src[0].sel = V_SQ_ALU_SRC_1; 5435 } else { 5436 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5437 } 5438 5439 if (i == 0 || i == 2) { 5440 alu.src[1].sel = V_SQ_ALU_SRC_1; 5441 } else { 5442 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5443 } 5444 if (i == 3) 5445 alu.last = 1; 5446 r = r600_bytecode_add_alu(ctx->bc, &alu); 5447 if (r) 5448 return r; 5449 } 5450 return 0; 5451} 5452 5453static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 5454{ 5455 struct r600_bytecode_alu alu; 5456 int r; 
5457 5458 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5459 alu.inst = opcode; 5460 alu.execute_mask = 1; 5461 alu.update_pred = 1; 5462 5463 alu.dst.sel = ctx->temp_reg; 5464 alu.dst.write = 1; 5465 alu.dst.chan = 0; 5466 5467 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5468 alu.src[1].sel = V_SQ_ALU_SRC_0; 5469 alu.src[1].chan = 0; 5470 5471 alu.last = 1; 5472 5473 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 5474 if (r) 5475 return r; 5476 return 0; 5477} 5478 5479static int pops(struct r600_shader_ctx *ctx, int pops) 5480{ 5481 unsigned force_pop = ctx->bc->force_add_cf; 5482 5483 if (!force_pop) { 5484 int alu_pop = 3; 5485 if (ctx->bc->cf_last) { 5486 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 5487 alu_pop = 0; 5488 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 5489 alu_pop = 1; 5490 } 5491 alu_pop += pops; 5492 if (alu_pop == 1) { 5493 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 5494 ctx->bc->force_add_cf = 1; 5495 } else if (alu_pop == 2) { 5496 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 5497 ctx->bc->force_add_cf = 1; 5498 } else { 5499 force_pop = 1; 5500 } 5501 } 5502 5503 if (force_pop) { 5504 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 5505 ctx->bc->cf_last->pop_count = pops; 5506 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 5507 } 5508 5509 return 0; 5510} 5511 5512static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 5513{ 5514 switch(reason) { 5515 case FC_PUSH_VPM: 5516 ctx->bc->callstack[ctx->bc->call_sp].current--; 5517 break; 5518 case FC_PUSH_WQM: 5519 case FC_LOOP: 5520 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 5521 break; 5522 case FC_REP: 5523 /* TOODO : for 16 vp asic should -= 2; */ 5524 ctx->bc->callstack[ctx->bc->call_sp].current --; 5525 
break; 5526 } 5527} 5528 5529static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 5530{ 5531 if (check_max_only) { 5532 int diff; 5533 switch (reason) { 5534 case FC_PUSH_VPM: 5535 diff = 1; 5536 break; 5537 case FC_PUSH_WQM: 5538 diff = 4; 5539 break; 5540 default: 5541 assert(0); 5542 diff = 0; 5543 } 5544 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 5545 ctx->bc->callstack[ctx->bc->call_sp].max) { 5546 ctx->bc->callstack[ctx->bc->call_sp].max = 5547 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 5548 } 5549 return; 5550 } 5551 switch (reason) { 5552 case FC_PUSH_VPM: 5553 ctx->bc->callstack[ctx->bc->call_sp].current++; 5554 break; 5555 case FC_PUSH_WQM: 5556 case FC_LOOP: 5557 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 5558 break; 5559 case FC_REP: 5560 ctx->bc->callstack[ctx->bc->call_sp].current++; 5561 break; 5562 } 5563 5564 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 5565 ctx->bc->callstack[ctx->bc->call_sp].max) { 5566 ctx->bc->callstack[ctx->bc->call_sp].max = 5567 ctx->bc->callstack[ctx->bc->call_sp].current; 5568 } 5569} 5570 5571static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 5572{ 5573 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 5574 5575 sp->mid = realloc((void *)sp->mid, 5576 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 5577 sp->mid[sp->num_mid] = ctx->bc->cf_last; 5578 sp->num_mid++; 5579} 5580 5581static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 5582{ 5583 ctx->bc->fc_sp++; 5584 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 5585 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 5586} 5587 5588static void fc_poplevel(struct r600_shader_ctx *ctx) 5589{ 5590 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 5591 free(sp->mid); 5592 sp->mid = NULL; 5593 sp->num_mid = 0; 5594 sp->start = NULL; 5595 sp->type = 0; 5596 ctx->bc->fc_sp--; 5597} 5598 5599#if 0 5600static int 
emit_return(struct r600_shader_ctx *ctx) 5601{ 5602 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 5603 return 0; 5604} 5605 5606static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 5607{ 5608 5609 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 5610 ctx->bc->cf_last->pop_count = pops; 5611 /* XXX work out offset */ 5612 return 0; 5613} 5614 5615static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 5616{ 5617 return 0; 5618} 5619 5620static void emit_testflag(struct r600_shader_ctx *ctx) 5621{ 5622 5623} 5624 5625static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 5626{ 5627 emit_testflag(ctx); 5628 emit_jump_to_offset(ctx, 1, 4); 5629 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 5630 pops(ctx, ifidx + 1); 5631 emit_return(ctx); 5632} 5633 5634static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 5635{ 5636 emit_testflag(ctx); 5637 5638 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 5639 ctx->bc->cf_last->pop_count = 1; 5640 5641 fc_set_mid(ctx, fc_sp); 5642 5643 pops(ctx, 1); 5644} 5645#endif 5646 5647static int tgsi_if(struct r600_shader_ctx *ctx) 5648{ 5649 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 5650 5651 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 5652 5653 fc_pushlevel(ctx, FC_IF); 5654 5655 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 5656 return 0; 5657} 5658 5659static int tgsi_else(struct r600_shader_ctx *ctx) 5660{ 5661 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 5662 ctx->bc->cf_last->pop_count = 1; 5663 5664 fc_set_mid(ctx, ctx->bc->fc_sp); 5665 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 5666 return 0; 5667} 5668 5669static int tgsi_endif(struct r600_shader_ctx *ctx) 5670{ 5671 pops(ctx, 1); 5672 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type 
!= FC_IF) { 5673 R600_ERR("if/endif unbalanced in shader\n"); 5674 return -1; 5675 } 5676 5677 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 5678 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 5679 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 5680 } else { 5681 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 5682 } 5683 fc_poplevel(ctx); 5684 5685 callstack_decrease_current(ctx, FC_PUSH_VPM); 5686 return 0; 5687} 5688 5689static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 5690{ 5691 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 5692 * limited to 4096 iterations, like the other LOOP_* instructions. */ 5693 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10)); 5694 5695 fc_pushlevel(ctx, FC_LOOP); 5696 5697 /* check stack depth */ 5698 callstack_check_depth(ctx, FC_LOOP, 0); 5699 return 0; 5700} 5701 5702static int tgsi_endloop(struct r600_shader_ctx *ctx) 5703{ 5704 int i; 5705 5706 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 5707 5708 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 5709 R600_ERR("loop/endloop in shader code are not paired.\n"); 5710 return -EINVAL; 5711 } 5712 5713 /* fixup loop pointers - from r600isa 5714 LOOP END points to CF after LOOP START, 5715 LOOP START point to CF after LOOP END 5716 BRK/CONT point to LOOP END CF 5717 */ 5718 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 5719 5720 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 5721 5722 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 5723 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 5724 } 5725 /* XXX add LOOPRET support */ 5726 fc_poplevel(ctx); 5727 callstack_decrease_current(ctx, FC_LOOP); 5728 return 0; 5729} 5730 5731static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 5732{ 5733 unsigned int 
fscp; 5734 5735 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 5736 { 5737 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 5738 break; 5739 } 5740 5741 if (fscp == 0) { 5742 R600_ERR("Break not inside loop/endloop pair\n"); 5743 return -EINVAL; 5744 } 5745 5746 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 5747 5748 fc_set_mid(ctx, fscp); 5749 5750 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 5751 return 0; 5752} 5753 5754static int tgsi_umad(struct r600_shader_ctx *ctx) 5755{ 5756 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5757 struct r600_bytecode_alu alu; 5758 int i, j, r; 5759 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5760 5761 /* src0 * src1 */ 5762 for (i = 0; i < lasti + 1; i++) { 5763 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5764 continue; 5765 5766 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5767 5768 alu.dst.chan = i; 5769 alu.dst.sel = ctx->temp_reg; 5770 alu.dst.write = 1; 5771 5772 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 5773 for (j = 0; j < 2; j++) { 5774 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 5775 } 5776 5777 alu.last = 1; 5778 r = r600_bytecode_add_alu(ctx->bc, &alu); 5779 if (r) 5780 return r; 5781 } 5782 5783 5784 for (i = 0; i < lasti + 1; i++) { 5785 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5786 continue; 5787 5788 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5789 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5790 5791 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 5792 5793 alu.src[0].sel = ctx->temp_reg; 5794 alu.src[0].chan = i; 5795 5796 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 5797 if (i == lasti) { 5798 alu.last = 1; 5799 } 5800 r = r600_bytecode_add_alu(ctx->bc, &alu); 5801 if (r) 5802 return r; 5803 } 5804 return 0; 5805} 5806 5807static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 5808 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_r600_arl}, 5809 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5810 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 5811 5812 /* XXX: 5813 * For state trackers other than OpenGL, we'll want to use 5814 * _RECIP_IEEE instead. 5815 */ 5816 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 5817 5818 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 5819 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 5820 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 5821 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 5822 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5823 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5824 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5825 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 5826 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 5827 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 5828 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 5829 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 5830 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 5831 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 5832 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 5833 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5834 /* gap */ 5835 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5836 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5837 /* gap */ 5838 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5839 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5840 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 5841 {TGSI_OPCODE_CLAMP, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5842 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 5843 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 5844 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 5845 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 5846 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 5847 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 5848 /* gap */ 5849 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5850 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 5851 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5852 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5853 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 5854 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 5855 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 5856 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 5857 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5858 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5859 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5860 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5861 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5862 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 5863 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5864 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 5865 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 5866 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 5867 {TGSI_OPCODE_SNE, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 5868 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5869 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5870 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 5871 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 5872 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5873 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5874 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5875 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5876 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5877 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5878 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 5879 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5880 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5881 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5882 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 5883 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 5884 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 5885 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 5886 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5887 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5888 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 5889 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 5890 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 5891 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 5892 /* gap */ 5893 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5894 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5895 
{TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 5896 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 5897 /* gap */ 5898 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5899 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5900 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5901 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5902 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 5903 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 5904 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 5905 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 5906 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, 5907 /* gap */ 5908 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5909 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 5910 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 5911 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 5912 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 5913 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5914 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 5915 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5916 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 5917 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5918 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5919 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 5920 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5921 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 5922 {TGSI_OPCODE_ENDSUB, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5923 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 5924 /* gap */ 5925 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5926 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5927 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5928 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5929 /* gap */ 5930 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5931 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5932 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5933 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5934 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5935 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5936 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5937 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5938 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 5939 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 5940 /* gap */ 5941 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5942 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans}, 5943 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 5944 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 5945 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 5946 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 5947 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 5948 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans}, 5949 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 5950 {TGSI_OPCODE_F2U, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans}, 5951 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 5952 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 5953 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 5954 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 5955 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 5956 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 5957 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 5958 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 5959 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 5960 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 5961 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, 5962 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 5963 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 5964 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5965 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5966 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5967 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5968 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 5969 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 5970 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 5971 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 5972 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 5973 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 5974 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 5975 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 5976 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 5977 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 5978 
{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 5979 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5980 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 5981 {TGSI_OPCODE_UCMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ucmp}, 5982 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 5983 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 5984 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5985 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5986 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5987 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5988 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5989 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5990 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5991 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5992 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5993 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5994 {TGSI_OPCODE_ATOMOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5995 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5996 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5997 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5998 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5999 {TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6000 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6001 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 6002 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 6003 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6004}; 6005 6006static struct 
r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 6007 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 6008 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 6009 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 6010 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 6011 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 6012 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 6013 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 6014 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 6015 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 6016 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6017 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6018 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 6019 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 6020 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 6021 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 6022 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 6023 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 6024 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 6025 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 6026 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6027 /* gap */ 6028 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6029 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6030 /* gap */ 6031 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6032 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6033 {TGSI_OPCODE_FRC, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 6034 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6035 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 6036 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 6037 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 6038 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 6039 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 6040 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 6041 /* gap */ 6042 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6043 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 6044 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6045 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6046 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 6047 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 6048 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 6049 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 6050 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6051 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6052 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6053 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6054 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6055 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 6056 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6057 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 6058 {TGSI_OPCODE_SIN, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 6059 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 6060 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 6061 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6062 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6063 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 6064 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6065 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6066 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6067 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6068 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6069 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6070 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6071 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 6072 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6073 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6074 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6075 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 6076 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 6077 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 6078 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 6079 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6080 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6081 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6082 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 6083 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 6084 
{TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 6085 /* gap */ 6086 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6087 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6088 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 6089 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 6090 /* gap */ 6091 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6092 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6093 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6094 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6095 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 6096 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 6097 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 6098 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 6099 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 6100 /* gap */ 6101 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6102 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 6103 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 6104 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 6105 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 6106 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6107 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 6108 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 6109 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 6110 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6111 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6112 
{TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 6113 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6114 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 6115 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6116 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 6117 /* gap */ 6118 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6119 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6120 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6121 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6122 /* gap */ 6123 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6124 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6125 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6126 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6127 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6128 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6129 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6130 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6131 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 6132 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 6133 /* gap */ 6134 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6135 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, 6136 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 6137 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 6138 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 6139 {TGSI_OPCODE_INEG, 
0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 6140 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 6141 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 6142 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 6143 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, 6144 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 6145 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 6146 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 6147 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 6148 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 6149 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 6150 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 6151 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 6152 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 6153 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 6154 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 6155 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 6156 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 6157 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6158 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6159 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6160 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6161 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 6162 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 6163 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 6164 
{TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 6165 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 6166 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 6167 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 6168 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 6169 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 6170 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 6171 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 6172 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 6173 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 6174 {TGSI_OPCODE_UCMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ucmp}, 6175 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 6176 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 6177 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6178 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6179 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6180 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6181 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6182 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6183 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6184 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6185 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6186 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6187 {TGSI_OPCODE_ATOMOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6188 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6189 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6190 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6191 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6192 
{TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6193 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6194 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 6195 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 6196 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6197}; 6198 6199static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 6200 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 6201 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 6202 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 6203 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 6204 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 6205 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 6206 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 6207 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 6208 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 6209 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6210 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6211 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 6212 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 6213 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 6214 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 6215 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 6216 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 6217 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 6218 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 6219 {TGSI_OPCODE_CND, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6220 /* gap */ 6221 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6222 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6223 /* gap */ 6224 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6225 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6226 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 6227 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6228 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 6229 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 6230 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 6231 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 6232 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 6233 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 6234 /* gap */ 6235 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6236 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 6237 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6238 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6239 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 6240 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 6241 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 6242 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 6243 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6244 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6245 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6246 {TGSI_OPCODE_PK4UB, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6247 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6248 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 6249 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6250 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 6251 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 6252 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 6253 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 6254 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6255 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6256 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 6257 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6258 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6259 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6260 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6261 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6262 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6263 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6264 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 6265 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6266 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6267 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6268 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 6269 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 6270 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 6271 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, 
tgsi_tex}, 6272 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6273 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6274 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 6275 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 6276 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 6277 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 6278 /* gap */ 6279 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6280 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6281 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 6282 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 6283 /* gap */ 6284 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6285 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6286 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6287 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6288 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, 6289 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, 6290 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 6291 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 6292 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 6293 /* gap */ 6294 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6295 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 6296 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 6297 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 6298 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 6299 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 6300 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 6301 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 6302 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 6303 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6304 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6305 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 6306 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6307 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 6308 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6309 {TGSI_OPCODE_TXQ_LZ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 6310 /* gap */ 6311 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6312 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6313 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6314 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6315 /* gap */ 6316 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6317 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6318 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6319 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6320 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6321 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6322 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6323 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6324 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 6325 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 6326 /* gap */ 6327 
{118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6328 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, 6329 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 6330 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 6331 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 6332 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 6333 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 6334 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 6335 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 6336 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, 6337 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 6338 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 6339 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 6340 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 6341 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 6342 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 6343 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 6344 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr}, 6345 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 6346 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 6347 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 6348 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 6349 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 6350 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6351 {TGSI_OPCODE_CASE, 
0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6352 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6353 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6354 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 6355 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 6356 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 6357 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 6358 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 6359 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 6360 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 6361 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 6362 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 6363 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 6364 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 6365 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 6366 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 6367 {TGSI_OPCODE_UCMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ucmp}, 6368 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 6369 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 6370 {TGSI_OPCODE_LOAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6371 {TGSI_OPCODE_STORE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6372 {TGSI_OPCODE_MFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6373 {TGSI_OPCODE_LFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6374 {TGSI_OPCODE_SFENCE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6375 {TGSI_OPCODE_BARRIER, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6376 {TGSI_OPCODE_ATOMUADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6377 {TGSI_OPCODE_ATOMXCHG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6378 {TGSI_OPCODE_ATOMCAS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6379 {TGSI_OPCODE_ATOMAND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6380 {TGSI_OPCODE_ATOMOR, 
0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6381 {TGSI_OPCODE_ATOMXOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6382 {TGSI_OPCODE_ATOMUMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6383 {TGSI_OPCODE_ATOMUMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6384 {TGSI_OPCODE_ATOMIMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6385 {TGSI_OPCODE_ATOMIMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6386 {TGSI_OPCODE_TEX2, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 6387 {TGSI_OPCODE_TXB2, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 6388 {TGSI_OPCODE_TXL2, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 6389 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 6390}; 6391