svga_tgsi_vgpu10.c revision 13eb5f596bc8ece3d1805b388aa53917e6158d7b
1/********************************************************** 2 * Copyright 1998-2013 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26/** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
#define MAX_IMMEDIATE_COUNT \
   (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};


/**
 * State of the TGSI -> VGPU10 translation, including the growable
 * token output buffer and all per-shader bookkeeping.
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;   /**< allocated size of buf, in bytes */
   char *buf;       /**< start of the output buffer */
   char *ptr;       /**< current write position within buf */

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;   /**< PIPE_SHADER_VERTEX/GEOMETRY/FRAGMENT */

   unsigned inst_start_token;
   boolean discard_instruction; /**< throw away current instruction? */

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   unsigned num_immediates;  /**< Number of immediates emitted */
   unsigned common_immediate_pos[8];  /**< literals for common immediates */
   unsigned num_common_immediates;
   boolean immediates_emitted;

   unsigned num_outputs;  /**< include any extra outputs */
                          /** The first extra output is reserved for
                           *  non-adjusted vertex position for
                           *  stream output purpose
                           */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count; /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Samplers */
   unsigned num_samplers;

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   ubyte system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_scale_index, prescale_trans_index;
      boolean need_prescale;
   } vposition;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      /* alpha test */
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type; /**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
      unsigned input_size;       /**< size of input arrays */
      unsigned prim_id_index;    /**< primitive id register index */
      unsigned max_out_vertices; /**< maximum number of output vertices */
   } gs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index; /**< clip distance output register index */
   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
   unsigned clip_dist_so_index;  /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index; /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   boolean constant_color_output;

   boolean uses_flat_interp;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /* VS/GS/FS Linkage info */
   struct shader_linkage linkage;

   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
};


static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);

static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst);

/** Static fallback buffer used when we fail to grow the token buffer */
static char err_buf[128];

/**
 * Double the size of the emitter's token output buffer.
 * On allocation failure we point the emitter at the static err_buf so
 * that subsequent token writes land somewhere harmless, and return FALSE.
 */
static boolean
expand(struct svga_shader_emitter_v10 *emit)
{
   char *new_buf;
   unsigned newsize = emit->size * 2;

   if (emit->buf != err_buf)
      new_buf = REALLOC(emit->buf, emit->size, newsize);
   else
      new_buf = NULL;

   if (!new_buf) {
      emit->ptr = err_buf;
      emit->buf = err_buf;
      emit->size = sizeof(err_buf);
      return FALSE;
   }

   emit->size = newsize;
   /* re-base the write pointer into the reallocated buffer */
   emit->ptr = new_buf + (emit->ptr - emit->buf);
   emit->buf = new_buf;
   return TRUE;
}

/**
 * Create and initialize a new svga_shader_emitter_v10 object.
 */
static struct svga_shader_emitter_v10 *
alloc_emitter(void)
{
   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));

   if (!emit)
      return NULL;

   /* to initialize the output buffer */
   emit->size = 512;
   if (!expand(emit)) {
      FREE(emit);
      return NULL;
   }
   return emit;
}

/**
 * Free an svga_shader_emitter_v10 object.
 */
static void
free_emitter(struct svga_shader_emitter_v10 *emit)
{
   assert(emit);
   FREE(emit->buf);    /* will be NULL if translation succeeded */
   FREE(emit);
}

/**
 * Make sure the output buffer has room for nr_dwords more dwords,
 * growing it as needed.
 */
static inline boolean
reserve(struct svga_shader_emitter_v10 *emit,
        unsigned nr_dwords)
{
   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
      if (!expand(emit))
         return FALSE;
   }

   return TRUE;
}

/** Append one 32-bit token to the output buffer */
static boolean
emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
{
   if (!reserve(emit, 1))
      return FALSE;

   *(uint32 *)emit->ptr = dword;
   emit->ptr += sizeof dword;
   return TRUE;
}

/** Append an array of 32-bit tokens to the output buffer */
static boolean
emit_dwords(struct svga_shader_emitter_v10 *emit,
            const uint32 *dwords,
            unsigned nr)
{
   if (!reserve(emit, nr))
      return FALSE;

   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
   emit->ptr += nr * sizeof *dwords;
   return TRUE;
}

/** Return the number of tokens in the emitter's buffer */
static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
{
   return (emit->ptr - emit->buf) / sizeof(unsigned);
}


/**
 * Check for register overflow.  If we overflow we'll set an
 * error flag.  This function can be called for register declarations
 * or use as src/dst instruction operands.
 * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
 *              or VGPU10_OPCODE_DCL_x
 * \param index  the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      /* input limits differ per shader stage */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_INPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_INPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      /* output limits differ per shader stage */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   /* report each overflow only once */
   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable) {
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}


/**
 * For clip distance register declarations and clip distance register
 * writes we need to mask the declaration usage or instruction writemask
 * (respectively) against the set of the really-enabled clipping planes.
 *
 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
 * has a VS that writes to all 8 clip distance registers, but the plane enable
 * flags are a subset of that.
 *
 * This function is used to apply the plane enable flags to the register
 * declaration or instruction writemask.
 *
 * \param writemask  the declaration usage mask or instruction writemask
 * \param clip_reg_index  which clip plane register is being declared/written.
 *                       The legal values are 0 and 1 (four clip planes per
 *                       register, for a total of 8 clip planes)
 */
static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
                      unsigned writemask, unsigned clip_reg_index)
{
   unsigned shift;

   assert(clip_reg_index < 2);

   /* four clip planes per clip register: */
   shift = clip_reg_index * 4;
   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);

   return writemask;
}


/**
 * Translate gallium shader type into VGPU10 type.
 */
static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)
{
   switch (type) {
   case PIPE_SHADER_VERTEX:
      return VGPU10_VERTEX_SHADER;
   case PIPE_SHADER_GEOMETRY:
      return VGPU10_GEOMETRY_SHADER;
   case PIPE_SHADER_FRAGMENT:
      return VGPU10_PIXEL_SHADER;
   default:
      assert(!"Unexpected shader type");
      return VGPU10_VERTEX_SHADER;
   }
}


/**
 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
 * Note: we only need to translate the opcodes for "simple" instructions,
 * as seen below.  All other opcodes are handled/translated specially.
 */
static VGPU10_OPCODE_TYPE
translate_opcode(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
      return VGPU10_OPCODE_MOV;
   case TGSI_OPCODE_MUL:
      return VGPU10_OPCODE_MUL;
   case TGSI_OPCODE_ADD:
      return VGPU10_OPCODE_ADD;
   case TGSI_OPCODE_DP3:
      return VGPU10_OPCODE_DP3;
   case TGSI_OPCODE_DP4:
      return VGPU10_OPCODE_DP4;
   case TGSI_OPCODE_MIN:
      return VGPU10_OPCODE_MIN;
   case TGSI_OPCODE_MAX:
      return VGPU10_OPCODE_MAX;
   case TGSI_OPCODE_MAD:
      return VGPU10_OPCODE_MAD;
   case TGSI_OPCODE_SQRT:
      return VGPU10_OPCODE_SQRT;
   case TGSI_OPCODE_FRC:
      return VGPU10_OPCODE_FRC;
   case TGSI_OPCODE_FLR:
      return VGPU10_OPCODE_ROUND_NI;
   case TGSI_OPCODE_FSEQ:
      return VGPU10_OPCODE_EQ;
   case TGSI_OPCODE_FSGE:
      return VGPU10_OPCODE_GE;
   case TGSI_OPCODE_FSNE:
      return VGPU10_OPCODE_NE;
   case TGSI_OPCODE_DDX:
      return VGPU10_OPCODE_DERIV_RTX;
   case TGSI_OPCODE_DDY:
      return VGPU10_OPCODE_DERIV_RTY;
   case TGSI_OPCODE_RET:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_DIV:
      return VGPU10_OPCODE_DIV;
   case TGSI_OPCODE_IDIV:
      return VGPU10_OPCODE_IDIV;
   case TGSI_OPCODE_DP2:
      return VGPU10_OPCODE_DP2;
   case TGSI_OPCODE_BRK:
      return VGPU10_OPCODE_BREAK;
   case TGSI_OPCODE_IF:
      return VGPU10_OPCODE_IF;
   case TGSI_OPCODE_ELSE:
      return VGPU10_OPCODE_ELSE;
   case TGSI_OPCODE_ENDIF:
      return VGPU10_OPCODE_ENDIF;
   case TGSI_OPCODE_CEIL:
      return VGPU10_OPCODE_ROUND_PI;
   case TGSI_OPCODE_I2F:
      return VGPU10_OPCODE_ITOF;
   case TGSI_OPCODE_NOT:
      return VGPU10_OPCODE_NOT;
   case TGSI_OPCODE_TRUNC:
      return VGPU10_OPCODE_ROUND_Z;
   case TGSI_OPCODE_SHL:
      return VGPU10_OPCODE_ISHL;
   case TGSI_OPCODE_AND:
      return VGPU10_OPCODE_AND;
   case TGSI_OPCODE_OR:
      return VGPU10_OPCODE_OR;
   case TGSI_OPCODE_XOR:
      return VGPU10_OPCODE_XOR;
   case TGSI_OPCODE_CONT:
      return VGPU10_OPCODE_CONTINUE;
   case TGSI_OPCODE_EMIT:
      return VGPU10_OPCODE_EMIT;
   case TGSI_OPCODE_ENDPRIM:
      return VGPU10_OPCODE_CUT;
   case TGSI_OPCODE_BGNLOOP:
      return VGPU10_OPCODE_LOOP;
   case TGSI_OPCODE_ENDLOOP:
      return VGPU10_OPCODE_ENDLOOP;
   case TGSI_OPCODE_ENDSUB:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_NOP:
      return VGPU10_OPCODE_NOP;
   case TGSI_OPCODE_BREAKC:
      return VGPU10_OPCODE_BREAKC;
   case TGSI_OPCODE_END:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_F2I:
      return VGPU10_OPCODE_FTOI;
   case TGSI_OPCODE_IMAX:
      return VGPU10_OPCODE_IMAX;
   case TGSI_OPCODE_IMIN:
      return VGPU10_OPCODE_IMIN;
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      return VGPU10_OPCODE_UDIV;
   case TGSI_OPCODE_IMUL_HI:
      return VGPU10_OPCODE_IMUL;
   case TGSI_OPCODE_INEG:
      return VGPU10_OPCODE_INEG;
   case TGSI_OPCODE_ISHR:
      return VGPU10_OPCODE_ISHR;
   case TGSI_OPCODE_ISGE:
      return VGPU10_OPCODE_IGE;
   case TGSI_OPCODE_ISLT:
      return VGPU10_OPCODE_ILT;
   case TGSI_OPCODE_F2U:
      return VGPU10_OPCODE_FTOU;
   case TGSI_OPCODE_UADD:
      return VGPU10_OPCODE_IADD;
   case TGSI_OPCODE_U2F:
      return VGPU10_OPCODE_UTOF;
   case TGSI_OPCODE_UCMP:
      return VGPU10_OPCODE_MOVC;
   case TGSI_OPCODE_UMAD:
      return VGPU10_OPCODE_UMAD;
   case TGSI_OPCODE_UMAX:
      return VGPU10_OPCODE_UMAX;
   case TGSI_OPCODE_UMIN:
      return VGPU10_OPCODE_UMIN;
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
      return VGPU10_OPCODE_UMUL;
   case TGSI_OPCODE_USEQ:
      return VGPU10_OPCODE_IEQ;
   case TGSI_OPCODE_USGE:
      return VGPU10_OPCODE_UGE;
   case TGSI_OPCODE_USHR:
      return VGPU10_OPCODE_USHR;
   case TGSI_OPCODE_USLT:
      return VGPU10_OPCODE_ULT;
   case TGSI_OPCODE_USNE:
      return VGPU10_OPCODE_INE;
   case TGSI_OPCODE_SWITCH:
      return VGPU10_OPCODE_SWITCH;
   case TGSI_OPCODE_CASE:
      return VGPU10_OPCODE_CASE;
   case TGSI_OPCODE_DEFAULT:
      return VGPU10_OPCODE_DEFAULT;
   case TGSI_OPCODE_ENDSWITCH:
      return VGPU10_OPCODE_ENDSWITCH;
   case TGSI_OPCODE_FSLT:
      return VGPU10_OPCODE_LT;
   case TGSI_OPCODE_ROUND:
      return VGPU10_OPCODE_ROUND_NE;
   default:
      assert(!"Unexpected TGSI opcode in translate_opcode()");
      return VGPU10_OPCODE_NOP;
   }
}


/**
 * Translate a TGSI register file type into a VGPU10 operand type.
 * \param array  is the TGSI_FILE_TEMPORARY register an array?
 */
static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file, boolean array)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   case TGSI_FILE_INPUT:
      return VGPU10_OPERAND_TYPE_INPUT;
   case TGSI_FILE_OUTPUT:
      return VGPU10_OPERAND_TYPE_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
                   : VGPU10_OPERAND_TYPE_TEMP;
   case TGSI_FILE_IMMEDIATE:
      /* all immediates are 32-bit values at this time so
       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
       */
      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
   case TGSI_FILE_SAMPLER:
      return VGPU10_OPERAND_TYPE_SAMPLER;
   case TGSI_FILE_SYSTEM_VALUE:
      return VGPU10_OPERAND_TYPE_INPUT;

   /* XXX TODO more cases to finish */

   default:
      assert(!"Bad tgsi register file!");
      return VGPU10_OPERAND_TYPE_NULL;
   }
}


/**
 * Emit a null dst register
 */
static void
emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand;

   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand.value);
}


/**
 * If the given register is a temporary, return the array ID.
 * Else return zero.
 */
static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
                  unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].arrayId;
   }
   else {
      return 0;
   }
}


/**
 * If the given register is a temporary, convert the index from a TGSI
 * TEMPORARY index to a VGPU10 temp index.
 */
static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 *emit,
                 unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].index;
   }
   else {
      return index;
   }
}


/**
 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
 * Note: the operandType field must already be initialized.
 */
static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
                        VGPU10OperandToken0 operand0,
                        unsigned file,
                        boolean indirect, boolean index2D,
                        unsigned tempArrayID)
{
   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;

   /*
    * Compute index dimensions
    */
   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      indexDim = VGPU10_OPERAND_INDEX_0D;
      assert(operand0.selectionMode == 0);
   }
   else {
      if (index2D ||
          tempArrayID > 0 ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         indexDim = VGPU10_OPERAND_INDEX_2D;
      }
      else {
         indexDim = VGPU10_OPERAND_INDEX_1D;
      }
   }

   /*
    * Compute index representations (immediate, relative, etc).
    */
   if (tempArrayID > 0) {
      assert(file == TGSI_FILE_TEMPORARY);
      /* First index is the array ID, second index is the array element */
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      if (indirect) {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      }
   }
   else if (indirect) {
      if (file == TGSI_FILE_CONSTANT) {
         /* index[0] indicates which constant buffer while index[1] indicates
          * the position in the constant buffer.
          */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         /* All other register files are 1-dimensional */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
   }
   else {
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   }

   operand0.indexDimension = indexDim;
   operand0.index0Representation = index0Rep;
   operand0.index1Representation = index1Rep;

   return operand0;
}


/**
 * Emit the operand for expressing an address register for indirect indexing.
 * Note that the address register is really just a temp register.
 * \param addr_reg_index  which address register to use
 */
static void
emit_indirect_register(struct svga_shader_emitter_v10 *emit,
                       unsigned addr_reg_index)
{
   unsigned tmp_reg_index;
   VGPU10OperandToken0 operand0;

   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);

   tmp_reg_index = emit->address_reg_index[addr_reg_index];

   /* operand0 is a simple temporary register, selecting one component */
   operand0.value = 0;
   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   emit_dword(emit, operand0.value);
   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
}


/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read it
             * in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}


/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   const unsigned swizzleX = reg->Register.SwizzleX;
   const unsigned swizzleY = reg->Register.SwizzleY;
   const unsigned swizzleZ = reg->Register.SwizzleZ;
   const unsigned swizzleW = reg->Register.SwizzleW;
   const unsigned absolute = reg->Register.Absolute;
   const unsigned negate = reg->Register.Negate;
   bool is_prim_id = FALSE;

   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       file == TGSI_FILE_INPUT) {
      if (index == emit->fs.face_input_index) {
         /* Replace INPUT[FACE] with TEMP[FACE] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.face_tmp_index;
      }
      else if (index == emit->fs.fragcoord_input_index) {
         /* Replace INPUT[POSITION] with TEMP[POSITION] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.fragcoord_tmp_index;
      }
      else {
         /* We remap fragment shader inputs so that FS input indexes
          * match up with VS/GS output indexes.
          */
         index = emit->linkage.input_map[index];
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
            file == TGSI_FILE_INPUT) {
      is_prim_id = (index == emit->gs.prim_id_index);
      index = emit->linkage.input_map[index];
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         assert(index < Elements(emit->system_value_indexes));
         index = emit->system_value_indexes[index];
      }
   }

   operand0.value = operand1.value = 0;

   if (is_prim_id) {
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   }
   else {
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values */
      unsigned *c;
      assert(index < Elements(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         emit_dword(emit, reg->Dimension.Index);
      }

      if (tempArrayId > 0) {
         emit_dword(emit, tempArrayId);
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}


/**
 * Emit a resource operand (for use with a SAMPLE instruction).
 */
static void
emit_resource_register(struct svga_shader_emitter_v10 *emit,
                       unsigned resource_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
   emit_dword(emit, resource_number);
}


/**
 * Emit a sampler operand (for use with a SAMPLE instruction).
 */
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
                      unsigned sampler_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;

   emit_dword(emit, operand0.value);
   emit_dword(emit, sampler_number);
}


/**
 * Emit an operand which reads the IS_FRONT_FACING register.
1163 */ 1164static void 1165emit_face_register(struct svga_shader_emitter_v10 *emit) 1166{ 1167 VGPU10OperandToken0 operand0; 1168 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 1169 1170 /* init */ 1171 operand0.value = 0; 1172 1173 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 1174 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1175 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1176 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1177 1178 operand0.swizzleX = VGPU10_COMPONENT_X; 1179 operand0.swizzleY = VGPU10_COMPONENT_X; 1180 operand0.swizzleZ = VGPU10_COMPONENT_X; 1181 operand0.swizzleW = VGPU10_COMPONENT_X; 1182 1183 emit_dword(emit, operand0.value); 1184 emit_dword(emit, index); 1185} 1186 1187 1188/** 1189 * Emit the token for a VGPU10 opcode. 1190 * \param saturate clamp result to [0,1]? 1191 */ 1192static void 1193emit_opcode(struct svga_shader_emitter_v10 *emit, 1194 unsigned vgpu10_opcode, boolean saturate) 1195{ 1196 VGPU10OpcodeToken0 token0; 1197 1198 token0.value = 0; /* init all fields to zero */ 1199 token0.opcodeType = vgpu10_opcode; 1200 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1201 token0.saturate = saturate; 1202 1203 emit_dword(emit, token0.value); 1204} 1205 1206 1207/** 1208 * Emit the token for a VGPU10 resinfo instruction. 1209 * \param modifier return type modifier, _uint or _rcpFloat. 1210 * TODO: We may want to remove this parameter if it will 1211 * only ever be used as _uint. 
1212 */ 1213static void 1214emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 1215 VGPU10_RESINFO_RETURN_TYPE modifier) 1216{ 1217 VGPU10OpcodeToken0 token0; 1218 1219 token0.value = 0; /* init all fields to zero */ 1220 token0.opcodeType = VGPU10_OPCODE_RESINFO; 1221 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1222 token0.resinfoReturnType = modifier; 1223 1224 emit_dword(emit, token0.value); 1225} 1226 1227 1228/** 1229 * Emit opcode tokens for a texture sample instruction. Texture instructions 1230 * can be rather complicated (texel offsets, etc) so we have this specialized 1231 * function. 1232 */ 1233static void 1234emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 1235 unsigned vgpu10_opcode, boolean saturate, 1236 const int offsets[3]) 1237{ 1238 VGPU10OpcodeToken0 token0; 1239 VGPU10OpcodeToken1 token1; 1240 1241 token0.value = 0; /* init all fields to zero */ 1242 token0.opcodeType = vgpu10_opcode; 1243 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1244 token0.saturate = saturate; 1245 1246 if (offsets[0] || offsets[1] || offsets[2]) { 1247 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1248 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1249 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1250 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1251 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1252 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1253 1254 token0.extended = 1; 1255 token1.value = 0; 1256 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 1257 token1.offsetU = offsets[0]; 1258 token1.offsetV = offsets[1]; 1259 token1.offsetW = offsets[2]; 1260 } 1261 1262 emit_dword(emit, token0.value); 1263 if (token0.extended) { 1264 emit_dword(emit, token1.value); 1265 } 1266} 1267 1268 1269/** 1270 * Emit a DISCARD opcode token. 1271 * If nonzero is set, we'll discard the fragment if the X component is not 0. 
 * Otherwise, we'll discard the fragment if the X component is 0.
 */
static void
emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
{
   VGPU10OpcodeToken0 opcode0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   if (nonzero)
      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   emit_dword(emit, opcode0.value);
}


/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}


/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = FALSE;
}


/**
 * Return index for a free temporary register.
 * Note: temps allocated here live after the regular shader temps and
 * are recycled via free_temp_indexes().
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   return emit->num_shader_temps + emit->internal_temp_count++;
}


/**
 * Release the temporaries which were generated by get_temp_index().
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}


/**
 * Create a tgsi_full_src_register with an identity (xyzw) swizzle.
 */
static struct tgsi_full_src_register
make_src_reg(unsigned file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}


/**
 * Create a tgsi_full_src_register for a temporary.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_src_register for a constant.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}


/**
 * Create a tgsi_full_src_register for an immediate constant.
1399 */ 1400static struct tgsi_full_src_register 1401make_src_immediate_reg(unsigned index) 1402{ 1403 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1404} 1405 1406 1407/** 1408 * Create a tgsi_full_dst_register. 1409 */ 1410static struct tgsi_full_dst_register 1411make_dst_reg(unsigned file, unsigned index) 1412{ 1413 struct tgsi_full_dst_register reg; 1414 1415 memset(®, 0, sizeof(reg)); 1416 reg.Register.File = file; 1417 reg.Register.Index = index; 1418 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1419 return reg; 1420} 1421 1422 1423/** 1424 * Create a tgsi_full_dst_register for a temporary. 1425 */ 1426static struct tgsi_full_dst_register 1427make_dst_temp_reg(unsigned index) 1428{ 1429 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1430} 1431 1432 1433/** 1434 * Create a tgsi_full_dst_register for an output. 1435 */ 1436static struct tgsi_full_dst_register 1437make_dst_output_reg(unsigned index) 1438{ 1439 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1440} 1441 1442 1443/** 1444 * Create negated tgsi_full_src_register. 1445 */ 1446static struct tgsi_full_src_register 1447negate_src(const struct tgsi_full_src_register *reg) 1448{ 1449 struct tgsi_full_src_register neg = *reg; 1450 neg.Register.Negate = !reg->Register.Negate; 1451 return neg; 1452} 1453 1454/** 1455 * Create absolute value of a tgsi_full_src_register. 
1456 */ 1457static struct tgsi_full_src_register 1458absolute_src(const struct tgsi_full_src_register *reg) 1459{ 1460 struct tgsi_full_src_register absolute = *reg; 1461 absolute.Register.Absolute = 1; 1462 return absolute; 1463} 1464 1465 1466/** Return the named swizzle term from the src register */ 1467static inline unsigned 1468get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) 1469{ 1470 switch (term) { 1471 case TGSI_SWIZZLE_X: 1472 return reg->Register.SwizzleX; 1473 case TGSI_SWIZZLE_Y: 1474 return reg->Register.SwizzleY; 1475 case TGSI_SWIZZLE_Z: 1476 return reg->Register.SwizzleZ; 1477 case TGSI_SWIZZLE_W: 1478 return reg->Register.SwizzleW; 1479 default: 1480 assert(!"Bad swizzle"); 1481 return TGSI_SWIZZLE_X; 1482 } 1483} 1484 1485 1486/** 1487 * Create swizzled tgsi_full_src_register. 1488 */ 1489static struct tgsi_full_src_register 1490swizzle_src(const struct tgsi_full_src_register *reg, 1491 unsigned swizzleX, unsigned swizzleY, 1492 unsigned swizzleZ, unsigned swizzleW) 1493{ 1494 struct tgsi_full_src_register swizzled = *reg; 1495 /* Note: we swizzle the current swizzle */ 1496 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1497 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1498 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1499 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1500 return swizzled; 1501} 1502 1503 1504/** 1505 * Create swizzled tgsi_full_src_register where all the swizzle 1506 * terms are the same. 
1507 */ 1508static struct tgsi_full_src_register 1509scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) 1510{ 1511 struct tgsi_full_src_register swizzled = *reg; 1512 /* Note: we swizzle the current swizzle */ 1513 swizzled.Register.SwizzleX = 1514 swizzled.Register.SwizzleY = 1515 swizzled.Register.SwizzleZ = 1516 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1517 return swizzled; 1518} 1519 1520 1521/** 1522 * Create new tgsi_full_dst_register with writemask. 1523 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1524 */ 1525static struct tgsi_full_dst_register 1526writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1527{ 1528 struct tgsi_full_dst_register masked = *reg; 1529 masked.Register.WriteMask = mask; 1530 return masked; 1531} 1532 1533 1534/** 1535 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1536 */ 1537static boolean 1538same_swizzle_terms(const struct tgsi_full_src_register *reg) 1539{ 1540 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1541 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1542 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1543} 1544 1545 1546/** 1547 * Search the vector for the value 'x' and return its position. 1548 */ 1549static int 1550find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1551 union tgsi_immediate_data x) 1552{ 1553 unsigned i; 1554 for (i = 0; i < 4; i++) { 1555 if (vec[i].Int == x.Int) 1556 return i; 1557 } 1558 return -1; 1559} 1560 1561 1562/** 1563 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 
 */
static int
find_immediate(struct svga_shader_emitter_v10 *emit,
               union tgsi_immediate_data x, unsigned startIndex)
{
   const unsigned endIndex = emit->num_immediates;
   unsigned i;

   assert(emit->immediates_emitted);

   /* Search immediates for x, y, z, w.
    * Note: comparisons use the integer bit pattern, so this handles
    * both int and float immediate values.
    */
   for (i = startIndex; i < endIndex; i++) {
      if (x.Int == emit->immediates[i][0].Int ||
          x.Int == emit->immediates[i][1].Int ||
          x.Int == emit->immediates[i][2].Int ||
          x.Int == emit->immediates[i][3].Int) {
         return i;
      }
   }
   /* Should never try to use an immediate value that wasn't pre-declared */
   assert(!"find_immediate() failed!");
   return -1;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data[4] value.
 * Note: the values must have been previously declared/allocated in
 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
 * vec4 immediate.
 */
static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
                     const union tgsi_immediate_data imm[4])
{
   struct tgsi_full_src_register reg;
   unsigned i;

   for (i = 0; i < emit->num_common_immediates; i++) {
      /* search for first component value */
      int immpos = find_immediate(emit, imm[0], i);
      int x, y, z, w;

      assert(immpos >= 0);

      /* find remaining components within the immediate vector */
      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);

      if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
         /* found them all - select the components via the swizzle */
         memset(&reg, 0, sizeof(reg));
         reg.Register.File = TGSI_FILE_IMMEDIATE;
         reg.Register.Index = immpos;
         reg.Register.SwizzleX = x;
         reg.Register.SwizzleY = y;
         reg.Register.SwizzleZ = z;
         reg.Register.SwizzleW = w;
         return reg;
      }
      /* else, keep searching */
   }

   assert(!"Failed to find immediate register!");

   /* Just return IMM[0].xxxx */
   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data value of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 *emit,
                   union tgsi_immediate_data value)
{
   struct tgsi_full_src_register reg;
   int immpos = find_immediate(emit, value, 0);

   assert(immpos >= 0);

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   reg.Register.Index = immpos;
   /* broadcast the matching component to all four channels */
   reg.Register.SwizzleX =
   reg.Register.SwizzleY =
   reg.Register.SwizzleZ =
   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);

   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
                          float x, float y, float z, float w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Float = x;
   imm[1].Float = y;
   imm[2].Float = z;
   imm[3].Float = w;
   return make_immediate_reg_4(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
1686 */ 1687static struct tgsi_full_src_register 1688make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1689{ 1690 union tgsi_immediate_data imm; 1691 imm.Float = value; 1692 return make_immediate_reg(emit, imm); 1693} 1694 1695 1696/** 1697 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 1698 */ 1699static struct tgsi_full_src_register 1700make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1701 int x, int y, int z, int w) 1702{ 1703 union tgsi_immediate_data imm[4]; 1704 imm[0].Int = x; 1705 imm[1].Int = y; 1706 imm[2].Int = z; 1707 imm[3].Int = w; 1708 return make_immediate_reg_4(emit, imm); 1709} 1710 1711 1712/** 1713 * Return a tgsi_full_src_register for an immediate/literal int value 1714 * of the form {value, value, value, value}. 1715 * \sa make_immediate_reg_4() regarding allowed values. 1716 */ 1717static struct tgsi_full_src_register 1718make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1719{ 1720 union tgsi_immediate_data imm; 1721 imm.Int = value; 1722 return make_immediate_reg(emit, imm); 1723} 1724 1725 1726/** 1727 * Allocate space for a union tgsi_immediate_data[4] immediate. 1728 * \return the index/position of the immediate. 1729 */ 1730static unsigned 1731alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1732 const union tgsi_immediate_data imm[4]) 1733{ 1734 unsigned n = emit->num_immediates++; 1735 assert(!emit->immediates_emitted); 1736 assert(n < Elements(emit->immediates)); 1737 emit->immediates[n][0] = imm[0]; 1738 emit->immediates[n][1] = imm[1]; 1739 emit->immediates[n][2] = imm[2]; 1740 emit->immediates[n][3] = imm[3]; 1741 return n; 1742} 1743 1744 1745/** 1746 * Allocate space for a float[4] immediate. 1747 * \return the index/position of the immediate. 
1748 */ 1749static unsigned 1750alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1751 float x, float y, float z, float w) 1752{ 1753 union tgsi_immediate_data imm[4]; 1754 imm[0].Float = x; 1755 imm[1].Float = y; 1756 imm[2].Float = z; 1757 imm[3].Float = w; 1758 return alloc_immediate_4(emit, imm); 1759} 1760 1761 1762/** 1763 * Allocate space for a int[4] immediate. 1764 * \return the index/position of the immediate. 1765 */ 1766static unsigned 1767alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1768 int x, int y, int z, int w) 1769{ 1770 union tgsi_immediate_data imm[4]; 1771 imm[0].Int = x; 1772 imm[1].Int = y; 1773 imm[2].Int = z; 1774 imm[3].Int = w; 1775 return alloc_immediate_4(emit, imm); 1776} 1777 1778 1779/** 1780 * Allocate a shader input to store a system value. 1781 */ 1782static unsigned 1783alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1784{ 1785 const unsigned n = emit->info.num_inputs + index; 1786 assert(index < Elements(emit->system_value_indexes)); 1787 emit->system_value_indexes[index] = n; 1788 return n; 1789} 1790 1791 1792/** 1793 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1794 */ 1795static boolean 1796emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1797 const struct tgsi_full_immediate *imm) 1798{ 1799 /* We don't actually emit any code here. We just save the 1800 * immediate values and emit them later. 1801 */ 1802 alloc_immediate_4(emit, imm->u); 1803 return TRUE; 1804} 1805 1806 1807/** 1808 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1809 * containing all the immediate values previously allocated 1810 * with alloc_immediate_4(). 
1811 */ 1812static boolean 1813emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 1814{ 1815 VGPU10OpcodeToken0 token; 1816 1817 assert(!emit->immediates_emitted); 1818 1819 token.value = 0; 1820 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 1821 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 1822 1823 /* Note: no begin/end_emit_instruction() calls */ 1824 emit_dword(emit, token.value); 1825 emit_dword(emit, 2 + 4 * emit->num_immediates); 1826 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 1827 1828 emit->immediates_emitted = TRUE; 1829 1830 return TRUE; 1831} 1832 1833 1834/** 1835 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 1836 * interpolation mode. 1837 * \return a VGPU10_INTERPOLATION_x value 1838 */ 1839static unsigned 1840translate_interpolation(const struct svga_shader_emitter_v10 *emit, 1841 unsigned interp, unsigned interpolate_loc) 1842{ 1843 if (interp == TGSI_INTERPOLATE_COLOR) { 1844 interp = emit->key.fs.flatshade ? 1845 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 1846 } 1847 1848 switch (interp) { 1849 case TGSI_INTERPOLATE_CONSTANT: 1850 return VGPU10_INTERPOLATION_CONSTANT; 1851 case TGSI_INTERPOLATE_LINEAR: 1852 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1853 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : 1854 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 1855 case TGSI_INTERPOLATE_PERSPECTIVE: 1856 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1857 VGPU10_INTERPOLATION_LINEAR_CENTROID : 1858 VGPU10_INTERPOLATION_LINEAR; 1859 default: 1860 assert(!"Unexpected interpolation mode"); 1861 return VGPU10_INTERPOLATION_CONSTANT; 1862 } 1863} 1864 1865 1866/** 1867 * Translate a TGSI property to VGPU10. 1868 * Don't emit any instructions yet, only need to gather the primitive property information. 1869 * The output primitive topology might be changed later. 
The final property instructions
 * will be emitted as part of the pre-helper code.
 */
static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
                     const struct tgsi_full_property *prop)
{
   /* PIPE_PRIM_x -> VGPU10 GS input primitive type */
   static const VGPU10_PRIMITIVE primType[] = {
      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* PIPE_PRIM_x -> VGPU10 GS output topology */
   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* VGPU10_PRIMITIVE_x -> number of GS input vertices */
   static const unsigned inputArraySize[] = {
      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
      1,       /* VGPU10_PRIMITIVE_POINT */
      2,       /* VGPU10_PRIMITIVE_LINE */
      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
      0,
      0,
      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   };

   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      assert(prop->u[0].Data < Elements(primType));
      emit->gs.prim_type = primType[prop->u[0].Data];
      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
      break;

   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      assert(prop->u[0].Data < Elements(primTopology));
      emit->gs.prim_topology = primTopology[prop->u[0].Data];
      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
      break;

   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      emit->gs.max_out_vertices = prop->u[0].Data;
      break;

   default:
      break;
   }

   return TRUE;
}


/**
 * Emit a single property declaration instruction.
 * \param nData  if non-zero, emit one extra data dword after the opcode
 */
static void
emit_property_instruction(struct svga_shader_emitter_v10 *emit,
                          VGPU10OpcodeToken0 opcode0, unsigned nData,
                          unsigned data)
{
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   if (nData)
      emit_dword(emit, data);
   end_emit_instruction(emit);
}


/**
 * Emit property instructions
 */
static void
emit_property_instructions(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;

   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* emit input primitive type declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   opcode0.primitive = emit->gs.prim_type;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit output primitive topology declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   opcode0.primitiveTopology = emit->gs.prim_topology;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit max output vertices */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
}


/**
 * Emit a vgpu10 declaration "instruction".
 * \param index  the register index
 * \param size   array size of the operand. In most cases, it is 1,
 *               but for inputs to geometry shader, the array size varies
 *               depending on the primitive type.
 */
static void
emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                      VGPU10OpcodeToken0 opcode0,
                      VGPU10OperandToken0 operand0,
                      VGPU10NameToken name_token,
                      unsigned index, unsigned size)
{
   assert(opcode0.opcodeType);
   assert(operand0.mask);

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);

   emit_dword(emit, operand0.value);

   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
      /* Next token is the index of the register to declare */
      emit_dword(emit, index);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
      /* Next token is the size of the register */
      emit_dword(emit, size);

      /* Followed by the index of the register */
      emit_dword(emit, index);
   }

   if (name_token.value) {
      emit_dword(emit, name_token.value);
   }

   end_emit_instruction(emit);
}


/**
 * Emit the declaration for a shader input.
 * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim         index dimension
 * \param index       the input register index
 * \param size        array size of the operand. In most cases, it is 1,
 *                    but for inputs to geometry shader, the array size varies
 *                    depending on the primitive type.
 * \param name        one of VGPU10_NAME_x
 * \param numComp     number of components
 * \param selMode     component selection mode
 * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode  interpolation mode
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       unsigned opcodeType, unsigned operandType,
                       unsigned dim, unsigned index, unsigned size,
                       unsigned name, unsigned numComp,
                       unsigned selMode, unsigned usageMask,
                       unsigned interpMode)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* sanity-check all the enum-valued parameters */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE);
   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
}


/**
 * Emit the declaration for a shader output.
 * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index  the output register index
 * \param name  one of VGPU10_NAME_x
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 */
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
                        unsigned type, unsigned index,
                        unsigned name, unsigned usageMask)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_CLIP_DISTANCE);

   check_register_index(emit, type, index);

   opcode0.value = operand0.value = name_token.value = 0;
2132 2133 opcode0.opcodeType = type; 2134 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 2135 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2136 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2137 operand0.mask = usageMask; 2138 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2139 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2140 2141 name_token.name = name; 2142 2143 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 2144} 2145 2146 2147/** 2148 * Emit the declaration for the fragment depth output. 2149 */ 2150static void 2151emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 2152{ 2153 VGPU10OpcodeToken0 opcode0; 2154 VGPU10OperandToken0 operand0; 2155 VGPU10NameToken name_token; 2156 2157 assert(emit->unit == PIPE_SHADER_FRAGMENT); 2158 2159 opcode0.value = operand0.value = name_token.value = 0; 2160 2161 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 2162 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 2163 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 2164 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2165 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2166 2167 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 2168} 2169 2170 2171/** 2172 * Emit the declaration for a system value input/output. 
2173 */ 2174static void 2175emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 2176 unsigned semantic_name, unsigned index) 2177{ 2178 switch (semantic_name) { 2179 case TGSI_SEMANTIC_INSTANCEID: 2180 index = alloc_system_value_index(emit, index); 2181 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2182 VGPU10_OPERAND_TYPE_INPUT, 2183 VGPU10_OPERAND_INDEX_1D, 2184 index, 1, 2185 VGPU10_NAME_INSTANCE_ID, 2186 VGPU10_OPERAND_4_COMPONENT, 2187 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2188 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2189 VGPU10_INTERPOLATION_UNDEFINED); 2190 break; 2191 case TGSI_SEMANTIC_VERTEXID: 2192 index = alloc_system_value_index(emit, index); 2193 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2194 VGPU10_OPERAND_TYPE_INPUT, 2195 VGPU10_OPERAND_INDEX_1D, 2196 index, 1, 2197 VGPU10_NAME_VERTEX_ID, 2198 VGPU10_OPERAND_4_COMPONENT, 2199 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2200 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2201 VGPU10_INTERPOLATION_UNDEFINED); 2202 break; 2203 default: 2204 ; /* XXX */ 2205 } 2206} 2207 2208/** 2209 * Translate a TGSI declaration to VGPU10. 2210 */ 2211static boolean 2212emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 2213 const struct tgsi_full_declaration *decl) 2214{ 2215 switch (decl->Declaration.File) { 2216 case TGSI_FILE_INPUT: 2217 /* do nothing - see emit_input_declarations() */ 2218 return TRUE; 2219 2220 case TGSI_FILE_OUTPUT: 2221 assert(decl->Range.First == decl->Range.Last); 2222 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 2223 return TRUE; 2224 2225 case TGSI_FILE_TEMPORARY: 2226 /* Don't declare the temps here. Just keep track of how many 2227 * and emit the declaration later. 2228 */ 2229 if (decl->Declaration.Array) { 2230 /* Indexed temporary array. Save the start index of the array 2231 * and the size of the array. 
2232 */ 2233 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 2234 unsigned i; 2235 2236 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 2237 2238 /* Save this array so we can emit the declaration for it later */ 2239 emit->temp_arrays[arrayID].start = decl->Range.First; 2240 emit->temp_arrays[arrayID].size = 2241 decl->Range.Last - decl->Range.First + 1; 2242 2243 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 2244 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 2245 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 2246 2247 /* Fill in the temp_map entries for this array */ 2248 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2249 emit->temp_map[i].arrayId = arrayID; 2250 emit->temp_map[i].index = i - decl->Range.First; 2251 } 2252 } 2253 2254 /* for all temps, indexed or not, keep track of highest index */ 2255 emit->num_shader_temps = MAX2(emit->num_shader_temps, 2256 decl->Range.Last + 1); 2257 return TRUE; 2258 2259 case TGSI_FILE_CONSTANT: 2260 /* Don't declare constants here. Just keep track and emit later. */ 2261 { 2262 unsigned constbuf = 0, num_consts; 2263 if (decl->Declaration.Dimension) { 2264 constbuf = decl->Dim.Index2D; 2265 } 2266 /* We throw an assertion here when, in fact, the shader should never 2267 * have linked due to constbuf index out of bounds, so we shouldn't 2268 * have reached here. 
2269 */ 2270 assert(constbuf < Elements(emit->num_shader_consts)); 2271 2272 num_consts = MAX2(emit->num_shader_consts[constbuf], 2273 decl->Range.Last + 1); 2274 2275 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 2276 debug_printf("Warning: constant buffer is declared to size [%u]" 2277 " but [%u] is the limit.\n", 2278 num_consts, 2279 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2280 } 2281 /* The linker doesn't enforce the max UBO size so we clamp here */ 2282 emit->num_shader_consts[constbuf] = 2283 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2284 } 2285 return TRUE; 2286 2287 case TGSI_FILE_IMMEDIATE: 2288 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 2289 return FALSE; 2290 2291 case TGSI_FILE_SYSTEM_VALUE: 2292 emit_system_value_declaration(emit, decl->Semantic.Name, 2293 decl->Range.First); 2294 return TRUE; 2295 2296 case TGSI_FILE_SAMPLER: 2297 /* Don't declare samplers here. Just keep track and emit later. */ 2298 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 2299 return TRUE; 2300 2301 case TGSI_FILE_RESOURCE: 2302 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 2303 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 2304 assert(!"TGSI_FILE_RESOURCE not handled yet"); 2305 return FALSE; 2306 2307 case TGSI_FILE_ADDRESS: 2308 emit->num_address_regs = MAX2(emit->num_address_regs, 2309 decl->Range.Last + 1); 2310 return TRUE; 2311 2312 case TGSI_FILE_SAMPLER_VIEW: 2313 /* Not used at this time, but maybe in the future. 2314 * See emit_resource_declarations(). 2315 */ 2316 return TRUE; 2317 2318 default: 2319 assert(!"Unexpected type of declaration"); 2320 return FALSE; 2321 } 2322} 2323 2324 2325 2326/** 2327 * Emit all input declarations. 
2328 */ 2329static boolean 2330emit_input_declarations(struct svga_shader_emitter_v10 *emit) 2331{ 2332 unsigned i; 2333 2334 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2335 2336 for (i = 0; i < emit->linkage.num_inputs; i++) { 2337 unsigned semantic_name = emit->info.input_semantic_name[i]; 2338 unsigned usage_mask = emit->info.input_usage_mask[i]; 2339 unsigned index = emit->linkage.input_map[i]; 2340 unsigned type, interpolationMode, name; 2341 2342 if (usage_mask == 0) 2343 continue; /* register is not actually used */ 2344 2345 if (semantic_name == TGSI_SEMANTIC_POSITION) { 2346 /* fragment position input */ 2347 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2348 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 2349 name = VGPU10_NAME_POSITION; 2350 if (usage_mask & TGSI_WRITEMASK_W) { 2351 /* we need to replace use of 'w' with '1/w' */ 2352 emit->fs.fragcoord_input_index = i; 2353 } 2354 } 2355 else if (semantic_name == TGSI_SEMANTIC_FACE) { 2356 /* fragment front-facing input */ 2357 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2358 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2359 name = VGPU10_NAME_IS_FRONT_FACE; 2360 emit->fs.face_input_index = i; 2361 } 2362 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2363 /* primitive ID */ 2364 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2365 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2366 name = VGPU10_NAME_PRIMITIVE_ID; 2367 } 2368 else { 2369 /* general fragment input */ 2370 type = VGPU10_OPCODE_DCL_INPUT_PS; 2371 interpolationMode = 2372 translate_interpolation(emit, 2373 emit->info.input_interpolate[i], 2374 emit->info.input_interpolate_loc[i]); 2375 2376 /* keeps track if flat interpolation mode is being used */ 2377 emit->uses_flat_interp = emit->uses_flat_interp || 2378 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 2379 2380 name = VGPU10_NAME_UNDEFINED; 2381 } 2382 2383 emit_input_declaration(emit, type, 2384 VGPU10_OPERAND_TYPE_INPUT, 2385 VGPU10_OPERAND_INDEX_1D, index, 1, 2386 name, 2387 
VGPU10_OPERAND_4_COMPONENT, 2388 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2389 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2390 interpolationMode); 2391 } 2392 } 2393 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 2394 2395 for (i = 0; i < emit->info.num_inputs; i++) { 2396 unsigned semantic_name = emit->info.input_semantic_name[i]; 2397 unsigned usage_mask = emit->info.input_usage_mask[i]; 2398 unsigned index = emit->linkage.input_map[i]; 2399 unsigned opcodeType, operandType; 2400 unsigned numComp, selMode; 2401 unsigned name; 2402 unsigned dim; 2403 2404 if (usage_mask == 0) 2405 continue; /* register is not actually used */ 2406 2407 opcodeType = VGPU10_OPCODE_DCL_INPUT; 2408 operandType = VGPU10_OPERAND_TYPE_INPUT; 2409 numComp = VGPU10_OPERAND_4_COMPONENT; 2410 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2411 name = VGPU10_NAME_UNDEFINED; 2412 2413 /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ 2414 dim = VGPU10_OPERAND_INDEX_2D; 2415 2416 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2417 /* Primitive ID */ 2418 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 2419 dim = VGPU10_OPERAND_INDEX_0D; 2420 numComp = VGPU10_OPERAND_0_COMPONENT; 2421 selMode = 0; 2422 2423 /* also save the register index so we can check for 2424 * primitive id when emit src register. We need to modify the 2425 * operand type, index dimension when emit primitive id src reg. 
2426 */ 2427 emit->gs.prim_id_index = i; 2428 } 2429 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2430 /* vertex position input */ 2431 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 2432 name = VGPU10_NAME_POSITION; 2433 } 2434 2435 emit_input_declaration(emit, opcodeType, operandType, 2436 dim, index, 2437 emit->gs.input_size, 2438 name, 2439 numComp, selMode, 2440 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2441 VGPU10_INTERPOLATION_UNDEFINED); 2442 } 2443 } 2444 else { 2445 assert(emit->unit == PIPE_SHADER_VERTEX); 2446 2447 for (i = 0; i < emit->info.num_inputs; i++) { 2448 unsigned usage_mask = emit->info.input_usage_mask[i]; 2449 unsigned index = i; 2450 2451 if (usage_mask == 0) 2452 continue; /* register is not actually used */ 2453 2454 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 2455 VGPU10_OPERAND_TYPE_INPUT, 2456 VGPU10_OPERAND_INDEX_1D, index, 1, 2457 VGPU10_NAME_UNDEFINED, 2458 VGPU10_OPERAND_4_COMPONENT, 2459 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2460 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2461 VGPU10_INTERPOLATION_UNDEFINED); 2462 } 2463 } 2464 2465 return TRUE; 2466} 2467 2468 2469/** 2470 * Emit all output declarations. 
2471 */ 2472static boolean 2473emit_output_declarations(struct svga_shader_emitter_v10 *emit) 2474{ 2475 unsigned i; 2476 2477 for (i = 0; i < emit->info.num_outputs; i++) { 2478 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 2479 const unsigned semantic_name = emit->info.output_semantic_name[i]; 2480 const unsigned semantic_index = emit->info.output_semantic_index[i]; 2481 unsigned index = i; 2482 2483 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2484 if (semantic_name == TGSI_SEMANTIC_COLOR) { 2485 assert(semantic_index < Elements(emit->fs.color_out_index)); 2486 2487 emit->fs.color_out_index[semantic_index] = index; 2488 2489 /* The semantic index is the shader's color output/buffer index */ 2490 emit_output_declaration(emit, 2491 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 2492 VGPU10_NAME_UNDEFINED, 2493 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2494 2495 if (semantic_index == 0) { 2496 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 2497 /* Emit declarations for the additional color outputs 2498 * for broadcasting. 
2499 */ 2500 unsigned j; 2501 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 2502 /* Allocate a new output index */ 2503 unsigned idx = emit->info.num_outputs + j - 1; 2504 emit->fs.color_out_index[j] = idx; 2505 emit_output_declaration(emit, 2506 VGPU10_OPCODE_DCL_OUTPUT, idx, 2507 VGPU10_NAME_UNDEFINED, 2508 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2509 emit->info.output_semantic_index[idx] = j; 2510 } 2511 } 2512 } 2513 else { 2514 assert(!emit->key.fs.write_color0_to_n_cbufs); 2515 } 2516 } 2517 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2518 /* Fragment depth output */ 2519 emit_fragdepth_output_declaration(emit); 2520 } 2521 else { 2522 assert(!"Bad output semantic name"); 2523 } 2524 } 2525 else { 2526 /* VS or GS */ 2527 unsigned name, type; 2528 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2529 2530 switch (semantic_name) { 2531 case TGSI_SEMANTIC_POSITION: 2532 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2533 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2534 name = VGPU10_NAME_POSITION; 2535 /* Save the index of the vertex position output register */ 2536 emit->vposition.out_index = index; 2537 break; 2538 case TGSI_SEMANTIC_CLIPDIST: 2539 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2540 name = VGPU10_NAME_CLIP_DISTANCE; 2541 /* save the starting index of the clip distance output register */ 2542 if (semantic_index == 0) 2543 emit->clip_dist_out_index = index; 2544 writemask = emit->output_usage_mask[index]; 2545 writemask = apply_clip_plane_mask(emit, writemask, semantic_index); 2546 if (writemask == 0x0) { 2547 continue; /* discard this do-nothing declaration */ 2548 } 2549 break; 2550 case TGSI_SEMANTIC_PRIMID: 2551 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2552 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2553 name = VGPU10_NAME_PRIMITIVE_ID; 2554 break; 2555 case TGSI_SEMANTIC_LAYER: 2556 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2557 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2558 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 2559 break; 
2560 case TGSI_SEMANTIC_CLIPVERTEX: 2561 type = VGPU10_OPCODE_DCL_OUTPUT; 2562 name = VGPU10_NAME_UNDEFINED; 2563 emit->clip_vertex_out_index = index; 2564 break; 2565 default: 2566 /* generic output */ 2567 type = VGPU10_OPCODE_DCL_OUTPUT; 2568 name = VGPU10_NAME_UNDEFINED; 2569 } 2570 2571 emit_output_declaration(emit, type, index, name, writemask); 2572 } 2573 } 2574 2575 if (emit->vposition.so_index != INVALID_INDEX && 2576 emit->vposition.out_index != INVALID_INDEX) { 2577 2578 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2579 2580 /* Emit the declaration for the non-adjusted vertex position 2581 * for stream output purpose 2582 */ 2583 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2584 emit->vposition.so_index, 2585 VGPU10_NAME_UNDEFINED, 2586 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2587 } 2588 2589 if (emit->clip_dist_so_index != INVALID_INDEX && 2590 emit->clip_dist_out_index != INVALID_INDEX) { 2591 2592 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2593 2594 /* Emit the declaration for the clip distance shadow copy which 2595 * will be used for stream output purpose and for clip distance 2596 * varying variable 2597 */ 2598 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2599 emit->clip_dist_so_index, 2600 VGPU10_NAME_UNDEFINED, 2601 emit->output_usage_mask[emit->clip_dist_out_index]); 2602 2603 if (emit->info.num_written_clipdistance > 4) { 2604 /* for the second clip distance register, each handles 4 planes */ 2605 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2606 emit->clip_dist_so_index + 1, 2607 VGPU10_NAME_UNDEFINED, 2608 emit->output_usage_mask[emit->clip_dist_out_index+1]); 2609 } 2610 } 2611 2612 return TRUE; 2613} 2614 2615 2616/** 2617 * Emit the declaration for the temporary registers. 
2618 */ 2619static boolean 2620emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 2621{ 2622 unsigned total_temps, reg, i; 2623 2624 total_temps = emit->num_shader_temps; 2625 2626 /* Allocate extra temps for specially-implemented instructions, 2627 * such as LIT. 2628 */ 2629 total_temps += MAX_INTERNAL_TEMPS; 2630 2631 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 2632 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 2633 emit->key.clip_plane_enable || 2634 emit->vposition.so_index != INVALID_INDEX) { 2635 emit->vposition.tmp_index = total_temps; 2636 total_temps += 1; 2637 } 2638 2639 if (emit->unit == PIPE_SHADER_VERTEX) { 2640 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 2641 emit->key.vs.adjust_attrib_itof | 2642 emit->key.vs.adjust_attrib_utof | 2643 emit->key.vs.attrib_is_bgra | 2644 emit->key.vs.attrib_puint_to_snorm | 2645 emit->key.vs.attrib_puint_to_uscaled | 2646 emit->key.vs.attrib_puint_to_sscaled); 2647 while (attrib_mask) { 2648 unsigned index = u_bit_scan(&attrib_mask); 2649 emit->vs.adjusted_input[index] = total_temps++; 2650 } 2651 } 2652 2653 if (emit->clip_mode == CLIP_DISTANCE) { 2654 /* We need to write the clip distance to a temporary register 2655 * first. Then it will be copied to the shadow copy for 2656 * the clip distance varying variable and stream output purpose. 2657 * It will also be copied to the actual CLIPDIST register 2658 * according to the enabled clip planes 2659 */ 2660 emit->clip_dist_tmp_index = total_temps++; 2661 if (emit->info.num_written_clipdistance > 4) 2662 total_temps++; /* second clip register */ 2663 } 2664 else if (emit->clip_mode == CLIP_VERTEX) { 2665 /* We need to convert the TGSI CLIPVERTEX output to one or more 2666 * clip distances. Allocate a temp reg for the clipvertex here. 
2667 */ 2668 assert(emit->info.writes_clipvertex > 0); 2669 emit->clip_vertex_tmp_index = total_temps; 2670 total_temps++; 2671 } 2672 } 2673 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 2674 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 2675 emit->key.fs.white_fragments || 2676 emit->key.fs.write_color0_to_n_cbufs > 1) { 2677 /* Allocate a temp to hold the output color */ 2678 emit->fs.color_tmp_index = total_temps; 2679 total_temps += 1; 2680 } 2681 2682 if (emit->fs.face_input_index != INVALID_INDEX) { 2683 /* Allocate a temp for the +/-1 face register */ 2684 emit->fs.face_tmp_index = total_temps; 2685 total_temps += 1; 2686 } 2687 2688 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 2689 /* Allocate a temp for modified fragment position register */ 2690 emit->fs.fragcoord_tmp_index = total_temps; 2691 total_temps += 1; 2692 } 2693 } 2694 2695 for (i = 0; i < emit->num_address_regs; i++) { 2696 emit->address_reg_index[i] = total_temps++; 2697 } 2698 2699 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 2700 * temp indexes. Basically, we compact all the non-array temp register 2701 * indexes into a consecutive series. 
2702 * 2703 * Before, we may have some TGSI declarations like: 2704 * DCL TEMP[0..1], LOCAL 2705 * DCL TEMP[2..4], ARRAY(1), LOCAL 2706 * DCL TEMP[5..7], ARRAY(2), LOCAL 2707 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 2708 * 2709 * After, we'll have a map like this: 2710 * temp_map[0] = { array 0, index 0 } 2711 * temp_map[1] = { array 0, index 1 } 2712 * temp_map[2] = { array 1, index 0 } 2713 * temp_map[3] = { array 1, index 1 } 2714 * temp_map[4] = { array 1, index 2 } 2715 * temp_map[5] = { array 2, index 0 } 2716 * temp_map[6] = { array 2, index 1 } 2717 * temp_map[7] = { array 2, index 2 } 2718 * temp_map[8] = { array 0, index 2 } 2719 * temp_map[9] = { array 0, index 3 } 2720 * 2721 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 2722 * temps numbered 0..3 2723 * 2724 * Any time we emit a temporary register index, we'll have to use the 2725 * temp_map[] table to convert the TGSI index to the VGPU10 index. 2726 * 2727 * Finally, we recompute the total_temps value here. 2728 */ 2729 reg = 0; 2730 for (i = 0; i < total_temps; i++) { 2731 if (emit->temp_map[i].arrayId == 0) { 2732 emit->temp_map[i].index = reg++; 2733 } 2734 } 2735 total_temps = reg; 2736 2737 if (0) { 2738 debug_printf("total_temps %u\n", total_temps); 2739 for (i = 0; i < 30; i++) { 2740 debug_printf("temp %u -> array %u index %u\n", 2741 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 2742 } 2743 } 2744 2745 /* Emit declaration of ordinary temp registers */ 2746 if (total_temps > 0) { 2747 VGPU10OpcodeToken0 opcode0; 2748 2749 opcode0.value = 0; 2750 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 2751 2752 begin_emit_instruction(emit); 2753 emit_dword(emit, opcode0.value); 2754 emit_dword(emit, total_temps); 2755 end_emit_instruction(emit); 2756 } 2757 2758 /* Emit declarations for indexable temp arrays. Skip 0th entry since 2759 * it's unused. 
2760 */ 2761 for (i = 1; i < emit->num_temp_arrays; i++) { 2762 unsigned num_temps = emit->temp_arrays[i].size; 2763 2764 if (num_temps > 0) { 2765 VGPU10OpcodeToken0 opcode0; 2766 2767 opcode0.value = 0; 2768 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 2769 2770 begin_emit_instruction(emit); 2771 emit_dword(emit, opcode0.value); 2772 emit_dword(emit, i); /* which array */ 2773 emit_dword(emit, num_temps); 2774 emit_dword(emit, 4); /* num components */ 2775 end_emit_instruction(emit); 2776 2777 total_temps += num_temps; 2778 } 2779 } 2780 2781 /* Check that the grand total of all regular and indexed temps is 2782 * under the limit. 2783 */ 2784 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 2785 2786 return TRUE; 2787} 2788 2789 2790static boolean 2791emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 2792{ 2793 VGPU10OpcodeToken0 opcode0; 2794 VGPU10OperandToken0 operand0; 2795 unsigned total_consts, i; 2796 2797 opcode0.value = 0; 2798 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 2799 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 2800 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 2801 2802 operand0.value = 0; 2803 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2804 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 2805 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2806 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2807 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 2808 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2809 operand0.swizzleX = 0; 2810 operand0.swizzleY = 1; 2811 operand0.swizzleZ = 2; 2812 operand0.swizzleW = 3; 2813 2814 /** 2815 * Emit declaration for constant buffer [0]. We also allocate 2816 * room for the extra constants here. 
2817 */ 2818 total_consts = emit->num_shader_consts[0]; 2819 2820 /* Now, allocate constant slots for the "extra" constants */ 2821 2822 /* Vertex position scale/translation */ 2823 if (emit->vposition.need_prescale) { 2824 emit->vposition.prescale_scale_index = total_consts++; 2825 emit->vposition.prescale_trans_index = total_consts++; 2826 } 2827 2828 if (emit->unit == PIPE_SHADER_VERTEX) { 2829 if (emit->key.vs.undo_viewport) { 2830 emit->vs.viewport_index = total_consts++; 2831 } 2832 } 2833 2834 /* user-defined clip planes */ 2835 if (emit->key.clip_plane_enable) { 2836 unsigned n = util_bitcount(emit->key.clip_plane_enable); 2837 assert(emit->unit == PIPE_SHADER_VERTEX || 2838 emit->unit == PIPE_SHADER_GEOMETRY); 2839 for (i = 0; i < n; i++) { 2840 emit->clip_plane_const[i] = total_consts++; 2841 } 2842 } 2843 2844 /* Texcoord scale factors for RECT textures */ 2845 { 2846 for (i = 0; i < emit->num_samplers; i++) { 2847 if (emit->key.tex[i].unnormalized) { 2848 emit->texcoord_scale_index[i] = total_consts++; 2849 } 2850 } 2851 } 2852 2853 /* Texture buffer sizes */ 2854 for (i = 0; i < emit->num_samplers; i++) { 2855 if (emit->key.tex[i].texture_target == PIPE_BUFFER) { 2856 emit->texture_buffer_size_index[i] = total_consts++; 2857 } 2858 } 2859 2860 if (total_consts > 0) { 2861 begin_emit_instruction(emit); 2862 emit_dword(emit, opcode0.value); 2863 emit_dword(emit, operand0.value); 2864 emit_dword(emit, 0); /* which const buffer slot */ 2865 emit_dword(emit, total_consts); 2866 end_emit_instruction(emit); 2867 } 2868 2869 /* Declare remaining constant buffers (UBOs) */ 2870 for (i = 1; i < Elements(emit->num_shader_consts); i++) { 2871 if (emit->num_shader_consts[i] > 0) { 2872 begin_emit_instruction(emit); 2873 emit_dword(emit, opcode0.value); 2874 emit_dword(emit, operand0.value); 2875 emit_dword(emit, i); /* which const buffer slot */ 2876 emit_dword(emit, emit->num_shader_consts[i]); 2877 end_emit_instruction(emit); 2878 } 2879 } 2880 2881 return TRUE; 
2882} 2883 2884 2885/** 2886 * Emit declarations for samplers. 2887 */ 2888static boolean 2889emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 2890{ 2891 unsigned i; 2892 2893 for (i = 0; i < emit->num_samplers; i++) { 2894 VGPU10OpcodeToken0 opcode0; 2895 VGPU10OperandToken0 operand0; 2896 2897 opcode0.value = 0; 2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 2899 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 2900 2901 operand0.value = 0; 2902 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2903 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 2904 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2905 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2906 2907 begin_emit_instruction(emit); 2908 emit_dword(emit, opcode0.value); 2909 emit_dword(emit, operand0.value); 2910 emit_dword(emit, i); 2911 end_emit_instruction(emit); 2912 } 2913 2914 return TRUE; 2915} 2916 2917 2918/** 2919 * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. 2920 */ 2921static unsigned 2922pipe_texture_to_resource_dimension(unsigned target, bool msaa) 2923{ 2924 switch (target) { 2925 case PIPE_BUFFER: 2926 return VGPU10_RESOURCE_DIMENSION_BUFFER; 2927 case PIPE_TEXTURE_1D: 2928 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2929 case PIPE_TEXTURE_2D: 2930 case PIPE_TEXTURE_RECT: 2931 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS 2932 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2933 case PIPE_TEXTURE_3D: 2934 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 2935 case PIPE_TEXTURE_CUBE: 2936 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2937 case PIPE_TEXTURE_1D_ARRAY: 2938 return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; 2939 case PIPE_TEXTURE_2D_ARRAY: 2940 return msaa ? 
VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 2941 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; 2942 case PIPE_TEXTURE_CUBE_ARRAY: 2943 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; 2944 default: 2945 assert(!"Unexpected resource type"); 2946 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2947 } 2948} 2949 2950 2951/** 2952 * Given a tgsi_return_type, return true iff it is an integer type. 2953 */ 2954static boolean 2955is_integer_type(enum tgsi_return_type type) 2956{ 2957 switch (type) { 2958 case TGSI_RETURN_TYPE_SINT: 2959 case TGSI_RETURN_TYPE_UINT: 2960 return TRUE; 2961 case TGSI_RETURN_TYPE_FLOAT: 2962 case TGSI_RETURN_TYPE_UNORM: 2963 case TGSI_RETURN_TYPE_SNORM: 2964 return FALSE; 2965 case TGSI_RETURN_TYPE_COUNT: 2966 default: 2967 assert(!"is_integer_type: Unknown tgsi_return_type"); 2968 return FALSE; 2969 } 2970} 2971 2972 2973/** 2974 * Emit declarations for resources. 2975 * XXX When we're sure that all TGSI shaders will be generated with 2976 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 2977 * rework this code. 
2978 */ 2979static boolean 2980emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 2981{ 2982 unsigned i; 2983 2984 /* Emit resource decl for each sampler */ 2985 for (i = 0; i < emit->num_samplers; i++) { 2986 VGPU10OpcodeToken0 opcode0; 2987 VGPU10OperandToken0 operand0; 2988 VGPU10ResourceReturnTypeToken return_type; 2989 VGPU10_RESOURCE_RETURN_TYPE rt; 2990 2991 opcode0.value = 0; 2992 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 2993 opcode0.resourceDimension = 2994 pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, 2995 emit->key.tex[i].texture_msaa); 2996 operand0.value = 0; 2997 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2998 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 2999 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3000 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3001 3002#if 1 3003 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 3004 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 3005 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 3006 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 3007 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 3008 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 3009 assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); 3010 rt = emit->key.tex[i].return_type + 1; 3011#else 3012 switch (emit->key.tex[i].return_type) { 3013 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 3014 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 3015 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 3016 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 3017 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 3018 case TGSI_RETURN_TYPE_COUNT: 3019 default: 3020 rt = VGPU10_RETURN_TYPE_FLOAT; 3021 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 
3022 } 3023#endif 3024 3025 return_type.value = 0; 3026 return_type.component0 = rt; 3027 return_type.component1 = rt; 3028 return_type.component2 = rt; 3029 return_type.component3 = rt; 3030 3031 begin_emit_instruction(emit); 3032 emit_dword(emit, opcode0.value); 3033 emit_dword(emit, operand0.value); 3034 emit_dword(emit, i); 3035 emit_dword(emit, return_type.value); 3036 end_emit_instruction(emit); 3037 } 3038 3039 return TRUE; 3040} 3041 3042static void 3043emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 3044 unsigned opcode, 3045 const struct tgsi_full_dst_register *dst, 3046 const struct tgsi_full_src_register *src, 3047 boolean saturate) 3048{ 3049 begin_emit_instruction(emit); 3050 emit_opcode(emit, opcode, saturate); 3051 emit_dst_register(emit, dst); 3052 emit_src_register(emit, src); 3053 end_emit_instruction(emit); 3054} 3055 3056static void 3057emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 3058 unsigned opcode, 3059 const struct tgsi_full_dst_register *dst, 3060 const struct tgsi_full_src_register *src1, 3061 const struct tgsi_full_src_register *src2, 3062 boolean saturate) 3063{ 3064 begin_emit_instruction(emit); 3065 emit_opcode(emit, opcode, saturate); 3066 emit_dst_register(emit, dst); 3067 emit_src_register(emit, src1); 3068 emit_src_register(emit, src2); 3069 end_emit_instruction(emit); 3070} 3071 3072static void 3073emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 3074 unsigned opcode, 3075 const struct tgsi_full_dst_register *dst, 3076 const struct tgsi_full_src_register *src1, 3077 const struct tgsi_full_src_register *src2, 3078 const struct tgsi_full_src_register *src3, 3079 boolean saturate) 3080{ 3081 begin_emit_instruction(emit); 3082 emit_opcode(emit, opcode, saturate); 3083 emit_dst_register(emit, dst); 3084 emit_src_register(emit, src1); 3085 emit_src_register(emit, src2); 3086 emit_src_register(emit, src3); 3087 end_emit_instruction(emit); 3088} 3089 3090/** 3091 * Emit the actual clip distance 
instructions to be used for clipping
 * by copying the clip distance from the temporary registers to the
 * CLIPDIST registers written with the enabled planes mask.
 * Also copy the clip distance from the temporary to the clip distance
 * shadow copy register which will be referenced by the input shader
 */
static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register tmp_clip_dist_src;
   struct tgsi_full_dst_register clip_dist_dst;

   unsigned i;
   unsigned clip_plane_enable = emit->key.clip_plane_enable;
   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   int num_written_clipdist = emit->info.num_written_clipdistance;

   assert(emit->clip_dist_out_index != INVALID_INDEX);
   assert(emit->clip_dist_tmp_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip dist register index so
    * that the copy to the real clip dist register will not
    * attempt to copy to the temporary register again
    */
   emit->clip_dist_tmp_index = INVALID_INDEX;

   /* At most two output registers; each holds four clip distances */
   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {

      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);

      /**
       * copy to the shadow copy for use by varying variable and
       * stream output. All clip distances
       * will be written regardless of the enabled clipping planes.
       */
      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                   emit->clip_dist_so_index + i);

      /* MOV clip_dist_so, tmp_clip_dist */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                           &tmp_clip_dist_src, FALSE);

      /**
       * copy those clip distances to enabled clipping planes
       * to CLIPDIST registers for clipping
       */
      if (clip_plane_enable & 0xf) {
         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                      emit->clip_dist_out_index + i);
         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);

         /* MOV CLIPDIST, tmp_clip_dist */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                              &tmp_clip_dist_src, FALSE);
      }
      /* four clip planes per clip register */
      clip_plane_enable >>= 4;
   }
   /**
    * set the temporary clip dist register index back to the
    * temporary index for the next vertex
    */
   emit->clip_dist_tmp_index = clip_dist_tmp_index;
}

/* Declare clip distance output registers for user-defined clip planes
 * or the TGSI_CLIPVERTEX output.
 */
static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   unsigned index = emit->num_outputs;
   unsigned plane_mask;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);
   assert(num_clip_planes <= 8);

   /* Only the legacy and clip-vertex modes need extra declarations;
    * CLIP_DISTANCE shaders declare their own outputs.
    */
   if (emit->clip_mode != CLIP_LEGACY &&
       emit->clip_mode != CLIP_VERTEX) {
      return;
   }

   if (num_clip_planes == 0)
      return;

   /* Declare one or two clip output registers.  The number of components
    * in the mask reflects the number of clip planes.  For example, if 5
    * clip planes are needed, we'll declare outputs similar to:
    * dcl_output_siv o2.xyzw, clip_distance
    * dcl_output_siv o3.x, clip_distance
    */
   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */

   plane_mask = (1 << num_clip_planes) - 1;
   if (plane_mask & 0xf) {
      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
   if (plane_mask & 0xf0) {
      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
}


/**
 * Emit the instructions for writing to the clip distance registers
 * to handle legacy/automatic clip planes.
 * For each clip plane, the distance is the dot product of the vertex
 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
 * output registers already declared.
 */
static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
                             unsigned vpos_tmp_index)
{
   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);

   assert(emit->clip_mode == CLIP_LEGACY);
   assert(num_clip_planes <= 8);

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   for (i = 0; i < num_clip_planes; i++) {
      struct tgsi_full_dst_register dst;
      struct tgsi_full_src_register plane_src, vpos_src;
      /* four clip distances are packed into each output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
      vpos_src = make_src_temp_reg(vpos_tmp_index);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &vpos_src, FALSE);
   }
}


/**
 * Emit the instructions for computing the clip distance results from
 * the clip vertex temporary.
 * For each clip plane, the distance is the dot product of the clip vertex
 * position (found in a temp reg) and the clip plane coefficients.
 */
static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   unsigned i;
   struct tgsi_full_dst_register dst;
   struct tgsi_full_src_register clipvert_src;
   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   assert(emit->clip_mode == CLIP_VERTEX);

   clipvert_src = make_src_temp_reg(clip_vertex_tmp);

   for (i = 0; i < num_clip; i++) {
      struct tgsi_full_src_register plane_src;
      /* four clip distances are packed into each output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);

      /* DP4 clip_dist, plane, clipvert */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &clipvert_src, FALSE);
   }

   /* copy temporary clip vertex register to the clip vertex register */

   assert(emit->clip_vertex_out_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip vertex register index so
    * that copy to the clip vertex register will not attempt
    * to copy to the temporary register again
    */
   emit->clip_vertex_tmp_index = INVALID_INDEX;

   /* MOV clip_vertex, clip_vertex_tmp */
   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                        &dst, &clipvert_src, FALSE);

   /**
    * set the temporary clip vertex register index back to the
    * temporary index for the next vertex
    */
   emit->clip_vertex_tmp_index = clip_vertex_tmp;
}

/**
 * Emit code to convert RGBA to BGRA
 */
3309static void 3310emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 3311 const struct tgsi_full_dst_register *dst, 3312 const struct tgsi_full_src_register *src) 3313{ 3314 struct tgsi_full_src_register bgra_src = 3315 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 3316 3317 begin_emit_instruction(emit); 3318 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 3319 emit_dst_register(emit, dst); 3320 emit_src_register(emit, &bgra_src); 3321 end_emit_instruction(emit); 3322} 3323 3324 3325/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 3326static void 3327emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 3328 const struct tgsi_full_dst_register *dst, 3329 const struct tgsi_full_src_register *src) 3330{ 3331 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 3332 struct tgsi_full_src_register two = 3333 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 3334 struct tgsi_full_src_register neg_two = 3335 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 3336 3337 unsigned val_tmp = get_temp_index(emit); 3338 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 3339 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 3340 3341 unsigned bias_tmp = get_temp_index(emit); 3342 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 3343 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 3344 3345 /* val = src * 2.0 */ 3346 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, 3347 src, &two, FALSE); 3348 3349 /* bias = src > 0.5 */ 3350 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, 3351 src, &half, FALSE); 3352 3353 /* bias = bias & -2.0 */ 3354 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 3355 &bias_src, &neg_two, FALSE); 3356 3357 /* dst = val + bias */ 3358 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 3359 &val_src, &bias_src, FALSE); 3360 3361 free_temp_indexes(emit); 3362} 3363 
3364 3365/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 3366static void 3367emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 3368 const struct tgsi_full_dst_register *dst, 3369 const struct tgsi_full_src_register *src) 3370{ 3371 struct tgsi_full_src_register scale = 3372 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 3373 3374 /* dst = src * scale */ 3375 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); 3376} 3377 3378 3379/** Convert from R32_UINT to 10_10_10_2_sscaled */ 3380static void 3381emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 3382 const struct tgsi_full_dst_register *dst, 3383 const struct tgsi_full_src_register *src) 3384{ 3385 struct tgsi_full_src_register lshift = 3386 make_immediate_reg_int4(emit, 22, 12, 2, 0); 3387 struct tgsi_full_src_register rshift = 3388 make_immediate_reg_int4(emit, 22, 22, 22, 30); 3389 3390 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 3391 3392 unsigned tmp = get_temp_index(emit); 3393 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3394 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3395 3396 /* 3397 * r = (pixel << 22) >> 22; # signed int in [511, -512] 3398 * g = (pixel << 12) >> 22; # signed int in [511, -512] 3399 * b = (pixel << 2) >> 22; # signed int in [511, -512] 3400 * a = (pixel << 0) >> 30; # signed int in [1, -2] 3401 * dst = i_to_f(r,g,b,a); # convert to float 3402 */ 3403 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 3404 &src_xxxx, &lshift, FALSE); 3405 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 3406 &tmp_src, &rshift, FALSE); 3407 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); 3408 3409 free_temp_indexes(emit); 3410} 3411 3412 3413/** 3414 * Emit code for TGSI_OPCODE_ABS instruction. 
3415 */ 3416static boolean 3417emit_abs(struct svga_shader_emitter_v10 *emit, 3418 const struct tgsi_full_instruction *inst) 3419{ 3420 /* dst = ABS(s0): 3421 * dst = abs(s0) 3422 * Translates into: 3423 * MOV dst, abs(s0) 3424 */ 3425 struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]); 3426 3427 /* MOV dst, abs(s0) */ 3428 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3429 &abs_src0, inst->Instruction.Saturate); 3430 3431 return TRUE; 3432} 3433 3434 3435/** 3436 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 3437 */ 3438static boolean 3439emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 3440 const struct tgsi_full_instruction *inst) 3441{ 3442 unsigned index = inst->Dst[0].Register.Index; 3443 struct tgsi_full_dst_register dst; 3444 unsigned opcode; 3445 3446 assert(index < MAX_VGPU10_ADDR_REGS); 3447 dst = make_dst_temp_reg(emit->address_reg_index[index]); 3448 3449 /* ARL dst, s0 3450 * Translates into: 3451 * FTOI address_tmp, s0 3452 * 3453 * UARL dst, s0 3454 * Translates into: 3455 * MOV address_tmp, s0 3456 */ 3457 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 3458 opcode = VGPU10_OPCODE_FTOI; 3459 else 3460 opcode = VGPU10_OPCODE_MOV; 3461 3462 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); 3463 3464 return TRUE; 3465} 3466 3467 3468/** 3469 * Emit code for TGSI_OPCODE_CAL instruction. 3470 */ 3471static boolean 3472emit_cal(struct svga_shader_emitter_v10 *emit, 3473 const struct tgsi_full_instruction *inst) 3474{ 3475 unsigned label = inst->Label.Label; 3476 VGPU10OperandToken0 operand; 3477 operand.value = 0; 3478 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 3479 3480 begin_emit_instruction(emit); 3481 emit_dword(emit, operand.value); 3482 emit_dword(emit, label); 3483 end_emit_instruction(emit); 3484 3485 return TRUE; 3486} 3487 3488 3489/** 3490 * Emit code for TGSI_OPCODE_IABS instruction. 
3491 */ 3492static boolean 3493emit_iabs(struct svga_shader_emitter_v10 *emit, 3494 const struct tgsi_full_instruction *inst) 3495{ 3496 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 3497 * dst.y = (src0.y < 0) ? -src0.y : src0.y 3498 * dst.z = (src0.z < 0) ? -src0.z : src0.z 3499 * dst.w = (src0.w < 0) ? -src0.w : src0.w 3500 * 3501 * Translates into 3502 * IMAX dst, src, neg(src) 3503 */ 3504 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 3505 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 3506 &inst->Src[0], &neg_src, FALSE); 3507 3508 return TRUE; 3509} 3510 3511 3512/** 3513 * Emit code for TGSI_OPCODE_CMP instruction. 3514 */ 3515static boolean 3516emit_cmp(struct svga_shader_emitter_v10 *emit, 3517 const struct tgsi_full_instruction *inst) 3518{ 3519 /* dst.x = (src0.x < 0) ? src1.x : src2.x 3520 * dst.y = (src0.y < 0) ? src1.y : src2.y 3521 * dst.z = (src0.z < 0) ? src1.z : src2.z 3522 * dst.w = (src0.w < 0) ? src1.w : src2.w 3523 * 3524 * Translates into 3525 * LT tmp, src0, 0.0 3526 * MOVC dst, tmp, src1, src2 3527 */ 3528 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3529 unsigned tmp = get_temp_index(emit); 3530 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3531 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3532 3533 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, 3534 &inst->Src[0], &zero, FALSE); 3535 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 3536 &tmp_src, &inst->Src[1], &inst->Src[2], 3537 inst->Instruction.Saturate); 3538 3539 free_temp_indexes(emit); 3540 3541 return TRUE; 3542} 3543 3544 3545/** 3546 * Emit code for TGSI_OPCODE_DP2A instruction. 
3547 */ 3548static boolean 3549emit_dp2a(struct svga_shader_emitter_v10 *emit, 3550 const struct tgsi_full_instruction *inst) 3551{ 3552 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x 3553 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x 3554 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x 3555 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x 3556 * Translate into 3557 * MAD tmp.x, s0.y, s1.y, s2.x 3558 * MAD tmp.x, s0.x, s1.x, tmp.x 3559 * MOV dst.xyzw, tmp.xxxx 3560 */ 3561 unsigned tmp = get_temp_index(emit); 3562 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3563 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3564 3565 struct tgsi_full_src_register tmp_src_xxxx = 3566 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3567 struct tgsi_full_dst_register tmp_dst_x = 3568 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3569 3570 struct tgsi_full_src_register src0_xxxx = 3571 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3572 struct tgsi_full_src_register src0_yyyy = 3573 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3574 struct tgsi_full_src_register src1_xxxx = 3575 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 3576 struct tgsi_full_src_register src1_yyyy = 3577 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3578 struct tgsi_full_src_register src2_xxxx = 3579 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); 3580 3581 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, 3582 &src1_yyyy, &src2_xxxx, FALSE); 3583 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, 3584 &src1_xxxx, &tmp_src_xxxx, FALSE); 3585 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3586 &tmp_src_xxxx, inst->Instruction.Saturate); 3587 3588 free_temp_indexes(emit); 3589 3590 return TRUE; 3591} 3592 3593 3594/** 3595 * Emit code for TGSI_OPCODE_DPH instruction. 
 */
static boolean
emit_dph(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Homogeneous dot product:
    * DP3 tmp, s0, s1
    * ADD dst, tmp, s1.wwww
    */

   struct tgsi_full_src_register s1_wwww =
      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* DP3 tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* ADD dst, tmp, s1.wwww */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
                        &s1_wwww, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DST instruction.
 */
static boolean
emit_dst(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 1
    * dst.y = src0.y * src1.y
    * dst.z = src0.z
    * dst.w = src1.w
    */

   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_wwww =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);

   /*
    * If dst and either src0 and src1 are the same we need
    * to create a temporary for it and insert a extra move:
    * all components are built in the temp, then copied to dst.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MUL dst.y, s0.y, s1.y */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);

      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
                           &s1_yyyy, inst->Instruction.Saturate);
   }

   /* MOV dst.z, s0.z */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, s1.w */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
                           inst->Instruction.Saturate);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}



/**
 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
 */
static boolean
emit_endprim(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* We can't use emit_simple() because the TGSI instruction has one
    * operand (vertex stream
number) which we must ignore for VGPU10.
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   end_emit_instruction(emit);
   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
 */
static boolean
emit_ex2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = EX2(src):
    *   dst.xyzw = 2.0 ^ src.x
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* EXP tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EXP instruction.
 */
static boolean
emit_exp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 2 ^ floor(s0.x)
    * dst.y = s0.x - floor(s0.x)
    * dst.z = 2 ^ s0.x
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert a extra move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* ROUND_NI tmp.x, s0.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                        &src_xxxx, FALSE); /* round to -infinity */

   /* EXP dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* ADD dst.y, s0.x, -tmp (the fractional part of s0.x) */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);

      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
                           &neg_tmp_src, inst->Instruction.Saturate);
   }

   /* EXP dst.z, s0.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
                           FALSE);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IF instruction.
 */
static boolean
emit_if(struct svga_shader_emitter_v10 *emit,
        const struct tgsi_full_instruction *inst)
{
   VGPU10OpcodeToken0 opcode0;

   /* The src register should be a scalar */
   assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);

   /* The only special thing here is that we need to set the
    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
    * src.x is non-zero.
    */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_IF;
   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
 * the register components are negative).
3866 */ 3867static boolean 3868emit_kill_if(struct svga_shader_emitter_v10 *emit, 3869 const struct tgsi_full_instruction *inst) 3870{ 3871 unsigned tmp = get_temp_index(emit); 3872 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3873 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3874 3875 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3876 3877 struct tgsi_full_dst_register tmp_dst_x = 3878 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3879 struct tgsi_full_src_register tmp_src_xxxx = 3880 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3881 3882 /* tmp = src[0] < 0.0 */ 3883 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 3884 &zero, FALSE); 3885 3886 if (!same_swizzle_terms(&inst->Src[0])) { 3887 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 3888 * logically OR the swizzle terms. Most uses of KILL_IF only 3889 * test one channel so it's good to avoid these extra steps. 3890 */ 3891 struct tgsi_full_src_register tmp_src_yyyy = 3892 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 3893 struct tgsi_full_src_register tmp_src_zzzz = 3894 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 3895 struct tgsi_full_src_register tmp_src_wwww = 3896 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 3897 3898 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3899 &tmp_src_yyyy, FALSE); 3900 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3901 &tmp_src_zzzz, FALSE); 3902 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3903 &tmp_src_wwww, FALSE); 3904 } 3905 3906 begin_emit_instruction(emit); 3907 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 3908 emit_src_register(emit, &tmp_src_xxxx); 3909 end_emit_instruction(emit); 3910 3911 free_temp_indexes(emit); 3912 3913 return TRUE; 3914} 3915 3916 3917/** 3918 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 
 */
static boolean
emit_kill(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   /* DISCARD with test-for-zero on an immediate 0.0 => always discards */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);
   emit_src_register(emit, &zero);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LG2 instruction.
 */
static boolean
emit_lg2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = LG2(src):
    *   dst.xyzw = log2(src.x)
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LIT instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert a extra move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /*
    * dst.x = 1
    * dst.y = max(src.x, 0)
    * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
    * dst.w = 1
    */

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   /* MAX dst.y, src.x, 0.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register src_xxxx =
         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
                           &zero, inst->Instruction.Saturate);
   }

   /*
    * The specular term, built step by step (pow() via log2/mul/exp2):
    *
    * tmp1 = clamp(src.w, -128, 128);
    *   MAX tmp1, src.w, -128
    *   MIN tmp1, tmp1, 128
    *
    * tmp2 = max(tmp2, 0);
    *   MAX tmp2, src.y, 0
    *
    * tmp1 = pow(tmp2, tmp1);
    *   LOG tmp2, tmp2
    *   MUL tmp1, tmp2, tmp1
    *   EXP tmp1, tmp1
    *
    * tmp1 = (src.w == 0) ? 1 : tmp1;
    *   EQ tmp2, 0, src.w
    *   MOVC tmp1, tmp2, 1.0, tmp1
    *
    * dst.z = (0 < src.x) ? tmp1 : 0;
    *   LT tmp2, 0, src.x
    *   MOVC dst.z, tmp2, tmp1, 0.0
    */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      unsigned tmp1 = get_temp_index(emit);
      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
      unsigned tmp2 = get_temp_index(emit);
      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);

      struct tgsi_full_src_register src_xxxx =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
      struct tgsi_full_src_register src_yyyy =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
      struct tgsi_full_src_register src_wwww =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);

      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register lowerbound =
         make_immediate_reg_float(emit, -128.0f);
      struct tgsi_full_src_register upperbound =
         make_immediate_reg_float(emit, 128.0f);

      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
                           &lowerbound, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
                           &upperbound, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
                           &zero, FALSE);

      /* POW tmp1, tmp2, tmp1 */
      /* LOG tmp2, tmp2 */
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
                           FALSE);

      /* MUL tmp1, tmp2, tmp1 */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
                           &tmp1_src, FALSE);

      /* EXP tmp1, tmp1 */
      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
                           FALSE);

      /* EQ tmp2, 0, src.w */
      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
                           &src_wwww, FALSE);
      /* MOVC tmp1, tmp2, 1.0, tmp1 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
                           &tmp2_src, &one, &tmp1_src, FALSE);

      /* LT tmp2, 0, src.x */
      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
                           &src_xxxx, FALSE);
      /* MOVC dst.z, tmp2, tmp1, 0.0 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
                           &tmp2_src, &tmp1_src, &zero, FALSE);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LOG instruction.
 */
static boolean
emit_log(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = floor(lg2(abs(s0.x)))
    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
    * dst.z = lg2(abs(s0.x))
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* LOG tmp.x, abs(s0.x) -- shared by the x, y and z results */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
                           &abs_src_xxxx, FALSE);
   }

   /* MOV dst.z, tmp.x -- must happen before tmp.x is floored below */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* FLR tmp.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                           &tmp_src, FALSE);
   }

   /* MOV dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* EXP tmp.x, tmp.x */
   /* DIV dst.y, abs(s0.x), tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
                           FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LRP instruction.
4188 */ 4189static boolean 4190emit_lrp(struct svga_shader_emitter_v10 *emit, 4191 const struct tgsi_full_instruction *inst) 4192{ 4193 /* dst = LRP(s0, s1, s2): 4194 * dst = s0 * (s1 - s2) + s2 4195 * Translates into: 4196 * SUB tmp, s1, s2; tmp = s1 - s2 4197 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 4198 */ 4199 unsigned tmp = get_temp_index(emit); 4200 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 4201 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 4202 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 4203 4204 /* ADD tmp, s1, -s2 */ 4205 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, 4206 &inst->Src[1], &neg_src2, FALSE); 4207 4208 /* MAD dst, s1, tmp, s3 */ 4209 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 4210 &inst->Src[0], &src_tmp, &inst->Src[2], 4211 inst->Instruction.Saturate); 4212 4213 free_temp_indexes(emit); 4214 4215 return TRUE; 4216} 4217 4218 4219/** 4220 * Emit code for TGSI_OPCODE_POW instruction. 4221 */ 4222static boolean 4223emit_pow(struct svga_shader_emitter_v10 *emit, 4224 const struct tgsi_full_instruction *inst) 4225{ 4226 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 4227 * src1.x while VGPU10 computes four values. 
4228 * 4229 * dst = POW(src0, src1): 4230 * dst.xyzw = src0.x ^ src1.x 4231 */ 4232 unsigned tmp = get_temp_index(emit); 4233 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4234 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4235 struct tgsi_full_src_register src0_xxxx = 4236 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4237 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4238 struct tgsi_full_src_register src1_xxxx = 4239 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4240 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4241 4242 /* LOG tmp, s0.xxxx */ 4243 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, 4244 FALSE); 4245 4246 /* MUL tmp, tmp, s1.xxxx */ 4247 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, 4248 &src1_xxxx, FALSE); 4249 4250 /* EXP tmp, s0.xxxx */ 4251 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], 4252 &tmp_src, inst->Instruction.Saturate); 4253 4254 /* free tmp */ 4255 free_temp_indexes(emit); 4256 4257 return TRUE; 4258} 4259 4260 4261/** 4262 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 
4263 */ 4264static boolean 4265emit_rcp(struct svga_shader_emitter_v10 *emit, 4266 const struct tgsi_full_instruction *inst) 4267{ 4268 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4269 4270 unsigned tmp = get_temp_index(emit); 4271 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4272 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4273 4274 struct tgsi_full_dst_register tmp_dst_x = 4275 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4276 struct tgsi_full_src_register tmp_src_xxxx = 4277 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4278 4279 /* DIV tmp.x, 1.0, s0 */ 4280 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, 4281 &inst->Src[0], FALSE); 4282 4283 /* MOV dst, tmp.xxxx */ 4284 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4285 &tmp_src_xxxx, inst->Instruction.Saturate); 4286 4287 free_temp_indexes(emit); 4288 4289 return TRUE; 4290} 4291 4292 4293/** 4294 * Emit code for TGSI_OPCODE_RSQ instruction. 4295 */ 4296static boolean 4297emit_rsq(struct svga_shader_emitter_v10 *emit, 4298 const struct tgsi_full_instruction *inst) 4299{ 4300 /* dst = RSQ(src): 4301 * dst.xyzw = 1 / sqrt(src.x) 4302 * Translates into: 4303 * RSQ tmp, src.x 4304 * MOV dst, tmp.xxxx 4305 */ 4306 4307 unsigned tmp = get_temp_index(emit); 4308 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4309 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4310 4311 struct tgsi_full_dst_register tmp_dst_x = 4312 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4313 struct tgsi_full_src_register tmp_src_xxxx = 4314 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4315 4316 /* RSQ tmp, src.x */ 4317 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, 4318 &inst->Src[0], FALSE); 4319 4320 /* MOV dst, tmp.xxxx */ 4321 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4322 &tmp_src_xxxx, inst->Instruction.Saturate); 4323 4324 /* free tmp */ 4325 free_temp_indexes(emit); 4326 4327 return TRUE; 
4328} 4329 4330 4331/** 4332 * Emit code for TGSI_OPCODE_SCS instruction. 4333 */ 4334static boolean 4335emit_scs(struct svga_shader_emitter_v10 *emit, 4336 const struct tgsi_full_instruction *inst) 4337{ 4338 /* dst.x = cos(src.x) 4339 * dst.y = sin(src.x) 4340 * dst.z = 0.0 4341 * dst.w = 1.0 4342 */ 4343 struct tgsi_full_dst_register dst_x = 4344 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4345 struct tgsi_full_dst_register dst_y = 4346 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4347 struct tgsi_full_dst_register dst_zw = 4348 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); 4349 4350 struct tgsi_full_src_register zero_one = 4351 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); 4352 4353 begin_emit_instruction(emit); 4354 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); 4355 emit_dst_register(emit, &dst_y); 4356 emit_dst_register(emit, &dst_x); 4357 emit_src_register(emit, &inst->Src[0]); 4358 end_emit_instruction(emit); 4359 4360 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 4361 &dst_zw, &zero_one, inst->Instruction.Saturate); 4362 4363 return TRUE; 4364} 4365 4366 4367/** 4368 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 4369 */ 4370static boolean 4371emit_seq(struct svga_shader_emitter_v10 *emit, 4372 const struct tgsi_full_instruction *inst) 4373{ 4374 /* dst = SEQ(s0, s1): 4375 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 4376 * Translates into: 4377 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4378 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4379 */ 4380 unsigned tmp = get_temp_index(emit); 4381 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4382 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4383 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4384 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4385 4386 /* EQ tmp, s0, s1 */ 4387 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 4388 &inst->Src[1], FALSE); 4389 4390 /* MOVC dst, tmp, one, zero */ 4391 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4392 &one, &zero, FALSE); 4393 4394 free_temp_indexes(emit); 4395 4396 return TRUE; 4397} 4398 4399 4400/** 4401 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 4402 */ 4403static boolean 4404emit_sge(struct svga_shader_emitter_v10 *emit, 4405 const struct tgsi_full_instruction *inst) 4406{ 4407 /* dst = SGE(s0, s1): 4408 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 4409 * Translates into: 4410 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 4411 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4412 */ 4413 unsigned tmp = get_temp_index(emit); 4414 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4415 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4416 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4417 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4418 4419 /* GE tmp, s0, s1 */ 4420 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 4421 &inst->Src[1], FALSE); 4422 4423 /* MOVC dst, tmp, one, zero */ 4424 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4425 &one, &zero, FALSE); 4426 4427 free_temp_indexes(emit); 4428 4429 return TRUE; 4430} 4431 4432 4433/** 4434 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 
4435 */ 4436static boolean 4437emit_sgt(struct svga_shader_emitter_v10 *emit, 4438 const struct tgsi_full_instruction *inst) 4439{ 4440 /* dst = SGT(s0, s1): 4441 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4442 * Translates into: 4443 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4444 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4445 */ 4446 unsigned tmp = get_temp_index(emit); 4447 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4448 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4449 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4450 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4451 4452 /* LT tmp, s1, s0 */ 4453 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4454 &inst->Src[0], FALSE); 4455 4456 /* MOVC dst, tmp, one, zero */ 4457 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4458 &one, &zero, FALSE); 4459 4460 free_temp_indexes(emit); 4461 4462 return TRUE; 4463} 4464 4465 4466/** 4467 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
4468 */ 4469static boolean 4470emit_sincos(struct svga_shader_emitter_v10 *emit, 4471 const struct tgsi_full_instruction *inst) 4472{ 4473 unsigned tmp = get_temp_index(emit); 4474 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4475 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4476 4477 struct tgsi_full_src_register tmp_src_xxxx = 4478 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4479 struct tgsi_full_dst_register tmp_dst_x = 4480 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4481 4482 begin_emit_instruction(emit); 4483 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 4484 4485 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 4486 { 4487 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 4488 emit_null_dst_register(emit); /* second destination register */ 4489 } 4490 else { 4491 emit_null_dst_register(emit); 4492 emit_dst_register(emit, &tmp_dst_x); 4493 } 4494 4495 emit_src_register(emit, &inst->Src[0]); 4496 end_emit_instruction(emit); 4497 4498 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4499 &tmp_src_xxxx, inst->Instruction.Saturate); 4500 4501 free_temp_indexes(emit); 4502 4503 return TRUE; 4504} 4505 4506 4507/** 4508 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 4509 */ 4510static boolean 4511emit_sle(struct svga_shader_emitter_v10 *emit, 4512 const struct tgsi_full_instruction *inst) 4513{ 4514 /* dst = SLE(s0, s1): 4515 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 4516 * Translates into: 4517 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 4518 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4519 */ 4520 unsigned tmp = get_temp_index(emit); 4521 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4522 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4523 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4524 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4525 4526 /* GE tmp, s1, s0 */ 4527 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 4528 &inst->Src[0], FALSE); 4529 4530 /* MOVC dst, tmp, one, zero */ 4531 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4532 &one, &zero, FALSE); 4533 4534 free_temp_indexes(emit); 4535 4536 return TRUE; 4537} 4538 4539 4540/** 4541 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 4542 */ 4543static boolean 4544emit_slt(struct svga_shader_emitter_v10 *emit, 4545 const struct tgsi_full_instruction *inst) 4546{ 4547 /* dst = SLT(s0, s1): 4548 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 4549 * Translates into: 4550 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 4551 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4552 */ 4553 unsigned tmp = get_temp_index(emit); 4554 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4555 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4556 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4557 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4558 4559 /* LT tmp, s0, s1 */ 4560 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 4561 &inst->Src[1], FALSE); 4562 4563 /* MOVC dst, tmp, one, zero */ 4564 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4565 &one, &zero, FALSE); 4566 4567 free_temp_indexes(emit); 4568 4569 return TRUE; 4570} 4571 4572 4573/** 4574 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 
4575 */ 4576static boolean 4577emit_sne(struct svga_shader_emitter_v10 *emit, 4578 const struct tgsi_full_instruction *inst) 4579{ 4580 /* dst = SNE(s0, s1): 4581 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4582 * Translates into: 4583 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4584 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4585 */ 4586 unsigned tmp = get_temp_index(emit); 4587 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4588 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4589 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4590 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4591 4592 /* NE tmp, s0, s1 */ 4593 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4594 &inst->Src[1], FALSE); 4595 4596 /* MOVC dst, tmp, one, zero */ 4597 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4598 &one, &zero, FALSE); 4599 4600 free_temp_indexes(emit); 4601 4602 return TRUE; 4603} 4604 4605 4606/** 4607 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4608 */ 4609static boolean 4610emit_ssg(struct svga_shader_emitter_v10 *emit, 4611 const struct tgsi_full_instruction *inst) 4612{ 4613 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4614 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4615 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4616 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4617 * Translates into: 4618 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4619 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4620 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4621 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) 4622 */ 4623 struct tgsi_full_src_register zero = 4624 make_immediate_reg_float(emit, 0.0f); 4625 struct tgsi_full_src_register one = 4626 make_immediate_reg_float(emit, 1.0f); 4627 struct tgsi_full_src_register neg_one = 4628 make_immediate_reg_float(emit, -1.0f); 4629 4630 unsigned tmp1 = get_temp_index(emit); 4631 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4632 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4633 4634 unsigned tmp2 = get_temp_index(emit); 4635 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4636 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4637 4638 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4639 &zero, FALSE); 4640 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4641 &neg_one, &zero, FALSE); 4642 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4643 &inst->Src[0], FALSE); 4644 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4645 &one, &tmp2_src, FALSE); 4646 4647 free_temp_indexes(emit); 4648 4649 return TRUE; 4650} 4651 4652 4653/** 4654 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4655 */ 4656static boolean 4657emit_issg(struct svga_shader_emitter_v10 *emit, 4658 const struct tgsi_full_instruction *inst) 4659{ 4660 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4661 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4662 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4663 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4664 * Translates into: 4665 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4666 * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) 4667 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4668 */ 4669 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4670 4671 unsigned tmp1 = get_temp_index(emit); 4672 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4673 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4674 4675 unsigned tmp2 = get_temp_index(emit); 4676 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4677 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4678 4679 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4680 4681 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4682 &inst->Src[0], &zero, FALSE); 4683 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4684 &zero, &inst->Src[0], FALSE); 4685 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4686 &tmp1_src, &neg_tmp2, FALSE); 4687 4688 free_temp_indexes(emit); 4689 4690 return TRUE; 4691} 4692 4693 4694/** 4695 * Emit code for TGSI_OPCODE_SUB instruction. 4696 */ 4697static boolean 4698emit_sub(struct svga_shader_emitter_v10 *emit, 4699 const struct tgsi_full_instruction *inst) 4700{ 4701 /* dst = SUB(s0, s1): 4702 * dst = s0 - s1 4703 * Translates into: 4704 * ADD dst, s0, neg(s1) 4705 */ 4706 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); 4707 4708 /* ADD dst, s0, neg(s1) */ 4709 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], 4710 &inst->Src[0], &neg_src1, 4711 inst->Instruction.Saturate); 4712 4713 return TRUE; 4714} 4715 4716 4717/** 4718 * Emit a comparison instruction. The dest register will get 4719 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 
4720 */ 4721static void 4722emit_comparison(struct svga_shader_emitter_v10 *emit, 4723 SVGA3dCmpFunc func, 4724 const struct tgsi_full_dst_register *dst, 4725 const struct tgsi_full_src_register *src0, 4726 const struct tgsi_full_src_register *src1) 4727{ 4728 struct tgsi_full_src_register immediate; 4729 VGPU10OpcodeToken0 opcode0; 4730 boolean swapSrc = FALSE; 4731 4732 /* Sanity checks for svga vs. gallium enums */ 4733 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 4734 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 4735 4736 opcode0.value = 0; 4737 4738 switch (func) { 4739 case SVGA3D_CMP_NEVER: 4740 immediate = make_immediate_reg_int(emit, 0); 4741 /* MOV dst, {0} */ 4742 begin_emit_instruction(emit); 4743 emit_dword(emit, VGPU10_OPCODE_MOV); 4744 emit_dst_register(emit, dst); 4745 emit_src_register(emit, &immediate); 4746 end_emit_instruction(emit); 4747 return; 4748 case SVGA3D_CMP_ALWAYS: 4749 immediate = make_immediate_reg_int(emit, -1); 4750 /* MOV dst, {-1} */ 4751 begin_emit_instruction(emit); 4752 emit_dword(emit, VGPU10_OPCODE_MOV); 4753 emit_dst_register(emit, dst); 4754 emit_src_register(emit, &immediate); 4755 end_emit_instruction(emit); 4756 return; 4757 case SVGA3D_CMP_LESS: 4758 opcode0.opcodeType = VGPU10_OPCODE_LT; 4759 break; 4760 case SVGA3D_CMP_EQUAL: 4761 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4762 break; 4763 case SVGA3D_CMP_LESSEQUAL: 4764 opcode0.opcodeType = VGPU10_OPCODE_GE; 4765 swapSrc = TRUE; 4766 break; 4767 case SVGA3D_CMP_GREATER: 4768 opcode0.opcodeType = VGPU10_OPCODE_LT; 4769 swapSrc = TRUE; 4770 break; 4771 case SVGA3D_CMP_NOTEQUAL: 4772 opcode0.opcodeType = VGPU10_OPCODE_NE; 4773 break; 4774 case SVGA3D_CMP_GREATEREQUAL: 4775 opcode0.opcodeType = VGPU10_OPCODE_GE; 4776 break; 4777 default: 4778 assert(!"Unexpected comparison mode"); 4779 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4780 } 4781 4782 begin_emit_instruction(emit); 4783 emit_dword(emit, opcode0.value); 4784 emit_dst_register(emit, dst); 
4785 if (swapSrc) { 4786 emit_src_register(emit, src1); 4787 emit_src_register(emit, src0); 4788 } 4789 else { 4790 emit_src_register(emit, src0); 4791 emit_src_register(emit, src1); 4792 } 4793 end_emit_instruction(emit); 4794} 4795 4796 4797/** 4798 * Get texel/address offsets for a texture instruction. 4799 */ 4800static void 4801get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 4802 const struct tgsi_full_instruction *inst, int offsets[3]) 4803{ 4804 if (inst->Texture.NumOffsets == 1) { 4805 /* According to OpenGL Shader Language spec the offsets are only 4806 * fetched from a previously-declared immediate/literal. 4807 */ 4808 const struct tgsi_texture_offset *off = inst->TexOffsets; 4809 const unsigned index = off[0].Index; 4810 const unsigned swizzleX = off[0].SwizzleX; 4811 const unsigned swizzleY = off[0].SwizzleY; 4812 const unsigned swizzleZ = off[0].SwizzleZ; 4813 const union tgsi_immediate_data *imm = emit->immediates[index]; 4814 4815 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 4816 4817 offsets[0] = imm[swizzleX].Int; 4818 offsets[1] = imm[swizzleY].Int; 4819 offsets[2] = imm[swizzleZ].Int; 4820 } 4821 else { 4822 offsets[0] = offsets[1] = offsets[2] = 0; 4823 } 4824} 4825 4826 4827/** 4828 * Set up the coordinate register for texture sampling. 4829 * When we're sampling from a RECT texture we have to scale the 4830 * unnormalized coordinate to a normalized coordinate. 4831 * We do that by multiplying the coordinate by an "extra" constant. 4832 * An alternative would be to use the RESINFO instruction to query the 4833 * texture's size. 
4834 */ 4835static struct tgsi_full_src_register 4836setup_texcoord(struct svga_shader_emitter_v10 *emit, 4837 unsigned unit, 4838 const struct tgsi_full_src_register *coord) 4839{ 4840 if (emit->key.tex[unit].unnormalized) { 4841 unsigned scale_index = emit->texcoord_scale_index[unit]; 4842 unsigned tmp = get_temp_index(emit); 4843 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4844 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4845 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 4846 4847 /* MUL tmp, coord, const[] */ 4848 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 4849 coord, &scale_src, FALSE); 4850 return tmp_src; 4851 } 4852 else { 4853 /* use texcoord as-is */ 4854 return *coord; 4855 } 4856} 4857 4858 4859/** 4860 * For SAMPLE_C instructions, emit the extra src register which indicates 4861 * the reference/comparision value. 4862 */ 4863static void 4864emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 4865 unsigned target, 4866 const struct tgsi_full_src_register *coord) 4867{ 4868 struct tgsi_full_src_register coord_src_ref; 4869 unsigned component; 4870 4871 assert(tgsi_is_shadow_target(target)); 4872 4873 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ 4874 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || 4875 target == TGSI_TEXTURE_SHADOWCUBE) 4876 component = TGSI_SWIZZLE_W; 4877 else 4878 component = TGSI_SWIZZLE_Z; 4879 4880 coord_src_ref = scalar_src(coord, component); 4881 4882 emit_src_register(emit, &coord_src_ref); 4883} 4884 4885 4886/** 4887 * Info for implementing texture swizzles. 4888 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 4889 * functions use this to encapsulate the extra steps needed to perform 4890 * a texture swizzle, or shadow/depth comparisons. 
4891 * The shadow/depth comparison is only done here if for the cases where 4892 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). 4893 */ 4894struct tex_swizzle_info 4895{ 4896 boolean swizzled; 4897 boolean shadow_compare; 4898 unsigned unit; 4899 unsigned texture_target; /**< TGSI_TEXTURE_x */ 4900 struct tgsi_full_src_register tmp_src; 4901 struct tgsi_full_dst_register tmp_dst; 4902 const struct tgsi_full_dst_register *inst_dst; 4903 const struct tgsi_full_src_register *coord_src; 4904}; 4905 4906 4907/** 4908 * Do setup for handling texture swizzles or shadow compares. 4909 * \param unit the texture unit 4910 * \param inst the TGSI texture instruction 4911 * \param shadow_compare do shadow/depth comparison? 4912 * \param swz returns the swizzle info 4913 */ 4914static void 4915begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4916 unsigned unit, 4917 const struct tgsi_full_instruction *inst, 4918 boolean shadow_compare, 4919 struct tex_swizzle_info *swz) 4920{ 4921 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 4922 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 4923 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 4924 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 4925 4926 swz->shadow_compare = shadow_compare; 4927 swz->texture_target = inst->Texture.Texture; 4928 4929 if (swz->swizzled || shadow_compare) { 4930 /* Allocate temp register for the result of the SAMPLE instruction 4931 * and the source of the MOV/compare/swizzle instructions. 4932 */ 4933 unsigned tmp = get_temp_index(emit); 4934 swz->tmp_src = make_src_temp_reg(tmp); 4935 swz->tmp_dst = make_dst_temp_reg(tmp); 4936 4937 swz->unit = unit; 4938 } 4939 swz->inst_dst = &inst->Dst[0]; 4940 swz->coord_src = &inst->Src[0]; 4941} 4942 4943 4944/** 4945 * Returns the register to put the SAMPLE instruction results into. 
 * This will either be the original instruction dst reg (if no swizzle
 * and no shadow comparison) or a temporary reg if there is a swizzle.
 */
static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
{
   /* When a swizzle or shadow compare is pending, the raw texture result
    * must land in the temporary register so end_tex_swizzle() can
    * post-process it before writing the real destination.
    */
   return (swz->swizzled || swz->shadow_compare)
      ? &swz->tmp_dst : swz->inst_dst;
}


/**
 * This emits the MOV instruction that actually implements a texture swizzle
 * and/or shadow comparison.
 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      assert(emit->unit == PIPE_SHADER_FRAGMENT);

      /* Select the coordinate component that carries the shadow reference
       * value; its position depends on the texture target's coord layout.
       */
      switch (swz->texture_target) {
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_SHADOW1D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
         break;
      case TGSI_TEXTURE_SHADOW1D:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
         break;
      case TGSI_TEXTURE_SHADOWCUBE:
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
         break;
      default:
         assert(!"Unexpected texture target in end_tex_swizzle()");
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
      }

      /* COMPARE tmp, coord, texel */
      /* XXX it would seem that the texel and coord arguments should
       * be transposed here, but piglit tests indicate otherwise.
       */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &texel_src, &coord_src);

      /* AND dest, tmp, {1.0}
       * Masks the comparison result (all-ones / all-zeros bit pattern)
       * down to a float 1.0 or 0.0.
       */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
      if (swz->swizzled) {
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);

      /* Swizzle w/out zero/one terms; ZERO/ONE selectors are replaced by
       * the identity component here and handled by the masked MOVs below.
       */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
                     swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
                     swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
                     swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled, FALSE);

      /* handle swizzle zero terms */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
                     ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
                     ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
                     ((swz_a == PIPE_SWIZZLE_ZERO) << 3));

      if (writemask_0) {
         /* Integer-typed textures take an integer 0 immediate */
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                              &dst, &zero, FALSE);
      }

      /* handle swizzle one terms */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
                     ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
                     ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
                     ((swz_a == PIPE_SWIZZLE_ONE) << 3));

      if (writemask_1) {
         /* Integer-typed textures take an integer 1 immediate */
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
      }
   }
}


/**
 * Emit code for TGSI_OPCODE_SAMPLE instruction.
 * Note: unlike TEX, SAMPLE carries separate resource (Src[1]) and
 * sampler (Src[2]) operands.
 */
static boolean
emit_sample(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   const unsigned resource_unit = inst->Src[1].Register.Index;
   const unsigned sampler_unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, resource_unit);
   emit_sampler_register(emit, sampler_unit);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Check if a texture instruction is valid.
 * An example of an invalid texture instruction is doing shadow comparison
 * with an integer-valued texture.
 * If we detect an invalid texture instruction, we replace it with:
 *   MOV dst, {1,1,1,1};
 * \return TRUE if valid, FALSE if invalid.
 */
static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_full_instruction *inst)
{
   const unsigned unit = inst->Src[1].Register.Index;
   const unsigned target = inst->Texture.Texture;
   boolean valid = TRUE;

   if (tgsi_is_shadow_target(target) &&
       is_integer_type(emit->key.tex[unit].return_type)) {
      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
      valid = FALSE;
   }
   /* XXX might check for other conditions in the future here */

   if (!valid) {
      /* emit a MOV dst, {1,1,1,1} instruction. */
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &inst->Dst[0]);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   return valid;
}


/**
 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
 */
static boolean
emit_tex(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   unsigned target = inst->Texture.Texture;
   unsigned opcode;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* Shadow targets use the comparing variant of the sample opcode */
   if (tgsi_is_shadow_target(target))
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      emit_tex_compare_refcoord(emit, target, &coord);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXP (projective texture)
 */
static boolean
emit_txp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   unsigned target = inst->Texture.Texture;
   unsigned opcode;
   int offsets[3];
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register src0_wwww =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* DIV tmp, coord, coord.wwww
    * The projective divide is done explicitly; the result in tmp is
    * then used as a regular (non-projective) coordinate.
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
                        &coord, &src0_wwww, FALSE);

   /* SAMPLE dst, coord(tmp), resource, sampler */
   begin_emit_instruction(emit);

   if (tgsi_is_shadow_target(target))
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &tmp_src);  /* projected coord */
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      emit_tex_compare_refcoord(emit, target, &tmp_src);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/*
 * Emit code for TGSI_OPCODE_XPD instruction.
 */
static boolean
emit_xpd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = src0.y * src1.z - src1.y * src0.z
    * dst.y = src0.z * src1.x - src1.z * src0.x
    * dst.z = src0.x * src1.y - src1.x * src0.y
    * dst.w = 1
    */
   struct tgsi_full_src_register s0_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);

   struct tgsi_full_src_register s1_xxxx =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_zzzz =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);

   unsigned tmp1 = get_temp_index(emit);
   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);

   unsigned tmp2 = get_temp_index(emit);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);

   unsigned tmp3 = get_temp_index(emit);
   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst_x =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_dst_register tmp3_dst_y =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
   struct tgsi_full_dst_register tmp3_dst_z =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
   struct tgsi_full_dst_register tmp3_dst_w =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);

   /* Note: we put all the intermediate computations into tmp3 in case
    * the XPD dest register is that same as one of the src regs (in which
    * case we could clobber a src reg before we're done with it) .
    *
    * Note: we could get by with just one temp register instead of three
    * since we're doing scalar operations and there's enough room in one
    * temp for everything.
    */

   /* Each component is only computed if its bit is set in the writemask,
    * so partial writes don't waste instructions.
    */

   /* MUL tmp1, src0.y, src1.z */
   /* MUL tmp2, src1.y, src0.z */
   /* ADD tmp3.x, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
                           &s0_yyyy, &s1_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
                           &s1_yyyy, &s0_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.z, src1.x */
   /* MUL tmp2, src1.z, src0.x */
   /* ADD tmp3.y, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
                           &s1_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
                           &s0_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.x, src1.y */
   /* MUL tmp2, src1.x, src0.y */
   /* ADD tmp3.z, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
                           &s1_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
                           &s0_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MOV tmp3.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one,
                           FALSE);
   }

   /* MOV dst, tmp3 */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
                        inst->Instruction.Saturate);


   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
 */
static boolean
emit_txd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* For TXD the sampler unit comes from Src[3]; Src[1]/Src[2] carry
    * the X/Y derivative vectors.
    */
   const uint unit = inst->Src[3].Register.Index;
   unsigned target = inst->Texture.Texture;
   int offsets[3];
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   begin_emit_instruction(emit);
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXF (texel fetch)
 */
static boolean
emit_txf(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const unsigned msaa = emit->key.tex[unit].texture_msaa;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   if (msaa) {
      /* Fetch one sample from an MSAA texture; the sample index is
       * carried in coord.w.
       */
      struct tgsi_full_src_register sampleIndex =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      /* LD_MS dst, coord(s0), resource, sampleIndex */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      emit_src_register(emit, &sampleIndex);
      end_emit_instruction(emit);
   }
   else {
      /* Fetch one texel specified by integer coordinate */
      /* LD dst, coord(s0), resource */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      end_emit_instruction(emit);
   }

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
 */
static boolean
emit_txl_txb(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   unsigned target = inst->Texture.Texture;
   unsigned opcode, unit;
   int offsets[3];
   struct tgsi_full_src_register coord, lod_bias;
   struct tex_swizzle_info swz_info;

   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);

   /* TXB2 passes the bias in src1.x and the sampler in src2;
    * TXL/TXB pack the LOD/bias into coord.w with the sampler in src1.
    */
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
      unit = inst->Src[2].Register.Index;
   }
   else {
      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      unit = inst->Src[1].Register.Index;
   }

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   begin_emit_instruction(emit);
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      opcode = VGPU10_OPCODE_SAMPLE_L;
   }
   else {
      opcode = VGPU10_OPCODE_SAMPLE_B;
   }
   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &lod_bias);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
5522 */ 5523static boolean 5524emit_txq(struct svga_shader_emitter_v10 *emit, 5525 const struct tgsi_full_instruction *inst) 5526{ 5527 const uint unit = inst->Src[1].Register.Index; 5528 5529 if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { 5530 /* RESINFO does not support querying texture buffers, so we instead 5531 * store texture buffer sizes in shader constants, then copy them to 5532 * implement TXQ instead of emitting RESINFO. 5533 * MOV dst, const[texture_buffer_size_index[unit]] 5534 */ 5535 struct tgsi_full_src_register size_src = 5536 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5537 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5538 FALSE); 5539 } else { 5540 /* RESINFO dst, srcMipLevel, resource */ 5541 begin_emit_instruction(emit); 5542 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5543 emit_dst_register(emit, &inst->Dst[0]); 5544 emit_src_register(emit, &inst->Src[0]); 5545 emit_resource_register(emit, unit); 5546 end_emit_instruction(emit); 5547 } 5548 5549 free_temp_indexes(emit); 5550 5551 return TRUE; 5552} 5553 5554 5555/** 5556 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5557 */ 5558static boolean 5559emit_simple(struct svga_shader_emitter_v10 *emit, 5560 const struct tgsi_full_instruction *inst) 5561{ 5562 const unsigned opcode = inst->Instruction.Opcode; 5563 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5564 unsigned i; 5565 5566 begin_emit_instruction(emit); 5567 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5568 inst->Instruction.Saturate); 5569 for (i = 0; i < op->num_dst; i++) { 5570 emit_dst_register(emit, &inst->Dst[i]); 5571 } 5572 for (i = 0; i < op->num_src; i++) { 5573 emit_src_register(emit, &inst->Src[i]); 5574 } 5575 end_emit_instruction(emit); 5576 5577 return TRUE; 5578} 5579 5580 5581/** 5582 * We only special case the MOV instruction to try to detect constant 5583 * color writes in the fragment shader. 
 */
static boolean
emit_mov(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const struct tgsi_full_src_register *src = &inst->Src[0];
   const struct tgsi_full_dst_register *dst = &inst->Dst[0];

   /* Flag "MOV OUT[0], CONST[..]" in a fragment shader so later passes
    * can know the output color is a constant.
    */
   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       dst->Register.File == TGSI_FILE_OUTPUT &&
       dst->Register.Index == 0 &&
       src->Register.File == TGSI_FILE_CONSTANT &&
       !src->Register.Indirect) {
      emit->constant_color_output = TRUE;
   }

   return emit_simple(emit, inst);
}


/**
 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
 * where TGSI only uses one dest register.
 * \param dst_count  number of dest registers the VGPU10 instruction has
 * \param dst_index  which of those registers receives TGSI's Dst[0];
 *                   all others get a null destination.
 */
static boolean
emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
                 const struct tgsi_full_instruction *inst,
                 unsigned dst_count,
                 unsigned dst_index)
{
   const unsigned opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   unsigned i;

   begin_emit_instruction(emit);
   emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
               inst->Instruction.Saturate);

   for (i = 0; i < dst_count; i++) {
      if (i == dst_index) {
         emit_dst_register(emit, &inst->Dst[0]);
      } else {
         emit_null_dst_register(emit);
      }
   }

   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &inst->Src[i]);
   }
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Translate a single TGSI instruction to VGPU10.
 * \return TRUE on success, FALSE for unimplemented opcodes.
 */
static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst)
{
   const unsigned opcode = inst->Instruction.Opcode;

   switch (opcode) {
   /* TGSI opcodes which translate 1:1 to a VGPU10 opcode */
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_RET:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
      /* simple instructions */
      return emit_simple(emit, inst);

   /* Opcodes with dedicated emit helpers */
   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_ABS:
      return emit_abs(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      /* fall-through */
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return TRUE;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DP2A:
      return emit_dp2a(emit, inst);
   case TGSI_OPCODE_DPH:
      return emit_dph(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_KILL:
      return emit_kill(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_kill_if(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   case TGSI_OPCODE_SCS:
      return emit_scs(emit, inst);
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   case TGSI_OPCODE_SUB:
      return emit_sub(emit, inst);
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_XPD:
      return emit_xpd(emit, inst);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_IDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);
   case TGSI_OPCODE_END:
      /* Emit the epilog code (clipping, vpos adjustment, etc.) before
       * translating the END opcode itself.
       */
      if (!emit_post_helpers(emit))
         return FALSE;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return FALSE;
   }

   return TRUE;
}


/**
 * Emit the extra instructions to adjust the vertex position.
 * There are two possible adjustments:
 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
 *    "prescale" and "pretranslate" values.
 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
 * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
 */
static void
emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
                       unsigned vs_pos_tmp_index)
{
   struct tgsi_full_src_register tmp_pos_src;
   struct tgsi_full_dst_register pos_dst;

   /* Don't bother to emit any extra vertex instructions if vertex position is
    * not written out
    */
   if (emit->vposition.out_index == INVALID_INDEX)
      return;

   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
   pos_dst = make_dst_output_reg(emit->vposition.out_index);

   /* If non-adjusted vertex position register index
    * is valid, copy the vertex position from the temporary
    * vertex position register before it is modified by the
    * prescale computation.
    */
   if (emit->vposition.so_index != INVALID_INDEX) {
      struct tgsi_full_dst_register pos_so_dst =
         make_dst_output_reg(emit->vposition.so_index);

      /* MOV pos_so, tmp_pos */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
                           &tmp_pos_src, FALSE);
   }

   if (emit->vposition.need_prescale) {
      /* This code adjusts the vertex position to match the VGPU10 convention.
       * If p is the position computed by the shader (usually by applying the
       * modelview and projection matrices), the new position q is computed by:
       *
       * q.x = p.w * trans.x + p.x * scale.x
       * q.y = p.w * trans.y + p.y * scale.y
       * q.z = p.w * trans.z + p.z * scale.z;
       * q.w = p.w * trans.w + p.w;
       */
      struct tgsi_full_src_register tmp_pos_src_w =
         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
      struct tgsi_full_dst_register tmp_pos_dst =
         make_dst_temp_reg(vs_pos_tmp_index);
      struct tgsi_full_dst_register tmp_pos_dst_xyz =
         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);

      struct tgsi_full_src_register prescale_scale =
         make_src_const_reg(emit->vposition.prescale_scale_index);
      struct tgsi_full_src_register prescale_trans =
         make_src_const_reg(emit->vposition.prescale_trans_index);

      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
                           &tmp_pos_src, &prescale_scale, FALSE);

      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
                           &prescale_trans, &tmp_pos_src, FALSE);
   }
   else if (emit->key.vs.undo_viewport) {
      /* This code computes the final vertex position from the temporary
       * vertex position by undoing the viewport transformation and the
       * divide-by-W operation (we convert window coords back to clip coords).
       * This is needed when we use the 'draw' module for fallbacks.
       * If p is the temp pos in window coords, then the NDC coord q is:
       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
       *   q.z = p.z * p.w
       *   q.w = p.w
       * CONST[vs_viewport_index] contains:
       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
       */
      struct tgsi_full_dst_register tmp_pos_dst =
         make_dst_temp_reg(vs_pos_tmp_index);
      struct tgsi_full_dst_register tmp_pos_dst_xy =
         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
      struct tgsi_full_src_register tmp_pos_src_wwww =
         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);

      struct tgsi_full_dst_register pos_dst_xyz =
         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
      struct tgsi_full_dst_register pos_dst_w =
         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);

      struct tgsi_full_src_register vp_xyzw =
         make_src_const_reg(emit->vs.viewport_index);
      struct tgsi_full_src_register vp_zwww =
         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
                           &tmp_pos_src, &vp_zwww, FALSE);

      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
                           &tmp_pos_src, &vp_xyzw, FALSE);

      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
                           &tmp_pos_src, &tmp_pos_src_wwww, FALSE);

      /* MOV pos.w, tmp_pos.w */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
                           &tmp_pos_src, FALSE);
   }
   else if (vs_pos_tmp_index != INVALID_INDEX) {
      /* This code is to handle the case where the temporary vertex
       * position register is created when the vertex shader has stream
       * output and prescale is disabled because rasterization is to be
       * discarded.
       */
      struct tgsi_full_dst_register pos_dst =
         make_dst_output_reg(emit->vposition.out_index);

      /* MOV pos, tmp_pos */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &pos_dst);
      emit_src_register(emit, &tmp_pos_src);
      end_emit_instruction(emit);
   }
}

/**
 * Emit the clip-distance / clip-vertex instructions appropriate for the
 * current clip_mode (see enum clipping_mode).
 */
static void
emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
{
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
      emit_clip_distance_instructions(emit);

   } else if (emit->clip_mode == CLIP_VERTEX) {
      /* Convert TGSI CLIPVERTEX to CLIPDIST */
      emit_clip_vertex_instructions(emit);
   }

   /**
    * Emit vertex position and take care of legacy user planes only if
    * there is a valid vertex position register index.
    * This is to take care of the case
    * where the shader doesn't output vertex position. Then in
    * this case, don't bother to emit more vertex instructions.
    */
   if (emit->vposition.out_index == INVALID_INDEX)
      return;

   /**
    * Emit per-vertex clipping instructions for legacy user defined clip planes.
    * NOTE: we must emit the clip distance instructions before the
    * emit_vpos_instructions() call since the later function will change
    * the TEMP[vs_pos_tmp_index] value.
    */
   if (emit->clip_mode == CLIP_LEGACY) {
      /* Emit CLIPDIST for legacy user defined clip planes */
      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
   }
}


/**
 * Emit extra per-vertex instructions.  This includes clip-coordinate
 * space conversion and computing clip distances.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
6001 */ 6002static void 6003emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6004{ 6005 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 6006 6007 /* Emit clipping instructions based on clipping mode */ 6008 emit_clipping_instructions(emit); 6009 6010 /** 6011 * Reset the temporary vertex position register index 6012 * so that emit_dst_register() will use the real vertex position output 6013 */ 6014 emit->vposition.tmp_index = INVALID_INDEX; 6015 6016 /* Emit vertex position instructions */ 6017 emit_vpos_instructions(emit, vs_pos_tmp_index); 6018 6019 /* Restore original vposition.tmp_index value for the next GS vertex. 6020 * It doesn't matter for VS. 6021 */ 6022 emit->vposition.tmp_index = vs_pos_tmp_index; 6023} 6024 6025/** 6026 * Translate the TGSI_OPCODE_EMIT GS instruction. 6027 */ 6028static boolean 6029emit_vertex(struct svga_shader_emitter_v10 *emit, 6030 const struct tgsi_full_instruction *inst) 6031{ 6032 unsigned ret = TRUE; 6033 6034 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6035 6036 emit_vertex_instructions(emit); 6037 6038 /* We can't use emit_simple() because the TGSI instruction has one 6039 * operand (vertex stream number) which we must ignore for VGPU10. 6040 */ 6041 begin_emit_instruction(emit); 6042 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 6043 end_emit_instruction(emit); 6044 6045 return ret; 6046} 6047 6048 6049/** 6050 * Emit the extra code to convert from VGPU10's boolean front-face 6051 * register to TGSI's signed front-face register. 6052 * 6053 * TODO: Make temporary front-face register a scalar. 
 */
static void
emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   if (emit->fs.face_input_index != INVALID_INDEX) {
      /* convert vgpu10 boolean face register to gallium +/-1 value */
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.face_tmp_index);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register neg_one =
         make_immediate_reg_float(emit, -1.0f);

      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
      emit_dst_register(emit, &tmp_dst);
      emit_face_register(emit);
      emit_src_register(emit, &one);
      emit_src_register(emit, &neg_one);
      end_emit_instruction(emit);
   }
}


/**
 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
 */
static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
      struct tgsi_full_dst_register tmp_dst_xyz =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
      struct tgsi_full_dst_register tmp_dst_w =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register fragcoord =
         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);

      /* save the input index */
      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
      /* set to invalid to prevent substitution in emit_src_register() */
      emit->fs.fragcoord_input_index = INVALID_INDEX;

      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &tmp_dst_xyz);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
      emit_dst_register(emit, &tmp_dst_w);
      emit_src_register(emit, &one);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* restore saved value */
      emit->fs.fragcoord_input_index = fragcoord_input_index;
   }
}


/**
 * Emit extra instructions to adjust VS inputs/attributes.  This can
 * mean casting a vertex attribute from int to float or setting the
 * W component to 1, or both.
 */
static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Each mask has one bit per vertex attribute that needs the
    * corresponding adjustment.  They are saved here so they can be
    * temporarily zeroed (see below) and restored afterward.
    */
   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;

   /* Union of all attribs needing any adjustment at all */
   unsigned adjust_mask = (save_w_1_mask |
                           save_itof_mask |
                           save_utof_mask |
                           save_is_bgra_mask |
                           save_puint_to_snorm_mask |
                           save_puint_to_uscaled_mask |
                           save_puint_to_sscaled_mask);

   assert(emit->unit == PIPE_SHADER_VERTEX);

   if (adjust_mask) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      struct tgsi_full_src_register one_int =
         make_immediate_reg_int(emit, 1);

      /* We need to turn off these bitmasks while emitting the
       * instructions below, then restore them afterward.
       * (Otherwise emit_src_register()/helpers would re-apply the
       * adjustments to the registers we emit here.)
       */
      emit->key.vs.adjust_attrib_w_1 = 0;
      emit->key.vs.adjust_attrib_itof = 0;
      emit->key.vs.adjust_attrib_utof = 0;
      emit->key.vs.attrib_is_bgra = 0;
      emit->key.vs.attrib_puint_to_snorm = 0;
      emit->key.vs.attrib_puint_to_uscaled = 0;
      emit->key.vs.attrib_puint_to_sscaled = 0;

      /* Process one flagged attribute per iteration */
      while (adjust_mask) {
         unsigned index = u_bit_scan(&adjust_mask);
         unsigned tmp = emit->vs.adjusted_input[index];
         struct tgsi_full_src_register input_src =
            make_src_reg(TGSI_FILE_INPUT, index);

         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
         struct tgsi_full_dst_register tmp_dst_w =
            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);

         /* ITOF/UTOF/MOV tmp, input[index]
          * NOTE: the else-if priority matters; the conversions are
          * mutually exclusive per attribute.
          */
         if (save_itof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_utof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_puint_to_snorm_mask & (1 << index)) {
            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_uscaled_mask & (1 << index)) {
            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_sscaled_mask & (1 << index)) {
            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
         }
         else {
            /* no conversion needed: plain copy into the temp */
            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                 &tmp_dst, &input_src, FALSE);
         }

         if (save_is_bgra_mask & (1 << index)) {
            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
         }

         if (save_w_1_mask & (1 << index)) {
            /* MOV tmp.w, 1.0 — use an integer one for pure-int attribs */
            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one_int, FALSE);
            }
            else {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one, FALSE);
            }
         }
      }

      /* restore the saved bitmasks */
      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
      emit->key.vs.adjust_attrib_itof = save_itof_mask;
      emit->key.vs.adjust_attrib_utof = save_utof_mask;
      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   }
}


/**
 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
 * to implement some instructions.  We pre-allocate those values here
 * in the immediate constant buffer.
 */
static void
alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
{
   unsigned n = 0;

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 0, 1, 0, -1);

   /* The following immediates are only needed by specific attribute
    * conversions (see emit_puint_to_* helpers).
    */
   if (emit->key.vs.attrib_puint_to_snorm) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   }

   if (emit->key.vs.attrib_puint_to_uscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   }

   if (emit->key.vs.attrib_puint_to_sscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 12, 2, 0);

      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 30, 0, 0);
   }

   assert(n <= Elements(emit->common_immediate_pos));
   emit->num_common_immediates = n;
}


/**
 * Emit any extra/helper declarations/code that we might need between
 * the declaration section and code section.
6279 */ 6280static boolean 6281emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 6282{ 6283 /* Properties */ 6284 if (emit->unit == PIPE_SHADER_GEOMETRY) 6285 emit_property_instructions(emit); 6286 6287 /* Declare inputs */ 6288 if (!emit_input_declarations(emit)) 6289 return FALSE; 6290 6291 /* Declare outputs */ 6292 if (!emit_output_declarations(emit)) 6293 return FALSE; 6294 6295 /* Declare temporary registers */ 6296 emit_temporaries_declaration(emit); 6297 6298 /* Declare constant registers */ 6299 emit_constant_declaration(emit); 6300 6301 /* Declare samplers and resources */ 6302 emit_sampler_declarations(emit); 6303 emit_resource_declarations(emit); 6304 6305 /* Declare clip distance output registers */ 6306 if (emit->unit == PIPE_SHADER_VERTEX || 6307 emit->unit == PIPE_SHADER_GEOMETRY) { 6308 emit_clip_distance_declarations(emit); 6309 } 6310 6311 alloc_common_immediates(emit); 6312 6313 if (emit->unit == PIPE_SHADER_FRAGMENT && 6314 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6315 float alpha = emit->key.fs.alpha_ref; 6316 emit->fs.alpha_ref_index = 6317 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 6318 } 6319 6320 /* Now, emit the constant block containing all the immediates 6321 * declared by shader, as well as the extra ones seen above. 6322 */ 6323 emit_vgpu10_immediates_block(emit); 6324 6325 if (emit->unit == PIPE_SHADER_FRAGMENT) { 6326 emit_frontface_instructions(emit); 6327 emit_fragcoord_instructions(emit); 6328 } 6329 else if (emit->unit == PIPE_SHADER_VERTEX) { 6330 emit_vertex_attrib_instructions(emit); 6331 } 6332 6333 return TRUE; 6334} 6335 6336 6337/** 6338 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 6339 * against the alpha reference value and discards the fragment if the 6340 * comparison fails. 
6341 */ 6342static void 6343emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, 6344 unsigned fs_color_tmp_index) 6345{ 6346 /* compare output color's alpha to alpha ref and kill */ 6347 unsigned tmp = get_temp_index(emit); 6348 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6349 struct tgsi_full_src_register tmp_src_x = 6350 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6351 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6352 struct tgsi_full_src_register color_src = 6353 make_src_temp_reg(fs_color_tmp_index); 6354 struct tgsi_full_src_register color_src_w = 6355 scalar_src(&color_src, TGSI_SWIZZLE_W); 6356 struct tgsi_full_src_register ref_src = 6357 make_src_immediate_reg(emit->fs.alpha_ref_index); 6358 struct tgsi_full_dst_register color_dst = 6359 make_dst_output_reg(emit->fs.color_out_index[0]); 6360 6361 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6362 6363 /* dst = src0 'alpha_func' src1 */ 6364 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, 6365 &color_src_w, &ref_src); 6366 6367 /* DISCARD if dst.x == 0 */ 6368 begin_emit_instruction(emit); 6369 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ 6370 emit_src_register(emit, &tmp_src_x); 6371 end_emit_instruction(emit); 6372 6373 /* If we don't need to broadcast the color below or set fragments to 6374 * white, emit final color here. 6375 */ 6376 if (emit->key.fs.write_color0_to_n_cbufs <= 1 && 6377 !emit->key.fs.white_fragments) { 6378 /* MOV output.color, tempcolor */ 6379 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6380 &color_src, FALSE); /* XXX saturate? */ 6381 } 6382 6383 free_temp_indexes(emit); 6384} 6385 6386 6387/** 6388 * When we need to emit white for all fragments (for emulating XOR logicop 6389 * mode), this function copies white into the temporary color output register. 
6390 */ 6391static void 6392emit_set_color_white(struct svga_shader_emitter_v10 *emit, 6393 unsigned fs_color_tmp_index) 6394{ 6395 struct tgsi_full_dst_register color_dst = 6396 make_dst_temp_reg(fs_color_tmp_index); 6397 struct tgsi_full_src_register white = 6398 make_immediate_reg_float(emit, 1.0f); 6399 6400 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); 6401} 6402 6403 6404/** 6405 * Emit instructions for writing a single color output to multiple 6406 * color buffers. 6407 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or 6408 * when key.fs.white_fragments is true). 6409 * property is set and the number of render targets is greater than one. 6410 * \param fs_color_tmp_index index of the temp register that holds the 6411 * color to broadcast. 6412 */ 6413static void 6414emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, 6415 unsigned fs_color_tmp_index) 6416{ 6417 const unsigned n = emit->key.fs.write_color0_to_n_cbufs; 6418 unsigned i; 6419 struct tgsi_full_src_register color_src = 6420 make_src_temp_reg(fs_color_tmp_index); 6421 6422 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6423 6424 for (i = 0; i < n; i++) { 6425 unsigned output_reg = emit->fs.color_out_index[i]; 6426 struct tgsi_full_dst_register color_dst = 6427 make_dst_output_reg(output_reg); 6428 6429 /* Fill in this semantic here since we'll use it later in 6430 * emit_dst_register(). 6431 */ 6432 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; 6433 6434 /* MOV output.color[i], tempcolor */ 6435 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6436 &color_src, FALSE); /* XXX saturate? */ 6437 } 6438} 6439 6440 6441/** 6442 * Emit extra helper code after the original shader code, but before the 6443 * last END/RET instruction. 6444 * For vertex shaders this means emitting the extra code to apply the 6445 * prescale scale/translation. 
6446 */ 6447static boolean 6448emit_post_helpers(struct svga_shader_emitter_v10 *emit) 6449{ 6450 if (emit->unit == PIPE_SHADER_VERTEX) { 6451 emit_vertex_instructions(emit); 6452 } 6453 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 6454 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; 6455 6456 /* We no longer want emit_dst_register() to substitute the 6457 * temporary fragment color register for the real color output. 6458 */ 6459 emit->fs.color_tmp_index = INVALID_INDEX; 6460 6461 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6462 emit_alpha_test_instructions(emit, fs_color_tmp_index); 6463 } 6464 if (emit->key.fs.white_fragments) { 6465 emit_set_color_white(emit, fs_color_tmp_index); 6466 } 6467 if (emit->key.fs.write_color0_to_n_cbufs > 1 || 6468 emit->key.fs.white_fragments) { 6469 emit_broadcast_color_instructions(emit, fs_color_tmp_index); 6470 } 6471 } 6472 6473 return TRUE; 6474} 6475 6476 6477/** 6478 * Translate the TGSI tokens into VGPU10 tokens. 6479 */ 6480static boolean 6481emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, 6482 const struct tgsi_token *tokens) 6483{ 6484 struct tgsi_parse_context parse; 6485 boolean ret = TRUE; 6486 boolean pre_helpers_emitted = FALSE; 6487 unsigned inst_number = 0; 6488 6489 tgsi_parse_init(&parse, tokens); 6490 6491 while (!tgsi_parse_end_of_tokens(&parse)) { 6492 tgsi_parse_token(&parse); 6493 6494 switch (parse.FullToken.Token.Type) { 6495 case TGSI_TOKEN_TYPE_IMMEDIATE: 6496 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); 6497 if (!ret) 6498 goto done; 6499 break; 6500 6501 case TGSI_TOKEN_TYPE_DECLARATION: 6502 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); 6503 if (!ret) 6504 goto done; 6505 break; 6506 6507 case TGSI_TOKEN_TYPE_INSTRUCTION: 6508 if (!pre_helpers_emitted) { 6509 ret = emit_pre_helpers(emit); 6510 if (!ret) 6511 goto done; 6512 pre_helpers_emitted = TRUE; 6513 } 6514 ret = emit_vgpu10_instruction(emit, inst_number++, 
6515 &parse.FullToken.FullInstruction); 6516 if (!ret) 6517 goto done; 6518 break; 6519 6520 case TGSI_TOKEN_TYPE_PROPERTY: 6521 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); 6522 if (!ret) 6523 goto done; 6524 break; 6525 6526 default: 6527 break; 6528 } 6529 } 6530 6531done: 6532 tgsi_parse_free(&parse); 6533 return ret; 6534} 6535 6536 6537/** 6538 * Emit the first VGPU10 shader tokens. 6539 */ 6540static boolean 6541emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) 6542{ 6543 VGPU10ProgramToken ptoken; 6544 6545 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ 6546 ptoken.majorVersion = 4; 6547 ptoken.minorVersion = 0; 6548 ptoken.programType = translate_shader_type(emit->unit); 6549 if (!emit_dword(emit, ptoken.value)) 6550 return FALSE; 6551 6552 /* Second token: total length of shader, in tokens. We can't fill this 6553 * in until we're all done. Emit zero for now. 6554 */ 6555 return emit_dword(emit, 0); 6556} 6557 6558 6559static boolean 6560emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) 6561{ 6562 VGPU10ProgramToken *tokens; 6563 6564 /* Replace the second token with total shader length */ 6565 tokens = (VGPU10ProgramToken *) emit->buf; 6566 tokens[1].value = emit_get_num_tokens(emit); 6567 6568 return TRUE; 6569} 6570 6571 6572/** 6573 * Modify the FS to read the BCOLORs and use the FACE register 6574 * to choose between the front/back colors. 6575 */ 6576static const struct tgsi_token * 6577transform_fs_twoside(const struct tgsi_token *tokens) 6578{ 6579 if (0) { 6580 debug_printf("Before tgsi_add_two_side ------------------\n"); 6581 tgsi_dump(tokens,0); 6582 } 6583 tokens = tgsi_add_two_side(tokens); 6584 if (0) { 6585 debug_printf("After tgsi_add_two_side ------------------\n"); 6586 tgsi_dump(tokens, 0); 6587 } 6588 return tokens; 6589} 6590 6591 6592/** 6593 * Modify the FS to do polygon stipple. 
6594 */ 6595static const struct tgsi_token * 6596transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, 6597 const struct tgsi_token *tokens) 6598{ 6599 const struct tgsi_token *new_tokens; 6600 unsigned unit; 6601 6602 if (0) { 6603 debug_printf("Before pstipple ------------------\n"); 6604 tgsi_dump(tokens,0); 6605 } 6606 6607 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0); 6608 6609 emit->fs.pstipple_sampler_unit = unit; 6610 6611 /* Setup texture state for stipple */ 6612 emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; 6613 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 6614 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 6615 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 6616 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 6617 6618 if (0) { 6619 debug_printf("After pstipple ------------------\n"); 6620 tgsi_dump(new_tokens, 0); 6621 } 6622 6623 return new_tokens; 6624} 6625 6626/** 6627 * Modify the FS to support anti-aliasing point. 6628 */ 6629static const struct tgsi_token * 6630transform_fs_aapoint(const struct tgsi_token *tokens, 6631 int aa_coord_index) 6632{ 6633 if (0) { 6634 debug_printf("Before tgsi_add_aa_point ------------------\n"); 6635 tgsi_dump(tokens,0); 6636 } 6637 tokens = tgsi_add_aa_point(tokens, aa_coord_index); 6638 if (0) { 6639 debug_printf("After tgsi_add_aa_point ------------------\n"); 6640 tgsi_dump(tokens, 0); 6641 } 6642 return tokens; 6643} 6644 6645/** 6646 * This is the main entrypoint for the TGSI -> VPGU10 translator. 
6647 */ 6648struct svga_shader_variant * 6649svga_tgsi_vgpu10_translate(struct svga_context *svga, 6650 const struct svga_shader *shader, 6651 const struct svga_compile_key *key, 6652 unsigned unit) 6653{ 6654 struct svga_shader_variant *variant = NULL; 6655 struct svga_shader_emitter_v10 *emit; 6656 const struct tgsi_token *tokens = shader->tokens; 6657 struct svga_vertex_shader *vs = svga->curr.vs; 6658 struct svga_geometry_shader *gs = svga->curr.gs; 6659 6660 assert(unit == PIPE_SHADER_VERTEX || 6661 unit == PIPE_SHADER_GEOMETRY || 6662 unit == PIPE_SHADER_FRAGMENT); 6663 6664 /* These two flags cannot be used together */ 6665 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6666 6667 /* 6668 * Setup the code emitter 6669 */ 6670 emit = alloc_emitter(); 6671 if (!emit) 6672 return NULL; 6673 6674 emit->unit = unit; 6675 emit->key = *key; 6676 6677 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6678 emit->key.gs.need_prescale); 6679 emit->vposition.tmp_index = INVALID_INDEX; 6680 emit->vposition.so_index = INVALID_INDEX; 6681 emit->vposition.out_index = INVALID_INDEX; 6682 6683 emit->fs.color_tmp_index = INVALID_INDEX; 6684 emit->fs.face_input_index = INVALID_INDEX; 6685 emit->fs.fragcoord_input_index = INVALID_INDEX; 6686 6687 emit->gs.prim_id_index = INVALID_INDEX; 6688 6689 emit->clip_dist_out_index = INVALID_INDEX; 6690 emit->clip_dist_tmp_index = INVALID_INDEX; 6691 emit->clip_dist_so_index = INVALID_INDEX; 6692 emit->clip_vertex_out_index = INVALID_INDEX; 6693 6694 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6695 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6696 } 6697 6698 if (unit == PIPE_SHADER_FRAGMENT) { 6699 if (key->fs.light_twoside) { 6700 tokens = transform_fs_twoside(tokens); 6701 } 6702 if (key->fs.pstipple) { 6703 const struct tgsi_token *new_tokens = 6704 transform_fs_pstipple(emit, tokens); 6705 if (tokens != shader->tokens) { 6706 /* free the two-sided shader tokens */ 6707 tgsi_free_tokens(tokens); 6708 } 
6709 tokens = new_tokens; 6710 } 6711 if (key->fs.aa_point) { 6712 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6713 } 6714 } 6715 6716 if (SVGA_DEBUG & DEBUG_TGSI) { 6717 debug_printf("#####################################\n"); 6718 debug_printf("### TGSI Shader %u\n", shader->id); 6719 tgsi_dump(tokens, 0); 6720 } 6721 6722 /** 6723 * Rescan the header if the token string is different from the one 6724 * included in the shader; otherwise, the header info is already up-to-date 6725 */ 6726 if (tokens != shader->tokens) { 6727 tgsi_scan_shader(tokens, &emit->info); 6728 } else { 6729 emit->info = shader->info; 6730 } 6731 6732 emit->num_outputs = emit->info.num_outputs; 6733 6734 if (unit == PIPE_SHADER_FRAGMENT) { 6735 /* Compute FS input remapping to match the output from VS/GS */ 6736 if (gs) { 6737 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6738 } else { 6739 assert(vs); 6740 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6741 } 6742 } else if (unit == PIPE_SHADER_GEOMETRY) { 6743 assert(vs); 6744 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6745 } 6746 6747 determine_clipping_mode(emit); 6748 6749 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6750 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6751 /* if there is stream output declarations associated 6752 * with this shader or the shader writes to ClipDistance 6753 * then reserve extra registers for the non-adjusted vertex position 6754 * and the ClipDistance shadow copy 6755 */ 6756 emit->vposition.so_index = emit->num_outputs++; 6757 6758 if (emit->clip_mode == CLIP_DISTANCE) { 6759 emit->clip_dist_so_index = emit->num_outputs++; 6760 if (emit->info.num_written_clipdistance > 4) 6761 emit->num_outputs++; 6762 } 6763 } 6764 } 6765 6766 /* 6767 * Do actual shader translation. 
6768 */ 6769 if (!emit_vgpu10_header(emit)) { 6770 debug_printf("svga: emit VGPU10 header failed\n"); 6771 goto cleanup; 6772 } 6773 6774 if (!emit_vgpu10_instructions(emit, tokens)) { 6775 debug_printf("svga: emit VGPU10 instructions failed\n"); 6776 goto cleanup; 6777 } 6778 6779 if (!emit_vgpu10_tail(emit)) { 6780 debug_printf("svga: emit VGPU10 tail failed\n"); 6781 goto cleanup; 6782 } 6783 6784 if (emit->register_overflow) { 6785 goto cleanup; 6786 } 6787 6788 /* 6789 * Create, initialize the 'variant' object. 6790 */ 6791 variant = svga_new_shader_variant(svga); 6792 if (!variant) 6793 goto cleanup; 6794 6795 variant->shader = shader; 6796 variant->nr_tokens = emit_get_num_tokens(emit); 6797 variant->tokens = (const unsigned *)emit->buf; 6798 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6799 memcpy(&variant->key, key, sizeof(*key)); 6800 variant->id = UTIL_BITMASK_INVALID_INDEX; 6801 6802 /* The extra constant starting offset starts with the number of 6803 * shader constants declared in the shader. 6804 */ 6805 variant->extra_const_start = emit->num_shader_consts[0]; 6806 if (key->gs.wide_point) { 6807 /** 6808 * The extra constant added in the transformed shader 6809 * for inverse viewport scale is to be supplied by the driver. 6810 * So the extra constant starting offset needs to be reduced by 1. 6811 */ 6812 assert(variant->extra_const_start > 0); 6813 variant->extra_const_start--; 6814 } 6815 6816 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6817 6818 /* If there was exactly one write to a fragment shader output register 6819 * and it came from a constant buffer, we know all fragments will have 6820 * the same color (except for blending). 6821 */ 6822 variant->constant_color_output = 6823 emit->constant_color_output && emit->num_output_writes == 1; 6824 6825 /** keep track in the variant if flat interpolation is used 6826 * for any of the varyings. 
6827 */ 6828 variant->uses_flat_interp = emit->uses_flat_interp; 6829 6830 if (tokens != shader->tokens) { 6831 tgsi_free_tokens(tokens); 6832 } 6833 6834cleanup: 6835 free_emitter(emit); 6836 6837 return variant; 6838} 6839