svga_tgsi_vgpu10.c revision e0184b3995fa308c125ae8e090d2bbdffd495b5f
1/********************************************************** 2 * Copyright 1998-2013 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26/** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 30 * 31 * \author Mingcheng Chen 32 * \author Brian Paul 33 */ 34 35#include "pipe/p_compiler.h" 36#include "pipe/p_shader_tokens.h" 37#include "pipe/p_defines.h" 38#include "tgsi/tgsi_build.h" 39#include "tgsi/tgsi_dump.h" 40#include "tgsi/tgsi_info.h" 41#include "tgsi/tgsi_parse.h" 42#include "tgsi/tgsi_scan.h" 43#include "tgsi/tgsi_two_side.h" 44#include "tgsi/tgsi_aa_point.h" 45#include "tgsi/tgsi_util.h" 46#include "util/u_math.h" 47#include "util/u_memory.h" 48#include "util/u_bitmask.h" 49#include "util/u_debug.h" 50#include "util/u_pstipple.h" 51 52#include "svga_context.h" 53#include "svga_debug.h" 54#include "svga_link.h" 55#include "svga_shader.h" 56#include "svga_tgsi.h" 57 58#include "VGPU10ShaderTokens.h" 59 60 61#define INVALID_INDEX 99999 62#define MAX_INTERNAL_TEMPS 3 63#define MAX_SYSTEM_VALUES 4 64#define MAX_IMMEDIATE_COUNT \ 65 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) 66#define MAX_TEMP_ARRAYS 64 /* Enough? */ 67 68 69/** 70 * Clipping is complicated. There's four different cases which we 71 * handle during VS/GS shader translation: 72 */ 73enum clipping_mode 74{ 75 CLIP_NONE, /**< No clipping enabled */ 76 CLIP_LEGACY, /**< The shader has no clipping declarations or code but 77 * one or more user-defined clip planes are enabled. We 78 * generate extra code to emit clip distances. 79 */ 80 CLIP_DISTANCE, /**< The shader already declares clip distance output 81 * registers and has code to write to them. 82 */ 83 CLIP_VERTEX /**< The shader declares a clip vertex output register and 84 * has code that writes to the register. We convert the 85 * clipvertex position into one or more clip distances. 86 */ 87}; 88 89 90struct svga_shader_emitter_v10 91{ 92 /* The token output buffer */ 93 unsigned size; 94 char *buf; 95 char *ptr; 96 97 /* Information about the shader and state (does not change) */ 98 struct svga_compile_key key; 99 struct tgsi_shader_info info; 100 unsigned unit; 101 102 unsigned inst_start_token; 103 boolean discard_instruction; /**< throw away current instruction? */ 104 105 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; 106 unsigned num_immediates; /**< Number of immediates emitted */ 107 unsigned common_immediate_pos[8]; /**< literals for common immediates */ 108 unsigned num_common_immediates; 109 boolean immediates_emitted; 110 111 unsigned num_outputs; /**< include any extra outputs */ 112 /** The first extra output is reserved for 113 * non-adjusted vertex position for 114 * stream output purpose 115 */ 116 117 /* Temporary Registers */ 118 unsigned num_shader_temps; /**< num of temps used by original shader */ 119 unsigned internal_temp_count; /**< currently allocated internal temps */ 120 struct { 121 unsigned start, size; 122 } temp_arrays[MAX_TEMP_ARRAYS]; 123 unsigned num_temp_arrays; 124 125 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ 126 struct { 127 unsigned arrayId, index; 128 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ 129 130 /** Number of constants used by original shader for each constant buffer. 131 * The size should probably always match with that of svga_state.constbufs. 132 */ 133 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; 134 135 /* Samplers */ 136 unsigned num_samplers; 137 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ 138 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ 139 140 /* Address regs (really implemented with temps) */ 141 unsigned num_address_regs; 142 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; 143 144 /* Output register usage masks */ 145 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; 146 147 /* To map TGSI system value index to VGPU shader input indexes */ 148 ubyte system_value_indexes[MAX_SYSTEM_VALUES]; 149 150 struct { 151 /* vertex position scale/translation */ 152 unsigned out_index; /**< the real position output reg */ 153 unsigned tmp_index; /**< the fake/temp position output reg */ 154 unsigned so_index; /**< the non-adjusted position output reg */ 155 unsigned prescale_scale_index, prescale_trans_index; 156 boolean need_prescale; 157 } vposition; 158 159 /* For vertex shaders only */ 160 struct { 161 /* viewport constant */ 162 unsigned viewport_index; 163 164 /* temp index of adjusted vertex attributes */ 165 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; 166 } vs; 167 168 /* For fragment shaders only */ 169 struct { 170 /* apha test */ 171 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ 172 unsigned color_tmp_index; /**< fake/temp color output reg */ 173 unsigned alpha_ref_index; /**< immediate constant for alpha ref */ 174 175 /* front-face */ 176 unsigned face_input_index; /**< real fragment shader face reg (bool) */ 177 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ 178 179 unsigned pstipple_sampler_unit; 180 181 unsigned fragcoord_input_index; /**< real fragment position input reg */ 182 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ 183 } fs; 184 185 /* For geometry shaders only */ 186 struct { 187 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ 188 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ 189 unsigned input_size; /**< size of input arrays */ 190 unsigned prim_id_index; /**< primitive id register index */ 191 unsigned max_out_vertices; /**< maximum number of output vertices */ 192 } gs; 193 194 /* For vertex or geometry shaders */ 195 enum clipping_mode clip_mode; 196 unsigned clip_dist_out_index; /**< clip distance output register index */ 197 unsigned clip_dist_tmp_index; /**< clip distance temporary register */ 198 unsigned clip_dist_so_index; /**< clip distance shadow copy */ 199 200 /** Index of temporary holding the clipvertex coordinate */ 201 unsigned clip_vertex_out_index; /**< clip vertex output register index */ 202 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ 203 204 /* user clip plane constant slot indexes */ 205 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; 206 207 unsigned num_output_writes; 208 boolean constant_color_output; 209 210 boolean uses_flat_interp; 211 212 /* For all shaders: const reg index for RECT coord scaling */ 213 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; 214 215 /* For all shaders: const reg index for texture buffer size */ 216 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; 217 218 /* VS/GS/FS Linkage info */ 219 struct shader_linkage linkage; 220 221 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ 222}; 223 224 225static boolean 226emit_post_helpers(struct svga_shader_emitter_v10 *emit); 227 228static boolean 229emit_vertex(struct svga_shader_emitter_v10 *emit, 230 const struct tgsi_full_instruction *inst); 231 232static char err_buf[128]; 233 234static boolean 235expand(struct svga_shader_emitter_v10 *emit) 236{ 237 char *new_buf; 238 unsigned newsize = emit->size * 2; 239 240 if (emit->buf != err_buf) 241 new_buf = REALLOC(emit->buf, emit->size, newsize); 242 else 243 new_buf = NULL; 244 245 if (!new_buf) { 246 emit->ptr = err_buf; 247 emit->buf = err_buf; 248 emit->size = sizeof(err_buf); 249 return FALSE; 250 } 251 252 emit->size = newsize; 253 emit->ptr = new_buf + (emit->ptr - emit->buf); 254 emit->buf = new_buf; 255 return TRUE; 256} 257 258/** 259 * Create and initialize a new svga_shader_emitter_v10 object. 260 */ 261static struct svga_shader_emitter_v10 * 262alloc_emitter(void) 263{ 264 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); 265 266 if (!emit) 267 return NULL; 268 269 /* to initialize the output buffer */ 270 emit->size = 512; 271 if (!expand(emit)) { 272 FREE(emit); 273 return NULL; 274 } 275 return emit; 276} 277 278/** 279 * Free an svga_shader_emitter_v10 object. 280 */ 281static void 282free_emitter(struct svga_shader_emitter_v10 *emit) 283{ 284 assert(emit); 285 FREE(emit->buf); /* will be NULL if translation succeeded */ 286 FREE(emit); 287} 288 289static inline boolean 290reserve(struct svga_shader_emitter_v10 *emit, 291 unsigned nr_dwords) 292{ 293 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { 294 if (!expand(emit)) 295 return FALSE; 296 } 297 298 return TRUE; 299} 300 301static boolean 302emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) 303{ 304 if (!reserve(emit, 1)) 305 return FALSE; 306 307 *(uint32 *)emit->ptr = dword; 308 emit->ptr += sizeof dword; 309 return TRUE; 310} 311 312static boolean 313emit_dwords(struct svga_shader_emitter_v10 *emit, 314 const uint32 *dwords, 315 unsigned nr) 316{ 317 if (!reserve(emit, nr)) 318 return FALSE; 319 320 memcpy(emit->ptr, dwords, nr * sizeof *dwords); 321 emit->ptr += nr * sizeof *dwords; 322 return TRUE; 323} 324 325/** Return the number of tokens in the emitter's buffer */ 326static unsigned 327emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) 328{ 329 return (emit->ptr - emit->buf) / sizeof(unsigned); 330} 331 332 333/** 334 * Check for register overflow. If we overflow we'll set an 335 * error flag. This function can be called for register declarations 336 * or use as src/dst instruction operands. 337 * \param type register type. One of VGPU10_OPERAND_TYPE_x 338 or VGPU10_OPCODE_DCL_x 339 * \param index the register index 340 */ 341static void 342check_register_index(struct svga_shader_emitter_v10 *emit, 343 unsigned operandType, unsigned index) 344{ 345 bool overflow_before = emit->register_overflow; 346 347 switch (operandType) { 348 case VGPU10_OPERAND_TYPE_TEMP: 349 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: 350 case VGPU10_OPCODE_DCL_TEMPS: 351 if (index >= VGPU10_MAX_TEMPS) { 352 emit->register_overflow = TRUE; 353 } 354 break; 355 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: 356 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: 357 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 358 emit->register_overflow = TRUE; 359 } 360 break; 361 case VGPU10_OPERAND_TYPE_INPUT: 362 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: 363 case VGPU10_OPCODE_DCL_INPUT: 364 case VGPU10_OPCODE_DCL_INPUT_SGV: 365 case VGPU10_OPCODE_DCL_INPUT_SIV: 366 case VGPU10_OPCODE_DCL_INPUT_PS: 367 case VGPU10_OPCODE_DCL_INPUT_PS_SGV: 368 case VGPU10_OPCODE_DCL_INPUT_PS_SIV: 369 if ((emit->unit == PIPE_SHADER_VERTEX && 370 index >= VGPU10_MAX_VS_INPUTS) || 371 (emit->unit == PIPE_SHADER_GEOMETRY && 372 index >= VGPU10_MAX_GS_INPUTS) || 373 (emit->unit == PIPE_SHADER_FRAGMENT && 374 index >= VGPU10_MAX_FS_INPUTS)) { 375 emit->register_overflow = TRUE; 376 } 377 break; 378 case VGPU10_OPERAND_TYPE_OUTPUT: 379 case VGPU10_OPCODE_DCL_OUTPUT: 380 case VGPU10_OPCODE_DCL_OUTPUT_SGV: 381 case VGPU10_OPCODE_DCL_OUTPUT_SIV: 382 if ((emit->unit == PIPE_SHADER_VERTEX && 383 index >= VGPU10_MAX_VS_OUTPUTS) || 384 (emit->unit == PIPE_SHADER_GEOMETRY && 385 index >= VGPU10_MAX_GS_OUTPUTS) || 386 (emit->unit == PIPE_SHADER_FRAGMENT && 387 index >= VGPU10_MAX_FS_OUTPUTS)) { 388 emit->register_overflow = TRUE; 389 } 390 break; 391 case VGPU10_OPERAND_TYPE_SAMPLER: 392 case VGPU10_OPCODE_DCL_SAMPLER: 393 if (index >= VGPU10_MAX_SAMPLERS) { 394 emit->register_overflow = TRUE; 395 } 396 break; 397 case VGPU10_OPERAND_TYPE_RESOURCE: 398 case VGPU10_OPCODE_DCL_RESOURCE: 399 if (index >= VGPU10_MAX_RESOURCES) { 400 emit->register_overflow = TRUE; 401 } 402 break; 403 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 404 if (index >= MAX_IMMEDIATE_COUNT) { 405 emit->register_overflow = TRUE; 406 } 407 break; 408 default: 409 assert(0); 410 ; /* nothing */ 411 } 412 413 if (emit->register_overflow && !overflow_before) { 414 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", 415 operandType, index); 416 } 417} 418 419 420/** 421 * Examine misc state to determine the clipping mode. 422 */ 423static void 424determine_clipping_mode(struct svga_shader_emitter_v10 *emit) 425{ 426 if (emit->info.num_written_clipdistance > 0) { 427 emit->clip_mode = CLIP_DISTANCE; 428 } 429 else if (emit->info.writes_clipvertex) { 430 emit->clip_mode = CLIP_VERTEX; 431 } 432 else if (emit->key.clip_plane_enable) { 433 emit->clip_mode = CLIP_LEGACY; 434 } 435 else { 436 emit->clip_mode = CLIP_NONE; 437 } 438} 439 440 441/** 442 * For clip distance register declarations and clip distance register 443 * writes we need to mask the declaration usage or instruction writemask 444 * (respectively) against the set of the really-enabled clipping planes. 445 * 446 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables 447 * has a VS that writes to all 8 clip distance registers, but the plane enable 448 * flags are a subset of that. 449 * 450 * This function is used to apply the plane enable flags to the register 451 * declaration or instruction writemask. 452 * 453 * \param writemask the declaration usage mask or instruction writemask 454 * \param clip_reg_index which clip plane register is being declared/written. 455 * The legal values are 0 and 1 (two clip planes per 456 * register, for a total of 8 clip planes) 457 */ 458static unsigned 459apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 460 unsigned writemask, unsigned clip_reg_index) 461{ 462 unsigned shift; 463 464 assert(clip_reg_index < 2); 465 466 /* four clip planes per clip register: */ 467 shift = clip_reg_index * 4; 468 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 469 470 return writemask; 471} 472 473 474/** 475 * Translate gallium shader type into VGPU10 type. 476 */ 477static VGPU10_PROGRAM_TYPE 478translate_shader_type(unsigned type) 479{ 480 switch (type) { 481 case PIPE_SHADER_VERTEX: 482 return VGPU10_VERTEX_SHADER; 483 case PIPE_SHADER_GEOMETRY: 484 return VGPU10_GEOMETRY_SHADER; 485 case PIPE_SHADER_FRAGMENT: 486 return VGPU10_PIXEL_SHADER; 487 default: 488 assert(!"Unexpected shader type"); 489 return VGPU10_VERTEX_SHADER; 490 } 491} 492 493 494/** 495 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 496 * Note: we only need to translate the opcodes for "simple" instructions, 497 * as seen below. All other opcodes are handled/translated specially. 498 */ 499static VGPU10_OPCODE_TYPE 500translate_opcode(unsigned opcode) 501{ 502 switch (opcode) { 503 case TGSI_OPCODE_MOV: 504 return VGPU10_OPCODE_MOV; 505 case TGSI_OPCODE_MUL: 506 return VGPU10_OPCODE_MUL; 507 case TGSI_OPCODE_ADD: 508 return VGPU10_OPCODE_ADD; 509 case TGSI_OPCODE_DP3: 510 return VGPU10_OPCODE_DP3; 511 case TGSI_OPCODE_DP4: 512 return VGPU10_OPCODE_DP4; 513 case TGSI_OPCODE_MIN: 514 return VGPU10_OPCODE_MIN; 515 case TGSI_OPCODE_MAX: 516 return VGPU10_OPCODE_MAX; 517 case TGSI_OPCODE_MAD: 518 return VGPU10_OPCODE_MAD; 519 case TGSI_OPCODE_SQRT: 520 return VGPU10_OPCODE_SQRT; 521 case TGSI_OPCODE_FRC: 522 return VGPU10_OPCODE_FRC; 523 case TGSI_OPCODE_FLR: 524 return VGPU10_OPCODE_ROUND_NI; 525 case TGSI_OPCODE_FSEQ: 526 return VGPU10_OPCODE_EQ; 527 case TGSI_OPCODE_FSGE: 528 return VGPU10_OPCODE_GE; 529 case TGSI_OPCODE_FSNE: 530 return VGPU10_OPCODE_NE; 531 case TGSI_OPCODE_DDX: 532 return VGPU10_OPCODE_DERIV_RTX; 533 case TGSI_OPCODE_DDY: 534 return VGPU10_OPCODE_DERIV_RTY; 535 case TGSI_OPCODE_RET: 536 return VGPU10_OPCODE_RET; 537 case TGSI_OPCODE_DIV: 538 return VGPU10_OPCODE_DIV; 539 case TGSI_OPCODE_IDIV: 540 return VGPU10_OPCODE_IDIV; 541 case TGSI_OPCODE_DP2: 542 return VGPU10_OPCODE_DP2; 543 case TGSI_OPCODE_BRK: 544 return VGPU10_OPCODE_BREAK; 545 case TGSI_OPCODE_IF: 546 return VGPU10_OPCODE_IF; 547 case TGSI_OPCODE_ELSE: 548 return VGPU10_OPCODE_ELSE; 549 case TGSI_OPCODE_ENDIF: 550 return VGPU10_OPCODE_ENDIF; 551 case TGSI_OPCODE_CEIL: 552 return VGPU10_OPCODE_ROUND_PI; 553 case TGSI_OPCODE_I2F: 554 return VGPU10_OPCODE_ITOF; 555 case TGSI_OPCODE_NOT: 556 return VGPU10_OPCODE_NOT; 557 case TGSI_OPCODE_TRUNC: 558 return VGPU10_OPCODE_ROUND_Z; 559 case TGSI_OPCODE_SHL: 560 return VGPU10_OPCODE_ISHL; 561 case TGSI_OPCODE_AND: 562 return VGPU10_OPCODE_AND; 563 case TGSI_OPCODE_OR: 564 return VGPU10_OPCODE_OR; 565 case TGSI_OPCODE_XOR: 566 return VGPU10_OPCODE_XOR; 567 case TGSI_OPCODE_CONT: 568 return VGPU10_OPCODE_CONTINUE; 569 case TGSI_OPCODE_EMIT: 570 return VGPU10_OPCODE_EMIT; 571 case TGSI_OPCODE_ENDPRIM: 572 return VGPU10_OPCODE_CUT; 573 case TGSI_OPCODE_BGNLOOP: 574 return VGPU10_OPCODE_LOOP; 575 case TGSI_OPCODE_ENDLOOP: 576 return VGPU10_OPCODE_ENDLOOP; 577 case TGSI_OPCODE_ENDSUB: 578 return VGPU10_OPCODE_RET; 579 case TGSI_OPCODE_NOP: 580 return VGPU10_OPCODE_NOP; 581 case TGSI_OPCODE_BREAKC: 582 return VGPU10_OPCODE_BREAKC; 583 case TGSI_OPCODE_END: 584 return VGPU10_OPCODE_RET; 585 case TGSI_OPCODE_F2I: 586 return VGPU10_OPCODE_FTOI; 587 case TGSI_OPCODE_IMAX: 588 return VGPU10_OPCODE_IMAX; 589 case TGSI_OPCODE_IMIN: 590 return VGPU10_OPCODE_IMIN; 591 case TGSI_OPCODE_UDIV: 592 case TGSI_OPCODE_UMOD: 593 case TGSI_OPCODE_MOD: 594 return VGPU10_OPCODE_UDIV; 595 case TGSI_OPCODE_IMUL_HI: 596 return VGPU10_OPCODE_IMUL; 597 case TGSI_OPCODE_INEG: 598 return VGPU10_OPCODE_INEG; 599 case TGSI_OPCODE_ISHR: 600 return VGPU10_OPCODE_ISHR; 601 case TGSI_OPCODE_ISGE: 602 return VGPU10_OPCODE_IGE; 603 case TGSI_OPCODE_ISLT: 604 return VGPU10_OPCODE_ILT; 605 case TGSI_OPCODE_F2U: 606 return VGPU10_OPCODE_FTOU; 607 case TGSI_OPCODE_UADD: 608 return VGPU10_OPCODE_IADD; 609 case TGSI_OPCODE_U2F: 610 return VGPU10_OPCODE_UTOF; 611 case TGSI_OPCODE_UCMP: 612 return VGPU10_OPCODE_MOVC; 613 case TGSI_OPCODE_UMAD: 614 return VGPU10_OPCODE_UMAD; 615 case TGSI_OPCODE_UMAX: 616 return VGPU10_OPCODE_UMAX; 617 case TGSI_OPCODE_UMIN: 618 return VGPU10_OPCODE_UMIN; 619 case TGSI_OPCODE_UMUL: 620 case TGSI_OPCODE_UMUL_HI: 621 return VGPU10_OPCODE_UMUL; 622 case TGSI_OPCODE_USEQ: 623 return VGPU10_OPCODE_IEQ; 624 case TGSI_OPCODE_USGE: 625 return VGPU10_OPCODE_UGE; 626 case TGSI_OPCODE_USHR: 627 return VGPU10_OPCODE_USHR; 628 case TGSI_OPCODE_USLT: 629 return VGPU10_OPCODE_ULT; 630 case TGSI_OPCODE_USNE: 631 return VGPU10_OPCODE_INE; 632 case TGSI_OPCODE_SWITCH: 633 return VGPU10_OPCODE_SWITCH; 634 case TGSI_OPCODE_CASE: 635 return VGPU10_OPCODE_CASE; 636 case TGSI_OPCODE_DEFAULT: 637 return VGPU10_OPCODE_DEFAULT; 638 case TGSI_OPCODE_ENDSWITCH: 639 return VGPU10_OPCODE_ENDSWITCH; 640 case TGSI_OPCODE_FSLT: 641 return VGPU10_OPCODE_LT; 642 case TGSI_OPCODE_ROUND: 643 return VGPU10_OPCODE_ROUND_NE; 644 default: 645 assert(!"Unexpected TGSI opcode in translate_opcode()"); 646 return VGPU10_OPCODE_NOP; 647 } 648} 649 650 651/** 652 * Translate a TGSI register file type into a VGPU10 operand type. 653 * \param array is the TGSI_FILE_TEMPORARY register an array? 654 */ 655static VGPU10_OPERAND_TYPE 656translate_register_file(enum tgsi_file_type file, boolean array) 657{ 658 switch (file) { 659 case TGSI_FILE_CONSTANT: 660 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 661 case TGSI_FILE_INPUT: 662 return VGPU10_OPERAND_TYPE_INPUT; 663 case TGSI_FILE_OUTPUT: 664 return VGPU10_OPERAND_TYPE_OUTPUT; 665 case TGSI_FILE_TEMPORARY: 666 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP 667 : VGPU10_OPERAND_TYPE_TEMP; 668 case TGSI_FILE_IMMEDIATE: 669 /* all immediates are 32-bit values at this time so 670 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 671 */ 672 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; 673 case TGSI_FILE_SAMPLER: 674 return VGPU10_OPERAND_TYPE_SAMPLER; 675 case TGSI_FILE_SYSTEM_VALUE: 676 return VGPU10_OPERAND_TYPE_INPUT; 677 678 /* XXX TODO more cases to finish */ 679 680 default: 681 assert(!"Bad tgsi register file!"); 682 return VGPU10_OPERAND_TYPE_NULL; 683 } 684} 685 686 687/** 688 * Emit a null dst register 689 */ 690static void 691emit_null_dst_register(struct svga_shader_emitter_v10 *emit) 692{ 693 VGPU10OperandToken0 operand; 694 695 operand.value = 0; 696 operand.operandType = VGPU10_OPERAND_TYPE_NULL; 697 operand.numComponents = VGPU10_OPERAND_0_COMPONENT; 698 699 emit_dword(emit, operand.value); 700} 701 702 703/** 704 * If the given register is a temporary, return the array ID. 705 * Else return zero. 706 */ 707static unsigned 708get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 709 unsigned file, unsigned index) 710{ 711 if (file == TGSI_FILE_TEMPORARY) { 712 return emit->temp_map[index].arrayId; 713 } 714 else { 715 return 0; 716 } 717} 718 719 720/** 721 * If the given register is a temporary, convert the index from a TGSI 722 * TEMPORARY index to a VGPU10 temp index. 723 */ 724static unsigned 725remap_temp_index(const struct svga_shader_emitter_v10 *emit, 726 unsigned file, unsigned index) 727{ 728 if (file == TGSI_FILE_TEMPORARY) { 729 return emit->temp_map[index].index; 730 } 731 else { 732 return index; 733 } 734} 735 736 737/** 738 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 739 * Note: the operandType field must already be initialized. 740 */ 741static VGPU10OperandToken0 742setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 743 VGPU10OperandToken0 operand0, 744 unsigned file, 745 boolean indirect, boolean index2D, 746 unsigned tempArrayID) 747{ 748 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D; 749 750 /* 751 * Compute index dimensions 752 */ 753 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 754 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 755 /* there's no swizzle for in-line immediates */ 756 indexDim = VGPU10_OPERAND_INDEX_0D; 757 assert(operand0.selectionMode == 0); 758 } 759 else { 760 if (index2D || 761 tempArrayID > 0 || 762 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 763 indexDim = VGPU10_OPERAND_INDEX_2D; 764 } 765 else { 766 indexDim = VGPU10_OPERAND_INDEX_1D; 767 } 768 } 769 770 /* 771 * Compute index representations (immediate, relative, etc). 772 */ 773 if (tempArrayID > 0) { 774 assert(file == TGSI_FILE_TEMPORARY); 775 /* First index is the array ID, second index is the array element */ 776 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 777 if (indirect) { 778 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 779 } 780 else { 781 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 782 } 783 } 784 else if (indirect) { 785 if (file == TGSI_FILE_CONSTANT) { 786 /* index[0] indicates which constant buffer while index[1] indicates 787 * the position in the constant buffer. 788 */ 789 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 790 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 791 } 792 else { 793 /* All other register files are 1-dimensional */ 794 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 795 } 796 } 797 else { 798 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 799 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 800 } 801 802 operand0.indexDimension = indexDim; 803 operand0.index0Representation = index0Rep; 804 operand0.index1Representation = index1Rep; 805 806 return operand0; 807} 808 809 810/** 811 * Emit the operand for expressing an address register for indirect indexing. 812 * Note that the address register is really just a temp register. 813 * \param addr_reg_index which address register to use 814 */ 815static void 816emit_indirect_register(struct svga_shader_emitter_v10 *emit, 817 unsigned addr_reg_index) 818{ 819 unsigned tmp_reg_index; 820 VGPU10OperandToken0 operand0; 821 822 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 823 824 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 825 826 /* operand0 is a simple temporary register, selecting one component */ 827 operand0.value = 0; 828 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 829 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 830 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 831 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 832 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 833 operand0.swizzleX = 0; 834 operand0.swizzleY = 1; 835 operand0.swizzleZ = 2; 836 operand0.swizzleW = 3; 837 838 emit_dword(emit, operand0.value); 839 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 840} 841 842 843/** 844 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 845 * \param emit the emitter context 846 * \param reg the TGSI dst register to translate 847 */ 848static void 849emit_dst_register(struct svga_shader_emitter_v10 *emit, 850 const struct tgsi_full_dst_register *reg) 851{ 852 unsigned file = reg->Register.File; 853 unsigned index = reg->Register.Index; 854 const unsigned sem_name = emit->info.output_semantic_name[index]; 855 const unsigned sem_index = emit->info.output_semantic_index[index]; 856 unsigned writemask = reg->Register.WriteMask; 857 const unsigned indirect = reg->Register.Indirect; 858 const unsigned tempArrayId = get_temp_array_id(emit, file, index); 859 const unsigned index2d = reg->Register.Dimension; 860 VGPU10OperandToken0 operand0; 861 862 if (file == TGSI_FILE_OUTPUT) { 863 if (emit->unit == PIPE_SHADER_VERTEX || 864 emit->unit == PIPE_SHADER_GEOMETRY) { 865 if (index == emit->vposition.out_index && 866 emit->vposition.tmp_index != INVALID_INDEX) { 867 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the 868 * vertex position result in a temporary so that we can modify 869 * it in the post_helper() code. 870 */ 871 file = TGSI_FILE_TEMPORARY; 872 index = emit->vposition.tmp_index; 873 } 874 else if (sem_name == TGSI_SEMANTIC_CLIPDIST && 875 emit->clip_dist_tmp_index != INVALID_INDEX) { 876 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. 877 * We store the clip distance in a temporary first, then 878 * we'll copy it to the shadow copy and to CLIPDIST with the 879 * enabled planes mask in emit_clip_distance_instructions(). 880 */ 881 file = TGSI_FILE_TEMPORARY; 882 index = emit->clip_dist_tmp_index + sem_index; 883 } 884 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && 885 emit->clip_vertex_tmp_index != INVALID_INDEX) { 886 /* replace the CLIPVERTEX output register with a temporary */ 887 assert(emit->clip_mode == CLIP_VERTEX); 888 assert(sem_index == 0); 889 file = TGSI_FILE_TEMPORARY; 890 index = emit->clip_vertex_tmp_index; 891 } 892 } 893 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 894 if (sem_name == TGSI_SEMANTIC_POSITION) { 895 /* Fragment depth output register */ 896 operand0.value = 0; 897 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 898 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 899 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 900 emit_dword(emit, operand0.value); 901 return; 902 } 903 else if (index == emit->fs.color_out_index[0] && 904 emit->fs.color_tmp_index != INVALID_INDEX) { 905 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the 906 * fragment color result in a temporary so that we can read it 907 * it in the post_helper() code. 908 */ 909 file = TGSI_FILE_TEMPORARY; 910 index = emit->fs.color_tmp_index; 911 } 912 else { 913 /* Typically, for fragment shaders, the output register index 914 * matches the color semantic index. But not when we write to 915 * the fragment depth register. In that case, OUT[0] will be 916 * fragdepth and OUT[1] will be the 0th color output. We need 917 * to use the semantic index for color outputs. 918 */ 919 assert(sem_name == TGSI_SEMANTIC_COLOR); 920 index = emit->info.output_semantic_index[index]; 921 922 emit->num_output_writes++; 923 } 924 } 925 } 926 927 /* init operand tokens to all zero */ 928 operand0.value = 0; 929 930 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 931 932 /* the operand has a writemask */ 933 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 934 935 /* Which of the four dest components to write to. Note that we can use a 936 * simple assignment here since TGSI writemasks match VGPU10 writemasks. 937 */ 938 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); 939 operand0.mask = writemask; 940 941 /* translate TGSI register file type to VGPU10 operand type */ 942 operand0.operandType = translate_register_file(file, tempArrayId > 0); 943 944 check_register_index(emit, operand0.operandType, index); 945 946 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 947 index2d, tempArrayId); 948 949 /* Emit tokens */ 950 emit_dword(emit, operand0.value); 951 if (tempArrayId > 0) { 952 emit_dword(emit, tempArrayId); 953 } 954 955 emit_dword(emit, remap_temp_index(emit, file, index)); 956 957 if (indirect) { 958 emit_indirect_register(emit, reg->Indirect.Index); 959 } 960} 961 962 963/** 964 * Translate a src register of a TGSI instruction and emit VGPU10 tokens. 965 */ 966static void 967emit_src_register(struct svga_shader_emitter_v10 *emit, 968 const struct tgsi_full_src_register *reg) 969{ 970 unsigned file = reg->Register.File; 971 unsigned index = reg->Register.Index; 972 const unsigned indirect = reg->Register.Indirect; 973 const unsigned tempArrayId = get_temp_array_id(emit, file, index); 974 const unsigned index2d = reg->Register.Dimension; 975 const unsigned swizzleX = reg->Register.SwizzleX; 976 const unsigned swizzleY = reg->Register.SwizzleY; 977 const unsigned swizzleZ = reg->Register.SwizzleZ; 978 const unsigned swizzleW = reg->Register.SwizzleW; 979 const unsigned absolute = reg->Register.Absolute; 980 const unsigned negate = reg->Register.Negate; 981 bool is_prim_id = FALSE; 982 983 VGPU10OperandToken0 operand0; 984 VGPU10OperandToken1 operand1; 985 986 if (emit->unit == PIPE_SHADER_FRAGMENT && 987 file == TGSI_FILE_INPUT) { 988 if (index == emit->fs.face_input_index) { 989 /* Replace INPUT[FACE] with TEMP[FACE] */ 990 file = TGSI_FILE_TEMPORARY; 991 index = emit->fs.face_tmp_index; 992 } 993 else if (index == emit->fs.fragcoord_input_index) { 994 /* Replace INPUT[POSITION] with TEMP[POSITION] */ 995 file = TGSI_FILE_TEMPORARY; 996 index = emit->fs.fragcoord_tmp_index; 997 } 998 else { 999 /* We remap fragment shader inputs to that FS input indexes 1000 * match up with VS/GS output indexes. 1001 */ 1002 index = emit->linkage.input_map[index]; 1003 } 1004 } 1005 else if (emit->unit == PIPE_SHADER_GEOMETRY && 1006 file == TGSI_FILE_INPUT) { 1007 is_prim_id = (index == emit->gs.prim_id_index); 1008 index = emit->linkage.input_map[index]; 1009 } 1010 else if (emit->unit == PIPE_SHADER_VERTEX) { 1011 if (file == TGSI_FILE_INPUT) { 1012 /* if input is adjusted... */ 1013 if ((emit->key.vs.adjust_attrib_w_1 | 1014 emit->key.vs.adjust_attrib_itof | 1015 emit->key.vs.adjust_attrib_utof | 1016 emit->key.vs.attrib_is_bgra | 1017 emit->key.vs.attrib_puint_to_snorm | 1018 emit->key.vs.attrib_puint_to_uscaled | 1019 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { 1020 file = TGSI_FILE_TEMPORARY; 1021 index = emit->vs.adjusted_input[index]; 1022 } 1023 } 1024 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1025 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1026 index = emit->system_value_indexes[index]; 1027 } 1028 } 1029 1030 operand0.value = operand1.value = 0; 1031 1032 if (is_prim_id) { 1033 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 1034 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1035 } 1036 else { 1037 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1038 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1039 } 1040 1041 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1042 index2d, tempArrayId); 1043 1044 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && 1045 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 1046 /* there's no swizzle for in-line immediates */ 1047 if (swizzleX == swizzleY && 1048 swizzleX == swizzleZ && 1049 swizzleX == swizzleW) { 1050 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1051 } 1052 else { 1053 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1054 } 1055 1056 operand0.swizzleX = swizzleX; 1057 operand0.swizzleY = swizzleY; 1058 operand0.swizzleZ = swizzleZ; 1059 operand0.swizzleW = swizzleW; 1060 1061 if (absolute || negate) { 1062 operand0.extended = 1; 1063 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; 1064 if (absolute && !negate) 1065 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; 1066 if (!absolute && negate) 1067 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; 1068 if (absolute && negate) 1069 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; 1070 } 1071 } 1072 1073 /* Emit the operand tokens */ 1074 emit_dword(emit, operand0.value); 1075 if (operand0.extended) 1076 emit_dword(emit, operand1.value); 1077 1078 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { 1079 /* Emit the four float/int in-line immediate values */ 1080 unsigned *c; 1081 assert(index < ARRAY_SIZE(emit->immediates)); 1082 assert(file == TGSI_FILE_IMMEDIATE); 1083 assert(swizzleX < 4); 1084 assert(swizzleY < 4); 1085 assert(swizzleZ < 4); 1086 assert(swizzleW < 4); 1087 c = (unsigned *) emit->immediates[index]; 1088 emit_dword(emit, c[swizzleX]); 1089 emit_dword(emit, c[swizzleY]); 1090 emit_dword(emit, c[swizzleZ]); 1091 emit_dword(emit, c[swizzleW]); 1092 } 1093 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { 1094 /* Emit the register index(es) */ 1095 if (index2d || 1096 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 1097 emit_dword(emit, reg->Dimension.Index); 1098 } 1099 1100 if (tempArrayId > 0) { 1101 emit_dword(emit, tempArrayId); 1102 } 1103 1104 emit_dword(emit, remap_temp_index(emit, file, index)); 1105 1106 if (indirect) { 1107 emit_indirect_register(emit, reg->Indirect.Index); 1108 } 1109 } 1110} 1111 1112 1113/** 1114 * Emit a resource operand (for use with a SAMPLE instruction). 1115 */ 1116static void 1117emit_resource_register(struct svga_shader_emitter_v10 *emit, 1118 unsigned resource_number) 1119{ 1120 VGPU10OperandToken0 operand0; 1121 1122 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); 1123 1124 /* init */ 1125 operand0.value = 0; 1126 1127 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 1128 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1129 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1130 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1131 operand0.swizzleX = VGPU10_COMPONENT_X; 1132 operand0.swizzleY = VGPU10_COMPONENT_Y; 1133 operand0.swizzleZ = VGPU10_COMPONENT_Z; 1134 operand0.swizzleW = VGPU10_COMPONENT_W; 1135 1136 emit_dword(emit, operand0.value); 1137 emit_dword(emit, resource_number); 1138} 1139 1140 1141/** 1142 * Emit a sampler operand (for use with a SAMPLE instruction). 1143 */ 1144static void 1145emit_sampler_register(struct svga_shader_emitter_v10 *emit, 1146 unsigned sampler_number) 1147{ 1148 VGPU10OperandToken0 operand0; 1149 1150 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); 1151 1152 /* init */ 1153 operand0.value = 0; 1154 1155 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 1156 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1157 1158 emit_dword(emit, operand0.value); 1159 emit_dword(emit, sampler_number); 1160} 1161 1162 1163/** 1164 * Emit an operand which reads the IS_FRONT_FACING register. 1165 */ 1166static void 1167emit_face_register(struct svga_shader_emitter_v10 *emit) 1168{ 1169 VGPU10OperandToken0 operand0; 1170 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 1171 1172 /* init */ 1173 operand0.value = 0; 1174 1175 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 1176 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1177 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1178 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1179 1180 operand0.swizzleX = VGPU10_COMPONENT_X; 1181 operand0.swizzleY = VGPU10_COMPONENT_X; 1182 operand0.swizzleZ = VGPU10_COMPONENT_X; 1183 operand0.swizzleW = VGPU10_COMPONENT_X; 1184 1185 emit_dword(emit, operand0.value); 1186 emit_dword(emit, index); 1187} 1188 1189 1190/** 1191 * Emit the token for a VGPU10 opcode. 1192 * \param saturate clamp result to [0,1]? 1193 */ 1194static void 1195emit_opcode(struct svga_shader_emitter_v10 *emit, 1196 unsigned vgpu10_opcode, boolean saturate) 1197{ 1198 VGPU10OpcodeToken0 token0; 1199 1200 token0.value = 0; /* init all fields to zero */ 1201 token0.opcodeType = vgpu10_opcode; 1202 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1203 token0.saturate = saturate; 1204 1205 emit_dword(emit, token0.value); 1206} 1207 1208 1209/** 1210 * Emit the token for a VGPU10 resinfo instruction. 1211 * \param modifier return type modifier, _uint or _rcpFloat. 1212 * TODO: We may want to remove this parameter if it will 1213 * only ever be used as _uint. 1214 */ 1215static void 1216emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 1217 VGPU10_RESINFO_RETURN_TYPE modifier) 1218{ 1219 VGPU10OpcodeToken0 token0; 1220 1221 token0.value = 0; /* init all fields to zero */ 1222 token0.opcodeType = VGPU10_OPCODE_RESINFO; 1223 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1224 token0.resinfoReturnType = modifier; 1225 1226 emit_dword(emit, token0.value); 1227} 1228 1229 1230/** 1231 * Emit opcode tokens for a texture sample instruction. Texture instructions 1232 * can be rather complicated (texel offsets, etc) so we have this specialized 1233 * function. 1234 */ 1235static void 1236emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 1237 unsigned vgpu10_opcode, boolean saturate, 1238 const int offsets[3]) 1239{ 1240 VGPU10OpcodeToken0 token0; 1241 VGPU10OpcodeToken1 token1; 1242 1243 token0.value = 0; /* init all fields to zero */ 1244 token0.opcodeType = vgpu10_opcode; 1245 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1246 token0.saturate = saturate; 1247 1248 if (offsets[0] || offsets[1] || offsets[2]) { 1249 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1250 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1251 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1252 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1253 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1254 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1255 1256 token0.extended = 1; 1257 token1.value = 0; 1258 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 1259 token1.offsetU = offsets[0]; 1260 token1.offsetV = offsets[1]; 1261 token1.offsetW = offsets[2]; 1262 } 1263 1264 emit_dword(emit, token0.value); 1265 if (token0.extended) { 1266 emit_dword(emit, token1.value); 1267 } 1268} 1269 1270 1271/** 1272 * Emit a DISCARD opcode token. 1273 * If nonzero is set, we'll discard the fragment if the X component is not 0. 1274 * Otherwise, we'll discard the fragment if the X component is 0. 1275 */ 1276static void 1277emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) 1278{ 1279 VGPU10OpcodeToken0 opcode0; 1280 1281 opcode0.value = 0; 1282 opcode0.opcodeType = VGPU10_OPCODE_DISCARD; 1283 if (nonzero) 1284 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 1285 1286 emit_dword(emit, opcode0.value); 1287} 1288 1289 1290/** 1291 * We need to call this before we begin emitting a VGPU10 instruction. 1292 */ 1293static void 1294begin_emit_instruction(struct svga_shader_emitter_v10 *emit) 1295{ 1296 assert(emit->inst_start_token == 0); 1297 /* Save location of the instruction's VGPU10OpcodeToken0 token. 1298 * Note, we can't save a pointer because it would become invalid if 1299 * we have to realloc the output buffer. 1300 */ 1301 emit->inst_start_token = emit_get_num_tokens(emit); 1302} 1303 1304 1305/** 1306 * We need to call this after we emit the last token of a VGPU10 instruction. 1307 * This function patches in the opcode token's instructionLength field. 1308 */ 1309static void 1310end_emit_instruction(struct svga_shader_emitter_v10 *emit) 1311{ 1312 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; 1313 unsigned inst_length; 1314 1315 assert(emit->inst_start_token > 0); 1316 1317 if (emit->discard_instruction) { 1318 /* Back up the emit->ptr to where this instruction started so 1319 * that we discard the current instruction. 1320 */ 1321 emit->ptr = (char *) (tokens + emit->inst_start_token); 1322 } 1323 else { 1324 /* Compute instruction length and patch that into the start of 1325 * the instruction. 1326 */ 1327 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; 1328 1329 assert(inst_length > 0); 1330 1331 tokens[emit->inst_start_token].instructionLength = inst_length; 1332 } 1333 1334 emit->inst_start_token = 0; /* reset to zero for error checking */ 1335 emit->discard_instruction = FALSE; 1336} 1337 1338 1339/** 1340 * Return index for a free temporary register. 1341 */ 1342static unsigned 1343get_temp_index(struct svga_shader_emitter_v10 *emit) 1344{ 1345 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); 1346 return emit->num_shader_temps + emit->internal_temp_count++; 1347} 1348 1349 1350/** 1351 * Release the temporaries which were generated by get_temp_index(). 1352 */ 1353static void 1354free_temp_indexes(struct svga_shader_emitter_v10 *emit) 1355{ 1356 emit->internal_temp_count = 0; 1357} 1358 1359 1360/** 1361 * Create a tgsi_full_src_register. 1362 */ 1363static struct tgsi_full_src_register 1364make_src_reg(unsigned file, unsigned index) 1365{ 1366 struct tgsi_full_src_register reg; 1367 1368 memset(®, 0, sizeof(reg)); 1369 reg.Register.File = file; 1370 reg.Register.Index = index; 1371 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 1372 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 1373 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1374 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 1375 return reg; 1376} 1377 1378 1379/** 1380 * Create a tgsi_full_src_register for a temporary. 1381 */ 1382static struct tgsi_full_src_register 1383make_src_temp_reg(unsigned index) 1384{ 1385 return make_src_reg(TGSI_FILE_TEMPORARY, index); 1386} 1387 1388 1389/** 1390 * Create a tgsi_full_src_register for a constant. 1391 */ 1392static struct tgsi_full_src_register 1393make_src_const_reg(unsigned index) 1394{ 1395 return make_src_reg(TGSI_FILE_CONSTANT, index); 1396} 1397 1398 1399/** 1400 * Create a tgsi_full_src_register for an immediate constant. 1401 */ 1402static struct tgsi_full_src_register 1403make_src_immediate_reg(unsigned index) 1404{ 1405 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1406} 1407 1408 1409/** 1410 * Create a tgsi_full_dst_register. 1411 */ 1412static struct tgsi_full_dst_register 1413make_dst_reg(unsigned file, unsigned index) 1414{ 1415 struct tgsi_full_dst_register reg; 1416 1417 memset(®, 0, sizeof(reg)); 1418 reg.Register.File = file; 1419 reg.Register.Index = index; 1420 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1421 return reg; 1422} 1423 1424 1425/** 1426 * Create a tgsi_full_dst_register for a temporary. 1427 */ 1428static struct tgsi_full_dst_register 1429make_dst_temp_reg(unsigned index) 1430{ 1431 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1432} 1433 1434 1435/** 1436 * Create a tgsi_full_dst_register for an output. 1437 */ 1438static struct tgsi_full_dst_register 1439make_dst_output_reg(unsigned index) 1440{ 1441 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1442} 1443 1444 1445/** 1446 * Create negated tgsi_full_src_register. 1447 */ 1448static struct tgsi_full_src_register 1449negate_src(const struct tgsi_full_src_register *reg) 1450{ 1451 struct tgsi_full_src_register neg = *reg; 1452 neg.Register.Negate = !reg->Register.Negate; 1453 return neg; 1454} 1455 1456/** 1457 * Create absolute value of a tgsi_full_src_register. 1458 */ 1459static struct tgsi_full_src_register 1460absolute_src(const struct tgsi_full_src_register *reg) 1461{ 1462 struct tgsi_full_src_register absolute = *reg; 1463 absolute.Register.Absolute = 1; 1464 return absolute; 1465} 1466 1467 1468/** Return the named swizzle term from the src register */ 1469static inline unsigned 1470get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) 1471{ 1472 switch (term) { 1473 case TGSI_SWIZZLE_X: 1474 return reg->Register.SwizzleX; 1475 case TGSI_SWIZZLE_Y: 1476 return reg->Register.SwizzleY; 1477 case TGSI_SWIZZLE_Z: 1478 return reg->Register.SwizzleZ; 1479 case TGSI_SWIZZLE_W: 1480 return reg->Register.SwizzleW; 1481 default: 1482 assert(!"Bad swizzle"); 1483 return TGSI_SWIZZLE_X; 1484 } 1485} 1486 1487 1488/** 1489 * Create swizzled tgsi_full_src_register. 1490 */ 1491static struct tgsi_full_src_register 1492swizzle_src(const struct tgsi_full_src_register *reg, 1493 unsigned swizzleX, unsigned swizzleY, 1494 unsigned swizzleZ, unsigned swizzleW) 1495{ 1496 struct tgsi_full_src_register swizzled = *reg; 1497 /* Note: we swizzle the current swizzle */ 1498 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1499 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1500 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1501 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1502 return swizzled; 1503} 1504 1505 1506/** 1507 * Create swizzled tgsi_full_src_register where all the swizzle 1508 * terms are the same. 1509 */ 1510static struct tgsi_full_src_register 1511scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) 1512{ 1513 struct tgsi_full_src_register swizzled = *reg; 1514 /* Note: we swizzle the current swizzle */ 1515 swizzled.Register.SwizzleX = 1516 swizzled.Register.SwizzleY = 1517 swizzled.Register.SwizzleZ = 1518 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1519 return swizzled; 1520} 1521 1522 1523/** 1524 * Create new tgsi_full_dst_register with writemask. 1525 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1526 */ 1527static struct tgsi_full_dst_register 1528writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1529{ 1530 struct tgsi_full_dst_register masked = *reg; 1531 masked.Register.WriteMask = mask; 1532 return masked; 1533} 1534 1535 1536/** 1537 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1538 */ 1539static boolean 1540same_swizzle_terms(const struct tgsi_full_src_register *reg) 1541{ 1542 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1543 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1544 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1545} 1546 1547 1548/** 1549 * Search the vector for the value 'x' and return its position. 1550 */ 1551static int 1552find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1553 union tgsi_immediate_data x) 1554{ 1555 unsigned i; 1556 for (i = 0; i < 4; i++) { 1557 if (vec[i].Int == x.Int) 1558 return i; 1559 } 1560 return -1; 1561} 1562 1563 1564/** 1565 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 1566 */ 1567static int 1568find_immediate(struct svga_shader_emitter_v10 *emit, 1569 union tgsi_immediate_data x, unsigned startIndex) 1570{ 1571 const unsigned endIndex = emit->num_immediates; 1572 unsigned i; 1573 1574 assert(emit->immediates_emitted); 1575 1576 /* Search immediates for x, y, z, w */ 1577 for (i = startIndex; i < endIndex; i++) { 1578 if (x.Int == emit->immediates[i][0].Int || 1579 x.Int == emit->immediates[i][1].Int || 1580 x.Int == emit->immediates[i][2].Int || 1581 x.Int == emit->immediates[i][3].Int) { 1582 return i; 1583 } 1584 } 1585 /* Should never try to use an immediate value that wasn't pre-declared */ 1586 assert(!"find_immediate() failed!"); 1587 return -1; 1588} 1589 1590 1591/** 1592 * Return a tgsi_full_src_register for an immediate/literal 1593 * union tgsi_immediate_data[4] value. 1594 * Note: the values must have been previously declared/allocated in 1595 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same 1596 * vec4 immediate. 1597 */ 1598static struct tgsi_full_src_register 1599make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, 1600 const union tgsi_immediate_data imm[4]) 1601{ 1602 struct tgsi_full_src_register reg; 1603 unsigned i; 1604 1605 for (i = 0; i < emit->num_common_immediates; i++) { 1606 /* search for first component value */ 1607 int immpos = find_immediate(emit, imm[0], i); 1608 int x, y, z, w; 1609 1610 assert(immpos >= 0); 1611 1612 /* find remaining components within the immediate vector */ 1613 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); 1614 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); 1615 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); 1616 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); 1617 1618 if (x >=0 && y >= 0 && z >= 0 && w >= 0) { 1619 /* found them all */ 1620 memset(®, 0, sizeof(reg)); 1621 reg.Register.File = TGSI_FILE_IMMEDIATE; 1622 reg.Register.Index = immpos; 1623 reg.Register.SwizzleX = x; 1624 reg.Register.SwizzleY = y; 1625 reg.Register.SwizzleZ = z; 1626 reg.Register.SwizzleW = w; 1627 return reg; 1628 } 1629 /* else, keep searching */ 1630 } 1631 1632 assert(!"Failed to find immediate register!"); 1633 1634 /* Just return IMM[0].xxxx */ 1635 memset(®, 0, sizeof(reg)); 1636 reg.Register.File = TGSI_FILE_IMMEDIATE; 1637 return reg; 1638} 1639 1640 1641/** 1642 * Return a tgsi_full_src_register for an immediate/literal 1643 * union tgsi_immediate_data value of the form {value, value, value, value}. 1644 * \sa make_immediate_reg_4() regarding allowed values. 1645 */ 1646static struct tgsi_full_src_register 1647make_immediate_reg(struct svga_shader_emitter_v10 *emit, 1648 union tgsi_immediate_data value) 1649{ 1650 struct tgsi_full_src_register reg; 1651 int immpos = find_immediate(emit, value, 0); 1652 1653 assert(immpos >= 0); 1654 1655 memset(®, 0, sizeof(reg)); 1656 reg.Register.File = TGSI_FILE_IMMEDIATE; 1657 reg.Register.Index = immpos; 1658 reg.Register.SwizzleX = 1659 reg.Register.SwizzleY = 1660 reg.Register.SwizzleZ = 1661 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); 1662 1663 return reg; 1664} 1665 1666 1667/** 1668 * Return a tgsi_full_src_register for an immediate/literal float[4] value. 1669 * \sa make_immediate_reg_4() regarding allowed values. 1670 */ 1671static struct tgsi_full_src_register 1672make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, 1673 float x, float y, float z, float w) 1674{ 1675 union tgsi_immediate_data imm[4]; 1676 imm[0].Float = x; 1677 imm[1].Float = y; 1678 imm[2].Float = z; 1679 imm[3].Float = w; 1680 return make_immediate_reg_4(emit, imm); 1681} 1682 1683 1684/** 1685 * Return a tgsi_full_src_register for an immediate/literal float value 1686 * of the form {value, value, value, value}. 1687 * \sa make_immediate_reg_4() regarding allowed values. 1688 */ 1689static struct tgsi_full_src_register 1690make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1691{ 1692 union tgsi_immediate_data imm; 1693 imm.Float = value; 1694 return make_immediate_reg(emit, imm); 1695} 1696 1697 1698/** 1699 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 1700 */ 1701static struct tgsi_full_src_register 1702make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1703 int x, int y, int z, int w) 1704{ 1705 union tgsi_immediate_data imm[4]; 1706 imm[0].Int = x; 1707 imm[1].Int = y; 1708 imm[2].Int = z; 1709 imm[3].Int = w; 1710 return make_immediate_reg_4(emit, imm); 1711} 1712 1713 1714/** 1715 * Return a tgsi_full_src_register for an immediate/literal int value 1716 * of the form {value, value, value, value}. 1717 * \sa make_immediate_reg_4() regarding allowed values. 1718 */ 1719static struct tgsi_full_src_register 1720make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1721{ 1722 union tgsi_immediate_data imm; 1723 imm.Int = value; 1724 return make_immediate_reg(emit, imm); 1725} 1726 1727 1728/** 1729 * Allocate space for a union tgsi_immediate_data[4] immediate. 1730 * \return the index/position of the immediate. 1731 */ 1732static unsigned 1733alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1734 const union tgsi_immediate_data imm[4]) 1735{ 1736 unsigned n = emit->num_immediates++; 1737 assert(!emit->immediates_emitted); 1738 assert(n < ARRAY_SIZE(emit->immediates)); 1739 emit->immediates[n][0] = imm[0]; 1740 emit->immediates[n][1] = imm[1]; 1741 emit->immediates[n][2] = imm[2]; 1742 emit->immediates[n][3] = imm[3]; 1743 return n; 1744} 1745 1746 1747/** 1748 * Allocate space for a float[4] immediate. 1749 * \return the index/position of the immediate. 1750 */ 1751static unsigned 1752alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1753 float x, float y, float z, float w) 1754{ 1755 union tgsi_immediate_data imm[4]; 1756 imm[0].Float = x; 1757 imm[1].Float = y; 1758 imm[2].Float = z; 1759 imm[3].Float = w; 1760 return alloc_immediate_4(emit, imm); 1761} 1762 1763 1764/** 1765 * Allocate space for a int[4] immediate. 1766 * \return the index/position of the immediate. 1767 */ 1768static unsigned 1769alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1770 int x, int y, int z, int w) 1771{ 1772 union tgsi_immediate_data imm[4]; 1773 imm[0].Int = x; 1774 imm[1].Int = y; 1775 imm[2].Int = z; 1776 imm[3].Int = w; 1777 return alloc_immediate_4(emit, imm); 1778} 1779 1780 1781/** 1782 * Allocate a shader input to store a system value. 1783 */ 1784static unsigned 1785alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1786{ 1787 const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; 1788 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1789 emit->system_value_indexes[index] = n; 1790 return n; 1791} 1792 1793 1794/** 1795 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1796 */ 1797static boolean 1798emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1799 const struct tgsi_full_immediate *imm) 1800{ 1801 /* We don't actually emit any code here. We just save the 1802 * immediate values and emit them later. 1803 */ 1804 alloc_immediate_4(emit, imm->u); 1805 return TRUE; 1806} 1807 1808 1809/** 1810 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1811 * containing all the immediate values previously allocated 1812 * with alloc_immediate_4(). 1813 */ 1814static boolean 1815emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 1816{ 1817 VGPU10OpcodeToken0 token; 1818 1819 assert(!emit->immediates_emitted); 1820 1821 token.value = 0; 1822 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 1823 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 1824 1825 /* Note: no begin/end_emit_instruction() calls */ 1826 emit_dword(emit, token.value); 1827 emit_dword(emit, 2 + 4 * emit->num_immediates); 1828 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 1829 1830 emit->immediates_emitted = TRUE; 1831 1832 return TRUE; 1833} 1834 1835 1836/** 1837 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 1838 * interpolation mode. 1839 * \return a VGPU10_INTERPOLATION_x value 1840 */ 1841static unsigned 1842translate_interpolation(const struct svga_shader_emitter_v10 *emit, 1843 unsigned interp, unsigned interpolate_loc) 1844{ 1845 if (interp == TGSI_INTERPOLATE_COLOR) { 1846 interp = emit->key.fs.flatshade ? 1847 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 1848 } 1849 1850 switch (interp) { 1851 case TGSI_INTERPOLATE_CONSTANT: 1852 return VGPU10_INTERPOLATION_CONSTANT; 1853 case TGSI_INTERPOLATE_LINEAR: 1854 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1855 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : 1856 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 1857 case TGSI_INTERPOLATE_PERSPECTIVE: 1858 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1859 VGPU10_INTERPOLATION_LINEAR_CENTROID : 1860 VGPU10_INTERPOLATION_LINEAR; 1861 default: 1862 assert(!"Unexpected interpolation mode"); 1863 return VGPU10_INTERPOLATION_CONSTANT; 1864 } 1865} 1866 1867 1868/** 1869 * Translate a TGSI property to VGPU10. 1870 * Don't emit any instructions yet, only need to gather the primitive property information. 1871 * The output primitive topology might be changed later. The final property instructions 1872 * will be emitted as part of the pre-helper code. 1873 */ 1874static boolean 1875emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 1876 const struct tgsi_full_property *prop) 1877{ 1878 static const VGPU10_PRIMITIVE primType[] = { 1879 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 1880 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 1881 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 1882 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 1883 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 1884 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 1885 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 1886 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 1887 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1888 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1889 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1890 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1891 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1892 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1893 }; 1894 1895 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 1896 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 1897 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 1898 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 1899 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 1900 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 1901 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 1902 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 1903 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 1904 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1905 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1906 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1907 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1908 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1909 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1910 }; 1911 1912 static const unsigned inputArraySize[] = { 1913 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 1914 1, /* VGPU10_PRIMITIVE_POINT */ 1915 2, /* VGPU10_PRIMITIVE_LINE */ 1916 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 1917 0, 1918 0, 1919 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 1920 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 1921 }; 1922 1923 switch (prop->Property.PropertyName) { 1924 case TGSI_PROPERTY_GS_INPUT_PRIM: 1925 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 1926 emit->gs.prim_type = primType[prop->u[0].Data]; 1927 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 1928 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 1929 break; 1930 1931 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1932 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 1933 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 1934 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 1935 break; 1936 1937 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1938 emit->gs.max_out_vertices = prop->u[0].Data; 1939 break; 1940 1941 default: 1942 break; 1943 } 1944 1945 return TRUE; 1946} 1947 1948 1949static void 1950emit_property_instruction(struct svga_shader_emitter_v10 *emit, 1951 VGPU10OpcodeToken0 opcode0, unsigned nData, 1952 unsigned data) 1953{ 1954 begin_emit_instruction(emit); 1955 emit_dword(emit, opcode0.value); 1956 if (nData) 1957 emit_dword(emit, data); 1958 end_emit_instruction(emit); 1959} 1960 1961 1962/** 1963 * Emit property instructions 1964 */ 1965static void 1966emit_property_instructions(struct svga_shader_emitter_v10 *emit) 1967{ 1968 VGPU10OpcodeToken0 opcode0; 1969 1970 assert(emit->unit == PIPE_SHADER_GEOMETRY); 1971 1972 /* emit input primitive type declaration */ 1973 opcode0.value = 0; 1974 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; 1975 opcode0.primitive = emit->gs.prim_type; 1976 emit_property_instruction(emit, opcode0, 0, 0); 1977 1978 /* emit output primitive topology declaration */ 1979 opcode0.value = 0; 1980 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; 1981 opcode0.primitiveTopology = emit->gs.prim_topology; 1982 emit_property_instruction(emit, opcode0, 0, 0); 1983 1984 /* emit max output vertices */ 1985 opcode0.value = 0; 1986 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; 1987 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); 1988} 1989 1990 1991/** 1992 * Emit a vgpu10 declaration "instruction". 1993 * \param index the register index 1994 * \param size array size of the operand. In most cases, it is 1, 1995 * but for inputs to geometry shader, the array size varies 1996 * depending on the primitive type. 1997 */ 1998static void 1999emit_decl_instruction(struct svga_shader_emitter_v10 *emit, 2000 VGPU10OpcodeToken0 opcode0, 2001 VGPU10OperandToken0 operand0, 2002 VGPU10NameToken name_token, 2003 unsigned index, unsigned size) 2004{ 2005 assert(opcode0.opcodeType); 2006 assert(operand0.mask); 2007 2008 begin_emit_instruction(emit); 2009 emit_dword(emit, opcode0.value); 2010 2011 emit_dword(emit, operand0.value); 2012 2013 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { 2014 /* Next token is the index of the register to declare */ 2015 emit_dword(emit, index); 2016 } 2017 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { 2018 /* Next token is the size of the register */ 2019 emit_dword(emit, size); 2020 2021 /* Followed by the index of the register */ 2022 emit_dword(emit, index); 2023 } 2024 2025 if (name_token.value) { 2026 emit_dword(emit, name_token.value); 2027 } 2028 2029 end_emit_instruction(emit); 2030} 2031 2032 2033/** 2034 * Emit the declaration for a shader input. 2035 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx 2036 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x 2037 * \param dim index dimension 2038 * \param index the input register index 2039 * \param size array size of the operand. In most cases, it is 1, 2040 * but for inputs to geometry shader, the array size varies 2041 * depending on the primitive type. 2042 * \param name one of VGPU10_NAME_x 2043 * \parma numComp number of components 2044 * \param selMode component selection mode 2045 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2046 * \param interpMode interpolation mode 2047 */ 2048static void 2049emit_input_declaration(struct svga_shader_emitter_v10 *emit, 2050 unsigned opcodeType, unsigned operandType, 2051 unsigned dim, unsigned index, unsigned size, 2052 unsigned name, unsigned numComp, 2053 unsigned selMode, unsigned usageMask, 2054 unsigned interpMode) 2055{ 2056 VGPU10OpcodeToken0 opcode0; 2057 VGPU10OperandToken0 operand0; 2058 VGPU10NameToken name_token; 2059 2060 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2061 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || 2062 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || 2063 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || 2064 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); 2065 assert(operandType == VGPU10_OPERAND_TYPE_INPUT || 2066 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); 2067 assert(numComp <= VGPU10_OPERAND_4_COMPONENT); 2068 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); 2069 assert(dim <= VGPU10_OPERAND_INDEX_3D); 2070 assert(name == VGPU10_NAME_UNDEFINED || 2071 name == VGPU10_NAME_POSITION || 2072 name == VGPU10_NAME_INSTANCE_ID || 2073 name == VGPU10_NAME_VERTEX_ID || 2074 name == VGPU10_NAME_PRIMITIVE_ID || 2075 name == VGPU10_NAME_IS_FRONT_FACE); 2076 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || 2077 interpMode == VGPU10_INTERPOLATION_CONSTANT || 2078 interpMode == VGPU10_INTERPOLATION_LINEAR || 2079 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || 2080 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || 2081 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID); 2082 2083 check_register_index(emit, opcodeType, index); 2084 2085 opcode0.value = operand0.value = name_token.value = 0; 2086 2087 opcode0.opcodeType = opcodeType; 2088 opcode0.interpolationMode = interpMode; 2089 2090 operand0.operandType = operandType; 2091 operand0.numComponents = numComp; 2092 operand0.selectionMode = selMode; 2093 operand0.mask = usageMask; 2094 operand0.indexDimension = dim; 2095 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2096 if (dim == VGPU10_OPERAND_INDEX_2D) 2097 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2098 2099 name_token.name = name; 2100 2101 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); 2102} 2103 2104 2105/** 2106 * Emit the declaration for a shader output. 2107 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx 2108 * \param index the output register index 2109 * \param name one of VGPU10_NAME_x 2110 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2111 */ 2112static void 2113emit_output_declaration(struct svga_shader_emitter_v10 *emit, 2114 unsigned type, unsigned index, 2115 unsigned name, unsigned usageMask) 2116{ 2117 VGPU10OpcodeToken0 opcode0; 2118 VGPU10OperandToken0 operand0; 2119 VGPU10NameToken name_token; 2120 2121 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2122 assert(type == VGPU10_OPCODE_DCL_OUTPUT || 2123 type == VGPU10_OPCODE_DCL_OUTPUT_SGV || 2124 type == VGPU10_OPCODE_DCL_OUTPUT_SIV); 2125 assert(name == VGPU10_NAME_UNDEFINED || 2126 name == VGPU10_NAME_POSITION || 2127 name == VGPU10_NAME_PRIMITIVE_ID || 2128 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 2129 name == VGPU10_NAME_CLIP_DISTANCE); 2130 2131 check_register_index(emit, type, index); 2132 2133 opcode0.value = operand0.value = name_token.value = 0; 2134 2135 opcode0.opcodeType = type; 2136 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 2137 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2138 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2139 operand0.mask = usageMask; 2140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2141 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2142 2143 name_token.name = name; 2144 2145 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 2146} 2147 2148 2149/** 2150 * Emit the declaration for the fragment depth output. 2151 */ 2152static void 2153emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 2154{ 2155 VGPU10OpcodeToken0 opcode0; 2156 VGPU10OperandToken0 operand0; 2157 VGPU10NameToken name_token; 2158 2159 assert(emit->unit == PIPE_SHADER_FRAGMENT); 2160 2161 opcode0.value = operand0.value = name_token.value = 0; 2162 2163 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 2164 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 2165 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 2166 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2167 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2168 2169 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 2170} 2171 2172 2173/** 2174 * Emit the declaration for a system value input/output. 2175 */ 2176static void 2177emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 2178 unsigned semantic_name, unsigned index) 2179{ 2180 switch (semantic_name) { 2181 case TGSI_SEMANTIC_INSTANCEID: 2182 index = alloc_system_value_index(emit, index); 2183 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2184 VGPU10_OPERAND_TYPE_INPUT, 2185 VGPU10_OPERAND_INDEX_1D, 2186 index, 1, 2187 VGPU10_NAME_INSTANCE_ID, 2188 VGPU10_OPERAND_4_COMPONENT, 2189 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2190 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2191 VGPU10_INTERPOLATION_UNDEFINED); 2192 break; 2193 case TGSI_SEMANTIC_VERTEXID: 2194 index = alloc_system_value_index(emit, index); 2195 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2196 VGPU10_OPERAND_TYPE_INPUT, 2197 VGPU10_OPERAND_INDEX_1D, 2198 index, 1, 2199 VGPU10_NAME_VERTEX_ID, 2200 VGPU10_OPERAND_4_COMPONENT, 2201 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2202 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2203 VGPU10_INTERPOLATION_UNDEFINED); 2204 break; 2205 default: 2206 ; /* XXX */ 2207 } 2208} 2209 2210/** 2211 * Translate a TGSI declaration to VGPU10. 2212 */ 2213static boolean 2214emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 2215 const struct tgsi_full_declaration *decl) 2216{ 2217 switch (decl->Declaration.File) { 2218 case TGSI_FILE_INPUT: 2219 /* do nothing - see emit_input_declarations() */ 2220 return TRUE; 2221 2222 case TGSI_FILE_OUTPUT: 2223 assert(decl->Range.First == decl->Range.Last); 2224 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 2225 return TRUE; 2226 2227 case TGSI_FILE_TEMPORARY: 2228 /* Don't declare the temps here. Just keep track of how many 2229 * and emit the declaration later. 2230 */ 2231 if (decl->Declaration.Array) { 2232 /* Indexed temporary array. Save the start index of the array 2233 * and the size of the array. 2234 */ 2235 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 2236 unsigned i; 2237 2238 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 2239 2240 /* Save this array so we can emit the declaration for it later */ 2241 emit->temp_arrays[arrayID].start = decl->Range.First; 2242 emit->temp_arrays[arrayID].size = 2243 decl->Range.Last - decl->Range.First + 1; 2244 2245 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 2246 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 2247 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 2248 2249 /* Fill in the temp_map entries for this array */ 2250 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2251 emit->temp_map[i].arrayId = arrayID; 2252 emit->temp_map[i].index = i - decl->Range.First; 2253 } 2254 } 2255 2256 /* for all temps, indexed or not, keep track of highest index */ 2257 emit->num_shader_temps = MAX2(emit->num_shader_temps, 2258 decl->Range.Last + 1); 2259 return TRUE; 2260 2261 case TGSI_FILE_CONSTANT: 2262 /* Don't declare constants here. Just keep track and emit later. */ 2263 { 2264 unsigned constbuf = 0, num_consts; 2265 if (decl->Declaration.Dimension) { 2266 constbuf = decl->Dim.Index2D; 2267 } 2268 /* We throw an assertion here when, in fact, the shader should never 2269 * have linked due to constbuf index out of bounds, so we shouldn't 2270 * have reached here. 2271 */ 2272 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); 2273 2274 num_consts = MAX2(emit->num_shader_consts[constbuf], 2275 decl->Range.Last + 1); 2276 2277 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 2278 debug_printf("Warning: constant buffer is declared to size [%u]" 2279 " but [%u] is the limit.\n", 2280 num_consts, 2281 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2282 } 2283 /* The linker doesn't enforce the max UBO size so we clamp here */ 2284 emit->num_shader_consts[constbuf] = 2285 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2286 } 2287 return TRUE; 2288 2289 case TGSI_FILE_IMMEDIATE: 2290 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 2291 return FALSE; 2292 2293 case TGSI_FILE_SYSTEM_VALUE: 2294 emit_system_value_declaration(emit, decl->Semantic.Name, 2295 decl->Range.First); 2296 return TRUE; 2297 2298 case TGSI_FILE_SAMPLER: 2299 /* Don't declare samplers here. Just keep track and emit later. */ 2300 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 2301 return TRUE; 2302 2303#if 0 2304 case TGSI_FILE_RESOURCE: 2305 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 2306 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 2307 assert(!"TGSI_FILE_RESOURCE not handled yet"); 2308 return FALSE; 2309#endif 2310 2311 case TGSI_FILE_ADDRESS: 2312 emit->num_address_regs = MAX2(emit->num_address_regs, 2313 decl->Range.Last + 1); 2314 return TRUE; 2315 2316 case TGSI_FILE_SAMPLER_VIEW: 2317 { 2318 unsigned unit = decl->Range.First; 2319 assert(decl->Range.First == decl->Range.Last); 2320 emit->sampler_target[unit] = decl->SamplerView.Resource; 2321 /* Note: we can ignore YZW return types for now */ 2322 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; 2323 } 2324 return TRUE; 2325 2326 default: 2327 assert(!"Unexpected type of declaration"); 2328 return FALSE; 2329 } 2330} 2331 2332 2333 2334/** 2335 * Emit all input declarations. 2336 */ 2337static boolean 2338emit_input_declarations(struct svga_shader_emitter_v10 *emit) 2339{ 2340 unsigned i; 2341 2342 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2343 2344 for (i = 0; i < emit->linkage.num_inputs; i++) { 2345 unsigned semantic_name = emit->info.input_semantic_name[i]; 2346 unsigned usage_mask = emit->info.input_usage_mask[i]; 2347 unsigned index = emit->linkage.input_map[i]; 2348 unsigned type, interpolationMode, name; 2349 2350 if (usage_mask == 0) 2351 continue; /* register is not actually used */ 2352 2353 if (semantic_name == TGSI_SEMANTIC_POSITION) { 2354 /* fragment position input */ 2355 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2356 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 2357 name = VGPU10_NAME_POSITION; 2358 if (usage_mask & TGSI_WRITEMASK_W) { 2359 /* we need to replace use of 'w' with '1/w' */ 2360 emit->fs.fragcoord_input_index = i; 2361 } 2362 } 2363 else if (semantic_name == TGSI_SEMANTIC_FACE) { 2364 /* fragment front-facing input */ 2365 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2366 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2367 name = VGPU10_NAME_IS_FRONT_FACE; 2368 emit->fs.face_input_index = i; 2369 } 2370 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2371 /* primitive ID */ 2372 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2373 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2374 name = VGPU10_NAME_PRIMITIVE_ID; 2375 } 2376 else { 2377 /* general fragment input */ 2378 type = VGPU10_OPCODE_DCL_INPUT_PS; 2379 interpolationMode = 2380 translate_interpolation(emit, 2381 emit->info.input_interpolate[i], 2382 emit->info.input_interpolate_loc[i]); 2383 2384 /* keeps track if flat interpolation mode is being used */ 2385 emit->uses_flat_interp = emit->uses_flat_interp || 2386 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 2387 2388 name = VGPU10_NAME_UNDEFINED; 2389 } 2390 2391 emit_input_declaration(emit, type, 2392 VGPU10_OPERAND_TYPE_INPUT, 2393 VGPU10_OPERAND_INDEX_1D, index, 1, 2394 name, 2395 VGPU10_OPERAND_4_COMPONENT, 2396 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2397 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2398 interpolationMode); 2399 } 2400 } 2401 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 2402 2403 for (i = 0; i < emit->info.num_inputs; i++) { 2404 unsigned semantic_name = emit->info.input_semantic_name[i]; 2405 unsigned usage_mask = emit->info.input_usage_mask[i]; 2406 unsigned index = emit->linkage.input_map[i]; 2407 unsigned opcodeType, operandType; 2408 unsigned numComp, selMode; 2409 unsigned name; 2410 unsigned dim; 2411 2412 if (usage_mask == 0) 2413 continue; /* register is not actually used */ 2414 2415 opcodeType = VGPU10_OPCODE_DCL_INPUT; 2416 operandType = VGPU10_OPERAND_TYPE_INPUT; 2417 numComp = VGPU10_OPERAND_4_COMPONENT; 2418 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2419 name = VGPU10_NAME_UNDEFINED; 2420 2421 /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ 2422 dim = VGPU10_OPERAND_INDEX_2D; 2423 2424 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2425 /* Primitive ID */ 2426 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 2427 dim = VGPU10_OPERAND_INDEX_0D; 2428 numComp = VGPU10_OPERAND_0_COMPONENT; 2429 selMode = 0; 2430 2431 /* also save the register index so we can check for 2432 * primitive id when emit src register. We need to modify the 2433 * operand type, index dimension when emit primitive id src reg. 2434 */ 2435 emit->gs.prim_id_index = i; 2436 } 2437 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2438 /* vertex position input */ 2439 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 2440 name = VGPU10_NAME_POSITION; 2441 } 2442 2443 emit_input_declaration(emit, opcodeType, operandType, 2444 dim, index, 2445 emit->gs.input_size, 2446 name, 2447 numComp, selMode, 2448 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2449 VGPU10_INTERPOLATION_UNDEFINED); 2450 } 2451 } 2452 else { 2453 assert(emit->unit == PIPE_SHADER_VERTEX); 2454 2455 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { 2456 unsigned usage_mask = emit->info.input_usage_mask[i]; 2457 unsigned index = i; 2458 2459 if (usage_mask == 0) 2460 continue; /* register is not actually used */ 2461 2462 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 2463 VGPU10_OPERAND_TYPE_INPUT, 2464 VGPU10_OPERAND_INDEX_1D, index, 1, 2465 VGPU10_NAME_UNDEFINED, 2466 VGPU10_OPERAND_4_COMPONENT, 2467 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2468 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2469 VGPU10_INTERPOLATION_UNDEFINED); 2470 } 2471 } 2472 2473 return TRUE; 2474} 2475 2476 2477/** 2478 * Emit all output declarations. 2479 */ 2480static boolean 2481emit_output_declarations(struct svga_shader_emitter_v10 *emit) 2482{ 2483 unsigned i; 2484 2485 for (i = 0; i < emit->info.num_outputs; i++) { 2486 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 2487 const unsigned semantic_name = emit->info.output_semantic_name[i]; 2488 const unsigned semantic_index = emit->info.output_semantic_index[i]; 2489 unsigned index = i; 2490 2491 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2492 if (semantic_name == TGSI_SEMANTIC_COLOR) { 2493 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); 2494 2495 emit->fs.color_out_index[semantic_index] = index; 2496 2497 /* The semantic index is the shader's color output/buffer index */ 2498 emit_output_declaration(emit, 2499 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 2500 VGPU10_NAME_UNDEFINED, 2501 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2502 2503 if (semantic_index == 0) { 2504 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 2505 /* Emit declarations for the additional color outputs 2506 * for broadcasting. 2507 */ 2508 unsigned j; 2509 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 2510 /* Allocate a new output index */ 2511 unsigned idx = emit->info.num_outputs + j - 1; 2512 emit->fs.color_out_index[j] = idx; 2513 emit_output_declaration(emit, 2514 VGPU10_OPCODE_DCL_OUTPUT, idx, 2515 VGPU10_NAME_UNDEFINED, 2516 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2517 emit->info.output_semantic_index[idx] = j; 2518 } 2519 } 2520 } 2521 else { 2522 assert(!emit->key.fs.write_color0_to_n_cbufs); 2523 } 2524 } 2525 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2526 /* Fragment depth output */ 2527 emit_fragdepth_output_declaration(emit); 2528 } 2529 else { 2530 assert(!"Bad output semantic name"); 2531 } 2532 } 2533 else { 2534 /* VS or GS */ 2535 unsigned name, type; 2536 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2537 2538 switch (semantic_name) { 2539 case TGSI_SEMANTIC_POSITION: 2540 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2541 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2542 name = VGPU10_NAME_POSITION; 2543 /* Save the index of the vertex position output register */ 2544 emit->vposition.out_index = index; 2545 break; 2546 case TGSI_SEMANTIC_CLIPDIST: 2547 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2548 name = VGPU10_NAME_CLIP_DISTANCE; 2549 /* save the starting index of the clip distance output register */ 2550 if (semantic_index == 0) 2551 emit->clip_dist_out_index = index; 2552 writemask = emit->output_usage_mask[index]; 2553 writemask = apply_clip_plane_mask(emit, writemask, semantic_index); 2554 if (writemask == 0x0) { 2555 continue; /* discard this do-nothing declaration */ 2556 } 2557 break; 2558 case TGSI_SEMANTIC_PRIMID: 2559 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2560 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2561 name = VGPU10_NAME_PRIMITIVE_ID; 2562 break; 2563 case TGSI_SEMANTIC_LAYER: 2564 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2565 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2566 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 2567 break; 2568 case TGSI_SEMANTIC_CLIPVERTEX: 2569 type = VGPU10_OPCODE_DCL_OUTPUT; 2570 name = VGPU10_NAME_UNDEFINED; 2571 emit->clip_vertex_out_index = index; 2572 break; 2573 default: 2574 /* generic output */ 2575 type = VGPU10_OPCODE_DCL_OUTPUT; 2576 name = VGPU10_NAME_UNDEFINED; 2577 } 2578 2579 emit_output_declaration(emit, type, index, name, writemask); 2580 } 2581 } 2582 2583 if (emit->vposition.so_index != INVALID_INDEX && 2584 emit->vposition.out_index != INVALID_INDEX) { 2585 2586 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2587 2588 /* Emit the declaration for the non-adjusted vertex position 2589 * for stream output purpose 2590 */ 2591 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2592 emit->vposition.so_index, 2593 VGPU10_NAME_UNDEFINED, 2594 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2595 } 2596 2597 if (emit->clip_dist_so_index != INVALID_INDEX && 2598 emit->clip_dist_out_index != INVALID_INDEX) { 2599 2600 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2601 2602 /* Emit the declaration for the clip distance shadow copy which 2603 * will be used for stream output purpose and for clip distance 2604 * varying variable 2605 */ 2606 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2607 emit->clip_dist_so_index, 2608 VGPU10_NAME_UNDEFINED, 2609 emit->output_usage_mask[emit->clip_dist_out_index]); 2610 2611 if (emit->info.num_written_clipdistance > 4) { 2612 /* for the second clip distance register, each handles 4 planes */ 2613 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2614 emit->clip_dist_so_index + 1, 2615 VGPU10_NAME_UNDEFINED, 2616 emit->output_usage_mask[emit->clip_dist_out_index+1]); 2617 } 2618 } 2619 2620 return TRUE; 2621} 2622 2623 2624/** 2625 * Emit the declaration for the temporary registers. 2626 */ 2627static boolean 2628emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 2629{ 2630 unsigned total_temps, reg, i; 2631 2632 total_temps = emit->num_shader_temps; 2633 2634 /* Allocate extra temps for specially-implemented instructions, 2635 * such as LIT. 2636 */ 2637 total_temps += MAX_INTERNAL_TEMPS; 2638 2639 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 2640 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 2641 emit->key.clip_plane_enable || 2642 emit->vposition.so_index != INVALID_INDEX) { 2643 emit->vposition.tmp_index = total_temps; 2644 total_temps += 1; 2645 } 2646 2647 if (emit->unit == PIPE_SHADER_VERTEX) { 2648 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 2649 emit->key.vs.adjust_attrib_itof | 2650 emit->key.vs.adjust_attrib_utof | 2651 emit->key.vs.attrib_is_bgra | 2652 emit->key.vs.attrib_puint_to_snorm | 2653 emit->key.vs.attrib_puint_to_uscaled | 2654 emit->key.vs.attrib_puint_to_sscaled); 2655 while (attrib_mask) { 2656 unsigned index = u_bit_scan(&attrib_mask); 2657 emit->vs.adjusted_input[index] = total_temps++; 2658 } 2659 } 2660 2661 if (emit->clip_mode == CLIP_DISTANCE) { 2662 /* We need to write the clip distance to a temporary register 2663 * first. Then it will be copied to the shadow copy for 2664 * the clip distance varying variable and stream output purpose. 2665 * It will also be copied to the actual CLIPDIST register 2666 * according to the enabled clip planes 2667 */ 2668 emit->clip_dist_tmp_index = total_temps++; 2669 if (emit->info.num_written_clipdistance > 4) 2670 total_temps++; /* second clip register */ 2671 } 2672 else if (emit->clip_mode == CLIP_VERTEX) { 2673 /* We need to convert the TGSI CLIPVERTEX output to one or more 2674 * clip distances. Allocate a temp reg for the clipvertex here. 2675 */ 2676 assert(emit->info.writes_clipvertex > 0); 2677 emit->clip_vertex_tmp_index = total_temps; 2678 total_temps++; 2679 } 2680 } 2681 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 2682 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 2683 emit->key.fs.white_fragments || 2684 emit->key.fs.write_color0_to_n_cbufs > 1) { 2685 /* Allocate a temp to hold the output color */ 2686 emit->fs.color_tmp_index = total_temps; 2687 total_temps += 1; 2688 } 2689 2690 if (emit->fs.face_input_index != INVALID_INDEX) { 2691 /* Allocate a temp for the +/-1 face register */ 2692 emit->fs.face_tmp_index = total_temps; 2693 total_temps += 1; 2694 } 2695 2696 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 2697 /* Allocate a temp for modified fragment position register */ 2698 emit->fs.fragcoord_tmp_index = total_temps; 2699 total_temps += 1; 2700 } 2701 } 2702 2703 for (i = 0; i < emit->num_address_regs; i++) { 2704 emit->address_reg_index[i] = total_temps++; 2705 } 2706 2707 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 2708 * temp indexes. Basically, we compact all the non-array temp register 2709 * indexes into a consecutive series. 2710 * 2711 * Before, we may have some TGSI declarations like: 2712 * DCL TEMP[0..1], LOCAL 2713 * DCL TEMP[2..4], ARRAY(1), LOCAL 2714 * DCL TEMP[5..7], ARRAY(2), LOCAL 2715 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 2716 * 2717 * After, we'll have a map like this: 2718 * temp_map[0] = { array 0, index 0 } 2719 * temp_map[1] = { array 0, index 1 } 2720 * temp_map[2] = { array 1, index 0 } 2721 * temp_map[3] = { array 1, index 1 } 2722 * temp_map[4] = { array 1, index 2 } 2723 * temp_map[5] = { array 2, index 0 } 2724 * temp_map[6] = { array 2, index 1 } 2725 * temp_map[7] = { array 2, index 2 } 2726 * temp_map[8] = { array 0, index 2 } 2727 * temp_map[9] = { array 0, index 3 } 2728 * 2729 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 2730 * temps numbered 0..3 2731 * 2732 * Any time we emit a temporary register index, we'll have to use the 2733 * temp_map[] table to convert the TGSI index to the VGPU10 index. 2734 * 2735 * Finally, we recompute the total_temps value here. 2736 */ 2737 reg = 0; 2738 for (i = 0; i < total_temps; i++) { 2739 if (emit->temp_map[i].arrayId == 0) { 2740 emit->temp_map[i].index = reg++; 2741 } 2742 } 2743 total_temps = reg; 2744 2745 if (0) { 2746 debug_printf("total_temps %u\n", total_temps); 2747 for (i = 0; i < 30; i++) { 2748 debug_printf("temp %u -> array %u index %u\n", 2749 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 2750 } 2751 } 2752 2753 /* Emit declaration of ordinary temp registers */ 2754 if (total_temps > 0) { 2755 VGPU10OpcodeToken0 opcode0; 2756 2757 opcode0.value = 0; 2758 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 2759 2760 begin_emit_instruction(emit); 2761 emit_dword(emit, opcode0.value); 2762 emit_dword(emit, total_temps); 2763 end_emit_instruction(emit); 2764 } 2765 2766 /* Emit declarations for indexable temp arrays. Skip 0th entry since 2767 * it's unused. 2768 */ 2769 for (i = 1; i < emit->num_temp_arrays; i++) { 2770 unsigned num_temps = emit->temp_arrays[i].size; 2771 2772 if (num_temps > 0) { 2773 VGPU10OpcodeToken0 opcode0; 2774 2775 opcode0.value = 0; 2776 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 2777 2778 begin_emit_instruction(emit); 2779 emit_dword(emit, opcode0.value); 2780 emit_dword(emit, i); /* which array */ 2781 emit_dword(emit, num_temps); 2782 emit_dword(emit, 4); /* num components */ 2783 end_emit_instruction(emit); 2784 2785 total_temps += num_temps; 2786 } 2787 } 2788 2789 /* Check that the grand total of all regular and indexed temps is 2790 * under the limit. 2791 */ 2792 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 2793 2794 return TRUE; 2795} 2796 2797 2798static boolean 2799emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 2800{ 2801 VGPU10OpcodeToken0 opcode0; 2802 VGPU10OperandToken0 operand0; 2803 unsigned total_consts, i; 2804 2805 opcode0.value = 0; 2806 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 2807 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 2808 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 2809 2810 operand0.value = 0; 2811 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2812 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 2813 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2814 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2815 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 2816 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2817 operand0.swizzleX = 0; 2818 operand0.swizzleY = 1; 2819 operand0.swizzleZ = 2; 2820 operand0.swizzleW = 3; 2821 2822 /** 2823 * Emit declaration for constant buffer [0]. We also allocate 2824 * room for the extra constants here. 2825 */ 2826 total_consts = emit->num_shader_consts[0]; 2827 2828 /* Now, allocate constant slots for the "extra" constants */ 2829 2830 /* Vertex position scale/translation */ 2831 if (emit->vposition.need_prescale) { 2832 emit->vposition.prescale_scale_index = total_consts++; 2833 emit->vposition.prescale_trans_index = total_consts++; 2834 } 2835 2836 if (emit->unit == PIPE_SHADER_VERTEX) { 2837 if (emit->key.vs.undo_viewport) { 2838 emit->vs.viewport_index = total_consts++; 2839 } 2840 } 2841 2842 /* user-defined clip planes */ 2843 if (emit->key.clip_plane_enable) { 2844 unsigned n = util_bitcount(emit->key.clip_plane_enable); 2845 assert(emit->unit == PIPE_SHADER_VERTEX || 2846 emit->unit == PIPE_SHADER_GEOMETRY); 2847 for (i = 0; i < n; i++) { 2848 emit->clip_plane_const[i] = total_consts++; 2849 } 2850 } 2851 2852 /* Texcoord scale factors for RECT textures */ 2853 { 2854 for (i = 0; i < emit->num_samplers; i++) { 2855 if (emit->key.tex[i].unnormalized) { 2856 emit->texcoord_scale_index[i] = total_consts++; 2857 } 2858 } 2859 } 2860 2861 /* Texture buffer sizes */ 2862 for (i = 0; i < emit->num_samplers; i++) { 2863 if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) { 2864 emit->texture_buffer_size_index[i] = total_consts++; 2865 } 2866 } 2867 2868 if (total_consts > 0) { 2869 begin_emit_instruction(emit); 2870 emit_dword(emit, opcode0.value); 2871 emit_dword(emit, operand0.value); 2872 emit_dword(emit, 0); /* which const buffer slot */ 2873 emit_dword(emit, total_consts); 2874 end_emit_instruction(emit); 2875 } 2876 2877 /* Declare remaining constant buffers (UBOs) */ 2878 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { 2879 if (emit->num_shader_consts[i] > 0) { 2880 begin_emit_instruction(emit); 2881 emit_dword(emit, opcode0.value); 2882 emit_dword(emit, operand0.value); 2883 emit_dword(emit, i); /* which const buffer slot */ 2884 emit_dword(emit, emit->num_shader_consts[i]); 2885 end_emit_instruction(emit); 2886 } 2887 } 2888 2889 return TRUE; 2890} 2891 2892 2893/** 2894 * Emit declarations for samplers. 2895 */ 2896static boolean 2897emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 2898{ 2899 unsigned i; 2900 2901 for (i = 0; i < emit->num_samplers; i++) { 2902 VGPU10OpcodeToken0 opcode0; 2903 VGPU10OperandToken0 operand0; 2904 2905 opcode0.value = 0; 2906 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 2907 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 2908 2909 operand0.value = 0; 2910 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2911 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 2912 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2913 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2914 2915 begin_emit_instruction(emit); 2916 emit_dword(emit, opcode0.value); 2917 emit_dword(emit, operand0.value); 2918 emit_dword(emit, i); 2919 end_emit_instruction(emit); 2920 } 2921 2922 return TRUE; 2923} 2924 2925 2926/** 2927 * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. 2928 */ 2929static unsigned 2930tgsi_texture_to_resource_dimension(unsigned target, boolean is_array) 2931{ 2932 switch (target) { 2933 case TGSI_TEXTURE_BUFFER: 2934 return VGPU10_RESOURCE_DIMENSION_BUFFER; 2935 case TGSI_TEXTURE_1D: 2936 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2937 case TGSI_TEXTURE_2D: 2938 case TGSI_TEXTURE_RECT: 2939 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2940 case TGSI_TEXTURE_3D: 2941 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 2942 case TGSI_TEXTURE_CUBE: 2943 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2944 case TGSI_TEXTURE_SHADOW1D: 2945 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2946 case TGSI_TEXTURE_SHADOW2D: 2947 case TGSI_TEXTURE_SHADOWRECT: 2948 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2949 case TGSI_TEXTURE_1D_ARRAY: 2950 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2951 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 2952 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2953 case TGSI_TEXTURE_2D_ARRAY: 2954 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2955 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY 2956 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2957 case TGSI_TEXTURE_SHADOWCUBE: 2958 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2959 case TGSI_TEXTURE_2D_MSAA: 2960 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 2961 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2962 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 2963 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 2964 case TGSI_TEXTURE_CUBE_ARRAY: 2965 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; 2966 default: 2967 assert(!"Unexpected resource type"); 2968 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2969 } 2970} 2971 2972 2973/** 2974 * Given a tgsi_return_type, return true iff it is an integer type. 2975 */ 2976static boolean 2977is_integer_type(enum tgsi_return_type type) 2978{ 2979 switch (type) { 2980 case TGSI_RETURN_TYPE_SINT: 2981 case TGSI_RETURN_TYPE_UINT: 2982 return TRUE; 2983 case TGSI_RETURN_TYPE_FLOAT: 2984 case TGSI_RETURN_TYPE_UNORM: 2985 case TGSI_RETURN_TYPE_SNORM: 2986 return FALSE; 2987 case TGSI_RETURN_TYPE_COUNT: 2988 default: 2989 assert(!"is_integer_type: Unknown tgsi_return_type"); 2990 return FALSE; 2991 } 2992} 2993 2994 2995/** 2996 * Emit declarations for resources. 2997 * XXX When we're sure that all TGSI shaders will be generated with 2998 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 2999 * rework this code. 3000 */ 3001static boolean 3002emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 3003{ 3004 unsigned i; 3005 3006 /* Emit resource decl for each sampler */ 3007 for (i = 0; i < emit->num_samplers; i++) { 3008 VGPU10OpcodeToken0 opcode0; 3009 VGPU10OperandToken0 operand0; 3010 VGPU10ResourceReturnTypeToken return_type; 3011 VGPU10_RESOURCE_RETURN_TYPE rt; 3012 3013 opcode0.value = 0; 3014 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 3015 opcode0.resourceDimension = 3016 tgsi_texture_to_resource_dimension(emit->sampler_target[i], 3017 emit->key.tex[i].is_array); 3018 operand0.value = 0; 3019 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 3020 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 3021 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3022 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3023 3024#if 1 3025 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 3026 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 3027 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 3028 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 3029 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 3030 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 3031 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); 3032 rt = emit->sampler_return_type[i] + 1; 3033#else 3034 switch (emit->sampler_return_type[i]) { 3035 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 3036 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 3037 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 3038 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 3039 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 3040 case TGSI_RETURN_TYPE_COUNT: 3041 default: 3042 rt = VGPU10_RETURN_TYPE_FLOAT; 3043 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 3044 } 3045#endif 3046 3047 return_type.value = 0; 3048 return_type.component0 = rt; 3049 return_type.component1 = rt; 3050 return_type.component2 = rt; 3051 return_type.component3 = rt; 3052 3053 begin_emit_instruction(emit); 3054 emit_dword(emit, opcode0.value); 3055 emit_dword(emit, operand0.value); 3056 emit_dword(emit, i); 3057 emit_dword(emit, return_type.value); 3058 end_emit_instruction(emit); 3059 } 3060 3061 return TRUE; 3062} 3063 3064static void 3065emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 3066 unsigned opcode, 3067 const struct tgsi_full_dst_register *dst, 3068 const struct tgsi_full_src_register *src, 3069 boolean saturate) 3070{ 3071 begin_emit_instruction(emit); 3072 emit_opcode(emit, opcode, saturate); 3073 emit_dst_register(emit, dst); 3074 emit_src_register(emit, src); 3075 end_emit_instruction(emit); 3076} 3077 3078static void 3079emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 3080 unsigned opcode, 3081 const struct tgsi_full_dst_register *dst, 3082 const struct tgsi_full_src_register *src1, 3083 const struct tgsi_full_src_register *src2, 3084 boolean saturate) 3085{ 3086 begin_emit_instruction(emit); 3087 emit_opcode(emit, opcode, saturate); 3088 emit_dst_register(emit, dst); 3089 emit_src_register(emit, src1); 3090 emit_src_register(emit, src2); 3091 end_emit_instruction(emit); 3092} 3093 3094static void 3095emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 3096 unsigned opcode, 3097 const struct tgsi_full_dst_register *dst, 3098 const struct tgsi_full_src_register *src1, 3099 const struct tgsi_full_src_register *src2, 3100 const struct tgsi_full_src_register *src3, 3101 boolean saturate) 3102{ 3103 begin_emit_instruction(emit); 3104 emit_opcode(emit, opcode, saturate); 3105 emit_dst_register(emit, dst); 3106 emit_src_register(emit, src1); 3107 emit_src_register(emit, src2); 3108 emit_src_register(emit, src3); 3109 end_emit_instruction(emit); 3110} 3111 3112/** 3113 * Emit the actual clip distance instructions to be used for clipping 3114 * by copying the clip distance from the temporary registers to the 3115 * CLIPDIST registers written with the enabled planes mask. 3116 * Also copy the clip distance from the temporary to the clip distance 3117 * shadow copy register which will be referenced by the input shader 3118 */ 3119static void 3120emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) 3121{ 3122 struct tgsi_full_src_register tmp_clip_dist_src; 3123 struct tgsi_full_dst_register clip_dist_dst; 3124 3125 unsigned i; 3126 unsigned clip_plane_enable = emit->key.clip_plane_enable; 3127 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; 3128 int num_written_clipdist = emit->info.num_written_clipdistance; 3129 3130 assert(emit->clip_dist_out_index != INVALID_INDEX); 3131 assert(emit->clip_dist_tmp_index != INVALID_INDEX); 3132 3133 /** 3134 * Temporary reset the temporary clip dist register index so 3135 * that the copy to the real clip dist register will not 3136 * attempt to copy to the temporary register again 3137 */ 3138 emit->clip_dist_tmp_index = INVALID_INDEX; 3139 3140 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { 3141 3142 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); 3143 3144 /** 3145 * copy to the shadow copy for use by varying variable and 3146 * stream output. All clip distances 3147 * will be written regardless of the enabled clipping planes. 3148 */ 3149 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3150 emit->clip_dist_so_index + i); 3151 3152 /* MOV clip_dist_so, tmp_clip_dist */ 3153 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3154 &tmp_clip_dist_src, FALSE); 3155 3156 /** 3157 * copy those clip distances to enabled clipping planes 3158 * to CLIPDIST registers for clipping 3159 */ 3160 if (clip_plane_enable & 0xf) { 3161 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3162 emit->clip_dist_out_index + i); 3163 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); 3164 3165 /* MOV CLIPDIST, tmp_clip_dist */ 3166 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3167 &tmp_clip_dist_src, FALSE); 3168 } 3169 /* four clip planes per clip register */ 3170 clip_plane_enable >>= 4; 3171 } 3172 /** 3173 * set the temporary clip dist register index back to the 3174 * temporary index for the next vertex 3175 */ 3176 emit->clip_dist_tmp_index = clip_dist_tmp_index; 3177} 3178 3179/* Declare clip distance output registers for user-defined clip planes 3180 * or the TGSI_CLIPVERTEX output. 3181 */ 3182static void 3183emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) 3184{ 3185 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3186 unsigned index = emit->num_outputs; 3187 unsigned plane_mask; 3188 3189 assert(emit->unit == PIPE_SHADER_VERTEX || 3190 emit->unit == PIPE_SHADER_GEOMETRY); 3191 assert(num_clip_planes <= 8); 3192 3193 if (emit->clip_mode != CLIP_LEGACY && 3194 emit->clip_mode != CLIP_VERTEX) { 3195 return; 3196 } 3197 3198 if (num_clip_planes == 0) 3199 return; 3200 3201 /* Declare one or two clip output registers. The number of components 3202 * in the mask reflects the number of clip planes. For example, if 5 3203 * clip planes are needed, we'll declare outputs similar to: 3204 * dcl_output_siv o2.xyzw, clip_distance 3205 * dcl_output_siv o3.x, clip_distance 3206 */ 3207 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ 3208 3209 plane_mask = (1 << num_clip_planes) - 1; 3210 if (plane_mask & 0xf) { 3211 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3212 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, 3213 VGPU10_NAME_CLIP_DISTANCE, cmask); 3214 emit->num_outputs++; 3215 } 3216 if (plane_mask & 0xf0) { 3217 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3218 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, 3219 VGPU10_NAME_CLIP_DISTANCE, cmask); 3220 emit->num_outputs++; 3221 } 3222} 3223 3224 3225/** 3226 * Emit the instructions for writing to the clip distance registers 3227 * to handle legacy/automatic clip planes. 3228 * For each clip plane, the distance is the dot product of the vertex 3229 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. 3230 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE 3231 * output registers already declared. 3232 */ 3233static void 3234emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 3235 unsigned vpos_tmp_index) 3236{ 3237 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3238 3239 assert(emit->clip_mode == CLIP_LEGACY); 3240 assert(num_clip_planes <= 8); 3241 3242 assert(emit->unit == PIPE_SHADER_VERTEX || 3243 emit->unit == PIPE_SHADER_GEOMETRY); 3244 3245 for (i = 0; i < num_clip_planes; i++) { 3246 struct tgsi_full_dst_register dst; 3247 struct tgsi_full_src_register plane_src, vpos_src; 3248 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3249 unsigned comp = i % 4; 3250 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3251 3252 /* create dst, src regs */ 3253 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3254 dst = writemask_dst(&dst, writemask); 3255 3256 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3257 vpos_src = make_src_temp_reg(vpos_tmp_index); 3258 3259 /* DP4 clip_dist, plane, vpos */ 3260 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3261 &plane_src, &vpos_src, FALSE); 3262 } 3263} 3264 3265 3266/** 3267 * Emit the instructions for computing the clip distance results from 3268 * the clip vertex temporary. 3269 * For each clip plane, the distance is the dot product of the clip vertex 3270 * position (found in a temp reg) and the clip plane coefficients. 3271 */ 3272static void 3273emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) 3274{ 3275 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); 3276 unsigned i; 3277 struct tgsi_full_dst_register dst; 3278 struct tgsi_full_src_register clipvert_src; 3279 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; 3280 3281 assert(emit->unit == PIPE_SHADER_VERTEX || 3282 emit->unit == PIPE_SHADER_GEOMETRY); 3283 3284 assert(emit->clip_mode == CLIP_VERTEX); 3285 3286 clipvert_src = make_src_temp_reg(clip_vertex_tmp); 3287 3288 for (i = 0; i < num_clip; i++) { 3289 struct tgsi_full_src_register plane_src; 3290 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3291 unsigned comp = i % 4; 3292 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3293 3294 /* create dst, src regs */ 3295 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3296 dst = writemask_dst(&dst, writemask); 3297 3298 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3299 3300 /* DP4 clip_dist, plane, vpos */ 3301 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3302 &plane_src, &clipvert_src, FALSE); 3303 } 3304 3305 /* copy temporary clip vertex register to the clip vertex register */ 3306 3307 assert(emit->clip_vertex_out_index != INVALID_INDEX); 3308 3309 /** 3310 * temporary reset the temporary clip vertex register index so 3311 * that copy to the clip vertex register will not attempt 3312 * to copy to the temporary register again 3313 */ 3314 emit->clip_vertex_tmp_index = INVALID_INDEX; 3315 3316 /* MOV clip_vertex, clip_vertex_tmp */ 3317 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); 3318 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 3319 &dst, &clipvert_src, FALSE); 3320 3321 /** 3322 * set the temporary clip vertex register index back to the 3323 * temporary index for the next vertex 3324 */ 3325 emit->clip_vertex_tmp_index = clip_vertex_tmp; 3326} 3327 3328/** 3329 * Emit code to convert RGBA to BGRA 3330 */ 3331static void 3332emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 3333 const struct tgsi_full_dst_register *dst, 3334 const struct tgsi_full_src_register *src) 3335{ 3336 struct tgsi_full_src_register bgra_src = 3337 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 3338 3339 begin_emit_instruction(emit); 3340 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 3341 emit_dst_register(emit, dst); 3342 emit_src_register(emit, &bgra_src); 3343 end_emit_instruction(emit); 3344} 3345 3346 3347/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 3348static void 3349emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 3350 const struct tgsi_full_dst_register *dst, 3351 const struct tgsi_full_src_register *src) 3352{ 3353 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 3354 struct tgsi_full_src_register two = 3355 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 3356 struct tgsi_full_src_register neg_two = 3357 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 3358 3359 unsigned val_tmp = get_temp_index(emit); 3360 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 3361 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 3362 3363 unsigned bias_tmp = get_temp_index(emit); 3364 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 3365 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 3366 3367 /* val = src * 2.0 */ 3368 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, 3369 src, &two, FALSE); 3370 3371 /* bias = src > 0.5 */ 3372 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, 3373 src, &half, FALSE); 3374 3375 /* bias = bias & -2.0 */ 3376 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 3377 &bias_src, &neg_two, FALSE); 3378 3379 /* dst = val + bias */ 3380 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 3381 &val_src, &bias_src, FALSE); 3382 3383 free_temp_indexes(emit); 3384} 3385 3386 3387/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 3388static void 3389emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 3390 const struct tgsi_full_dst_register *dst, 3391 const struct tgsi_full_src_register *src) 3392{ 3393 struct tgsi_full_src_register scale = 3394 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 3395 3396 /* dst = src * scale */ 3397 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); 3398} 3399 3400 3401/** Convert from R32_UINT to 10_10_10_2_sscaled */ 3402static void 3403emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 3404 const struct tgsi_full_dst_register *dst, 3405 const struct tgsi_full_src_register *src) 3406{ 3407 struct tgsi_full_src_register lshift = 3408 make_immediate_reg_int4(emit, 22, 12, 2, 0); 3409 struct tgsi_full_src_register rshift = 3410 make_immediate_reg_int4(emit, 22, 22, 22, 30); 3411 3412 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 3413 3414 unsigned tmp = get_temp_index(emit); 3415 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3416 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3417 3418 /* 3419 * r = (pixel << 22) >> 22; # signed int in [511, -512] 3420 * g = (pixel << 12) >> 22; # signed int in [511, -512] 3421 * b = (pixel << 2) >> 22; # signed int in [511, -512] 3422 * a = (pixel << 0) >> 30; # signed int in [1, -2] 3423 * dst = i_to_f(r,g,b,a); # convert to float 3424 */ 3425 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 3426 &src_xxxx, &lshift, FALSE); 3427 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 3428 &tmp_src, &rshift, FALSE); 3429 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); 3430 3431 free_temp_indexes(emit); 3432} 3433 3434 3435/** 3436 * Emit code for TGSI_OPCODE_ABS instruction. 3437 */ 3438static boolean 3439emit_abs(struct svga_shader_emitter_v10 *emit, 3440 const struct tgsi_full_instruction *inst) 3441{ 3442 /* dst = ABS(s0): 3443 * dst = abs(s0) 3444 * Translates into: 3445 * MOV dst, abs(s0) 3446 */ 3447 struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]); 3448 3449 /* MOV dst, abs(s0) */ 3450 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3451 &abs_src0, inst->Instruction.Saturate); 3452 3453 return TRUE; 3454} 3455 3456 3457/** 3458 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 3459 */ 3460static boolean 3461emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 3462 const struct tgsi_full_instruction *inst) 3463{ 3464 unsigned index = inst->Dst[0].Register.Index; 3465 struct tgsi_full_dst_register dst; 3466 unsigned opcode; 3467 3468 assert(index < MAX_VGPU10_ADDR_REGS); 3469 dst = make_dst_temp_reg(emit->address_reg_index[index]); 3470 3471 /* ARL dst, s0 3472 * Translates into: 3473 * FTOI address_tmp, s0 3474 * 3475 * UARL dst, s0 3476 * Translates into: 3477 * MOV address_tmp, s0 3478 */ 3479 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 3480 opcode = VGPU10_OPCODE_FTOI; 3481 else 3482 opcode = VGPU10_OPCODE_MOV; 3483 3484 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); 3485 3486 return TRUE; 3487} 3488 3489 3490/** 3491 * Emit code for TGSI_OPCODE_CAL instruction. 3492 */ 3493static boolean 3494emit_cal(struct svga_shader_emitter_v10 *emit, 3495 const struct tgsi_full_instruction *inst) 3496{ 3497 unsigned label = inst->Label.Label; 3498 VGPU10OperandToken0 operand; 3499 operand.value = 0; 3500 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 3501 3502 begin_emit_instruction(emit); 3503 emit_dword(emit, operand.value); 3504 emit_dword(emit, label); 3505 end_emit_instruction(emit); 3506 3507 return TRUE; 3508} 3509 3510 3511/** 3512 * Emit code for TGSI_OPCODE_IABS instruction. 3513 */ 3514static boolean 3515emit_iabs(struct svga_shader_emitter_v10 *emit, 3516 const struct tgsi_full_instruction *inst) 3517{ 3518 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 3519 * dst.y = (src0.y < 0) ? -src0.y : src0.y 3520 * dst.z = (src0.z < 0) ? -src0.z : src0.z 3521 * dst.w = (src0.w < 0) ? -src0.w : src0.w 3522 * 3523 * Translates into 3524 * IMAX dst, src, neg(src) 3525 */ 3526 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 3527 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 3528 &inst->Src[0], &neg_src, FALSE); 3529 3530 return TRUE; 3531} 3532 3533 3534/** 3535 * Emit code for TGSI_OPCODE_CMP instruction. 3536 */ 3537static boolean 3538emit_cmp(struct svga_shader_emitter_v10 *emit, 3539 const struct tgsi_full_instruction *inst) 3540{ 3541 /* dst.x = (src0.x < 0) ? src1.x : src2.x 3542 * dst.y = (src0.y < 0) ? src1.y : src2.y 3543 * dst.z = (src0.z < 0) ? src1.z : src2.z 3544 * dst.w = (src0.w < 0) ? src1.w : src2.w 3545 * 3546 * Translates into 3547 * LT tmp, src0, 0.0 3548 * MOVC dst, tmp, src1, src2 3549 */ 3550 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3551 unsigned tmp = get_temp_index(emit); 3552 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3553 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3554 3555 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, 3556 &inst->Src[0], &zero, FALSE); 3557 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 3558 &tmp_src, &inst->Src[1], &inst->Src[2], 3559 inst->Instruction.Saturate); 3560 3561 free_temp_indexes(emit); 3562 3563 return TRUE; 3564} 3565 3566 3567/** 3568 * Emit code for TGSI_OPCODE_DP2A instruction. 3569 */ 3570static boolean 3571emit_dp2a(struct svga_shader_emitter_v10 *emit, 3572 const struct tgsi_full_instruction *inst) 3573{ 3574 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x 3575 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x 3576 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x 3577 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x 3578 * Translate into 3579 * MAD tmp.x, s0.y, s1.y, s2.x 3580 * MAD tmp.x, s0.x, s1.x, tmp.x 3581 * MOV dst.xyzw, tmp.xxxx 3582 */ 3583 unsigned tmp = get_temp_index(emit); 3584 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3585 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3586 3587 struct tgsi_full_src_register tmp_src_xxxx = 3588 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3589 struct tgsi_full_dst_register tmp_dst_x = 3590 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3591 3592 struct tgsi_full_src_register src0_xxxx = 3593 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3594 struct tgsi_full_src_register src0_yyyy = 3595 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3596 struct tgsi_full_src_register src1_xxxx = 3597 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 3598 struct tgsi_full_src_register src1_yyyy = 3599 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3600 struct tgsi_full_src_register src2_xxxx = 3601 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); 3602 3603 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, 3604 &src1_yyyy, &src2_xxxx, FALSE); 3605 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, 3606 &src1_xxxx, &tmp_src_xxxx, FALSE); 3607 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3608 &tmp_src_xxxx, inst->Instruction.Saturate); 3609 3610 free_temp_indexes(emit); 3611 3612 return TRUE; 3613} 3614 3615 3616/** 3617 * Emit code for TGSI_OPCODE_DPH instruction. 3618 */ 3619static boolean 3620emit_dph(struct svga_shader_emitter_v10 *emit, 3621 const struct tgsi_full_instruction *inst) 3622{ 3623 /* 3624 * DP3 tmp, s0, s1 3625 * ADD dst, tmp, s1.wwww 3626 */ 3627 3628 struct tgsi_full_src_register s1_wwww = 3629 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, 3630 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 3631 3632 unsigned tmp = get_temp_index(emit); 3633 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3634 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3635 3636 /* DP3 tmp, s0, s1 */ 3637 emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], 3638 &inst->Src[1], FALSE); 3639 3640 /* ADD dst, tmp, s1.wwww */ 3641 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, 3642 &s1_wwww, inst->Instruction.Saturate); 3643 3644 free_temp_indexes(emit); 3645 3646 return TRUE; 3647} 3648 3649 3650/** 3651 * Emit code for TGSI_OPCODE_DST instruction. 3652 */ 3653static boolean 3654emit_dst(struct svga_shader_emitter_v10 *emit, 3655 const struct tgsi_full_instruction *inst) 3656{ 3657 /* 3658 * dst.x = 1 3659 * dst.y = src0.y * src1.y 3660 * dst.z = src0.z 3661 * dst.w = src1.w 3662 */ 3663 3664 struct tgsi_full_src_register s0_yyyy = 3665 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3666 struct tgsi_full_src_register s0_zzzz = 3667 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 3668 struct tgsi_full_src_register s1_yyyy = 3669 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3670 struct tgsi_full_src_register s1_wwww = 3671 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); 3672 3673 /* 3674 * If dst and either src0 and src1 are the same we need 3675 * to create a temporary for it and insert a extra move. 3676 */ 3677 unsigned tmp_move = get_temp_index(emit); 3678 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3679 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3680 3681 /* MOV dst.x, 1.0 */ 3682 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3683 struct tgsi_full_dst_register dst_x = 3684 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3685 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3686 3687 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 3688 } 3689 3690 /* MUL dst.y, s0.y, s1.y */ 3691 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3692 struct tgsi_full_dst_register dst_y = 3693 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3694 3695 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, 3696 &s1_yyyy, inst->Instruction.Saturate); 3697 } 3698 3699 /* MOV dst.z, s0.z */ 3700 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3701 struct tgsi_full_dst_register dst_z = 3702 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3703 3704 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, 3705 inst->Instruction.Saturate); 3706 } 3707 3708 /* MOV dst.w, s1.w */ 3709 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3710 struct tgsi_full_dst_register dst_w = 3711 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3712 3713 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, 3714 inst->Instruction.Saturate); 3715 } 3716 3717 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3718 FALSE); 3719 free_temp_indexes(emit); 3720 3721 return TRUE; 3722} 3723 3724 3725 3726/** 3727 * Emit code for TGSI_OPCODE_ENDPRIM (GS only) 3728 */ 3729static boolean 3730emit_endprim(struct svga_shader_emitter_v10 *emit, 3731 const struct tgsi_full_instruction *inst) 3732{ 3733 assert(emit->unit == PIPE_SHADER_GEOMETRY); 3734 3735 /* We can't use emit_simple() because the TGSI instruction has one 3736 * operand (vertex stream number) which we must ignore for VGPU10. 3737 */ 3738 begin_emit_instruction(emit); 3739 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); 3740 end_emit_instruction(emit); 3741 return TRUE; 3742} 3743 3744 3745/** 3746 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. 3747 */ 3748static boolean 3749emit_ex2(struct svga_shader_emitter_v10 *emit, 3750 const struct tgsi_full_instruction *inst) 3751{ 3752 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x 3753 * while VGPU10 computes four values. 3754 * 3755 * dst = EX2(src): 3756 * dst.xyzw = 2.0 ^ src.x 3757 */ 3758 3759 struct tgsi_full_src_register src_xxxx = 3760 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3761 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3762 3763 /* EXP tmp, s0.xxxx */ 3764 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, 3765 inst->Instruction.Saturate); 3766 3767 return TRUE; 3768} 3769 3770 3771/** 3772 * Emit code for TGSI_OPCODE_EXP instruction. 3773 */ 3774static boolean 3775emit_exp(struct svga_shader_emitter_v10 *emit, 3776 const struct tgsi_full_instruction *inst) 3777{ 3778 /* 3779 * dst.x = 2 ^ floor(s0.x) 3780 * dst.y = s0.x - floor(s0.x) 3781 * dst.z = 2 ^ s0.x 3782 * dst.w = 1.0 3783 */ 3784 3785 struct tgsi_full_src_register src_xxxx = 3786 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3787 unsigned tmp = get_temp_index(emit); 3788 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3789 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3790 3791 /* 3792 * If dst and src are the same we need to create 3793 * a temporary for it and insert a extra move. 3794 */ 3795 unsigned tmp_move = get_temp_index(emit); 3796 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3797 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3798 3799 /* only use X component of temp reg */ 3800 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3801 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3802 3803 /* ROUND_NI tmp.x, s0.x */ 3804 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 3805 &src_xxxx, FALSE); /* round to -infinity */ 3806 3807 /* EXP dst.x, tmp.x */ 3808 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3809 struct tgsi_full_dst_register dst_x = 3810 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3811 3812 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, 3813 inst->Instruction.Saturate); 3814 } 3815 3816 /* ADD dst.y, s0.x, -tmp */ 3817 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3818 struct tgsi_full_dst_register dst_y = 3819 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3820 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); 3821 3822 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, 3823 &neg_tmp_src, inst->Instruction.Saturate); 3824 } 3825 3826 /* EXP dst.z, s0.x */ 3827 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3828 struct tgsi_full_dst_register dst_z = 3829 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3830 3831 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, 3832 inst->Instruction.Saturate); 3833 } 3834 3835 /* MOV dst.w, 1.0 */ 3836 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3837 struct tgsi_full_dst_register dst_w = 3838 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3839 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3840 3841 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, 3842 FALSE); 3843 } 3844 3845 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3846 FALSE); 3847 3848 free_temp_indexes(emit); 3849 3850 return TRUE; 3851} 3852 3853 3854/** 3855 * Emit code for TGSI_OPCODE_IF instruction. 3856 */ 3857static boolean 3858emit_if(struct svga_shader_emitter_v10 *emit, 3859 const struct tgsi_full_instruction *inst) 3860{ 3861 VGPU10OpcodeToken0 opcode0; 3862 3863 /* The src register should be a scalar */ 3864 assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && 3865 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && 3866 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); 3867 3868 /* The only special thing here is that we need to set the 3869 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if 3870 * src.x is non-zero. 3871 */ 3872 opcode0.value = 0; 3873 opcode0.opcodeType = VGPU10_OPCODE_IF; 3874 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 3875 3876 begin_emit_instruction(emit); 3877 emit_dword(emit, opcode0.value); 3878 emit_src_register(emit, &inst->Src[0]); 3879 end_emit_instruction(emit); 3880 3881 return TRUE; 3882} 3883 3884 3885/** 3886 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of 3887 * the register components are negative). 3888 */ 3889static boolean 3890emit_kill_if(struct svga_shader_emitter_v10 *emit, 3891 const struct tgsi_full_instruction *inst) 3892{ 3893 unsigned tmp = get_temp_index(emit); 3894 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3895 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3896 3897 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3898 3899 struct tgsi_full_dst_register tmp_dst_x = 3900 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3901 struct tgsi_full_src_register tmp_src_xxxx = 3902 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3903 3904 /* tmp = src[0] < 0.0 */ 3905 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 3906 &zero, FALSE); 3907 3908 if (!same_swizzle_terms(&inst->Src[0])) { 3909 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 3910 * logically OR the swizzle terms. Most uses of KILL_IF only 3911 * test one channel so it's good to avoid these extra steps. 3912 */ 3913 struct tgsi_full_src_register tmp_src_yyyy = 3914 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 3915 struct tgsi_full_src_register tmp_src_zzzz = 3916 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 3917 struct tgsi_full_src_register tmp_src_wwww = 3918 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 3919 3920 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3921 &tmp_src_yyyy, FALSE); 3922 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3923 &tmp_src_zzzz, FALSE); 3924 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3925 &tmp_src_wwww, FALSE); 3926 } 3927 3928 begin_emit_instruction(emit); 3929 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 3930 emit_src_register(emit, &tmp_src_xxxx); 3931 end_emit_instruction(emit); 3932 3933 free_temp_indexes(emit); 3934 3935 return TRUE; 3936} 3937 3938 3939/** 3940 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 3941 */ 3942static boolean 3943emit_kill(struct svga_shader_emitter_v10 *emit, 3944 const struct tgsi_full_instruction *inst) 3945{ 3946 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3947 3948 /* DISCARD if 0.0 is zero */ 3949 begin_emit_instruction(emit); 3950 emit_discard_opcode(emit, FALSE); 3951 emit_src_register(emit, &zero); 3952 end_emit_instruction(emit); 3953 3954 return TRUE; 3955} 3956 3957 3958/** 3959 * Emit code for TGSI_OPCODE_LG2 instruction. 3960 */ 3961static boolean 3962emit_lg2(struct svga_shader_emitter_v10 *emit, 3963 const struct tgsi_full_instruction *inst) 3964{ 3965 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x 3966 * while VGPU10 computes four values. 3967 * 3968 * dst = LG2(src): 3969 * dst.xyzw = log2(src.x) 3970 */ 3971 3972 struct tgsi_full_src_register src_xxxx = 3973 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3974 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3975 3976 /* LOG tmp, s0.xxxx */ 3977 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, 3978 inst->Instruction.Saturate); 3979 3980 return TRUE; 3981} 3982 3983 3984/** 3985 * Emit code for TGSI_OPCODE_LIT instruction. 3986 */ 3987static boolean 3988emit_lit(struct svga_shader_emitter_v10 *emit, 3989 const struct tgsi_full_instruction *inst) 3990{ 3991 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3992 3993 /* 3994 * If dst and src are the same we need to create 3995 * a temporary for it and insert a extra move. 3996 */ 3997 unsigned tmp_move = get_temp_index(emit); 3998 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3999 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 4000 4001 /* 4002 * dst.x = 1 4003 * dst.y = max(src.x, 0) 4004 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 4005 * dst.w = 1 4006 */ 4007 4008 /* MOV dst.x, 1.0 */ 4009 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4010 struct tgsi_full_dst_register dst_x = 4011 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 4012 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 4013 } 4014 4015 /* MOV dst.w, 1.0 */ 4016 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4017 struct tgsi_full_dst_register dst_w = 4018 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 4019 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4020 } 4021 4022 /* MAX dst.y, src.x, 0.0 */ 4023 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4024 struct tgsi_full_dst_register dst_y = 4025 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 4026 struct tgsi_full_src_register zero = 4027 make_immediate_reg_float(emit, 0.0f); 4028 struct tgsi_full_src_register src_xxxx = 4029 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4030 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4031 4032 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 4033 &zero, inst->Instruction.Saturate); 4034 } 4035 4036 /* 4037 * tmp1 = clamp(src.w, -128, 128); 4038 * MAX tmp1, src.w, -128 4039 * MIN tmp1, tmp1, 128 4040 * 4041 * tmp2 = max(tmp2, 0); 4042 * MAX tmp2, src.y, 0 4043 * 4044 * tmp1 = pow(tmp2, tmp1); 4045 * LOG tmp2, tmp2 4046 * MUL tmp1, tmp2, tmp1 4047 * EXP tmp1, tmp1 4048 * 4049 * tmp1 = (src.w == 0) ? 1 : tmp1; 4050 * EQ tmp2, 0, src.w 4051 * MOVC tmp1, tmp2, 1.0, tmp1 4052 * 4053 * dst.z = (0 < src.x) ? tmp1 : 0; 4054 * LT tmp2, 0, src.x 4055 * MOVC dst.z, tmp2, tmp1, 0.0 4056 */ 4057 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4058 struct tgsi_full_dst_register dst_z = 4059 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 4060 4061 unsigned tmp1 = get_temp_index(emit); 4062 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4063 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4064 unsigned tmp2 = get_temp_index(emit); 4065 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4066 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4067 4068 struct tgsi_full_src_register src_xxxx = 4069 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4070 struct tgsi_full_src_register src_yyyy = 4071 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 4072 struct tgsi_full_src_register src_wwww = 4073 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 4074 4075 struct tgsi_full_src_register zero = 4076 make_immediate_reg_float(emit, 0.0f); 4077 struct tgsi_full_src_register lowerbound = 4078 make_immediate_reg_float(emit, -128.0f); 4079 struct tgsi_full_src_register upperbound = 4080 make_immediate_reg_float(emit, 128.0f); 4081 4082 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 4083 &lowerbound, FALSE); 4084 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 4085 &upperbound, FALSE); 4086 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 4087 &zero, FALSE); 4088 4089 /* POW tmp1, tmp2, tmp1 */ 4090 /* LOG tmp2, tmp2 */ 4091 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, 4092 FALSE); 4093 4094 /* MUL tmp1, tmp2, tmp1 */ 4095 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 4096 &tmp1_src, FALSE); 4097 4098 /* EXP tmp1, tmp1 */ 4099 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, 4100 FALSE); 4101 4102 /* EQ tmp2, 0, src.w */ 4103 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, 4104 &src_wwww, FALSE); 4105 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ 4106 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 4107 &tmp2_src, &one, &tmp1_src, FALSE); 4108 4109 /* LT tmp2, 0, src.x */ 4110 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, 4111 &src_xxxx, FALSE); 4112 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 4113 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 4114 &tmp2_src, &tmp1_src, &zero, FALSE); 4115 } 4116 4117 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 4118 FALSE); 4119 free_temp_indexes(emit); 4120 4121 return TRUE; 4122} 4123 4124 4125/** 4126 * Emit code for TGSI_OPCODE_LOG instruction. 4127 */ 4128static boolean 4129emit_log(struct svga_shader_emitter_v10 *emit, 4130 const struct tgsi_full_instruction *inst) 4131{ 4132 /* 4133 * dst.x = floor(lg2(abs(s0.x))) 4134 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 4135 * dst.z = lg2(abs(s0.x)) 4136 * dst.w = 1.0 4137 */ 4138 4139 struct tgsi_full_src_register src_xxxx = 4140 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4141 unsigned tmp = get_temp_index(emit); 4142 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4143 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4144 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 4145 4146 /* only use X component of temp reg */ 4147 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4148 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4149 4150 /* LOG tmp.x, abs(s0.x) */ 4151 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 4152 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, 4153 &abs_src_xxxx, FALSE); 4154 } 4155 4156 /* MOV dst.z, tmp.x */ 4157 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4158 struct tgsi_full_dst_register dst_z = 4159 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 4160 4161 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, 4162 &tmp_src, inst->Instruction.Saturate); 4163 } 4164 4165 /* FLR tmp.x, tmp.x */ 4166 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 4167 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 4168 &tmp_src, FALSE); 4169 } 4170 4171 /* MOV dst.x, tmp.x */ 4172 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4173 struct tgsi_full_dst_register dst_x = 4174 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4175 4176 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, 4177 inst->Instruction.Saturate); 4178 } 4179 4180 /* EXP tmp.x, tmp.x */ 4181 /* DIV dst.y, abs(s0.x), tmp.x */ 4182 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4183 struct tgsi_full_dst_register dst_y = 4184 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4185 4186 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, 4187 FALSE); 4188 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 4189 &tmp_src, inst->Instruction.Saturate); 4190 } 4191 4192 /* MOV dst.w, 1.0 */ 4193 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4194 struct tgsi_full_dst_register dst_w = 4195 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 4196 struct tgsi_full_src_register one = 4197 make_immediate_reg_float(emit, 1.0f); 4198 4199 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4200 } 4201 4202 free_temp_indexes(emit); 4203 4204 return TRUE; 4205} 4206 4207 4208/** 4209 * Emit code for TGSI_OPCODE_LRP instruction. 4210 */ 4211static boolean 4212emit_lrp(struct svga_shader_emitter_v10 *emit, 4213 const struct tgsi_full_instruction *inst) 4214{ 4215 /* dst = LRP(s0, s1, s2): 4216 * dst = s0 * (s1 - s2) + s2 4217 * Translates into: 4218 * SUB tmp, s1, s2; tmp = s1 - s2 4219 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 4220 */ 4221 unsigned tmp = get_temp_index(emit); 4222 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 4223 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 4224 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 4225 4226 /* ADD tmp, s1, -s2 */ 4227 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, 4228 &inst->Src[1], &neg_src2, FALSE); 4229 4230 /* MAD dst, s1, tmp, s3 */ 4231 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 4232 &inst->Src[0], &src_tmp, &inst->Src[2], 4233 inst->Instruction.Saturate); 4234 4235 free_temp_indexes(emit); 4236 4237 return TRUE; 4238} 4239 4240 4241/** 4242 * Emit code for TGSI_OPCODE_POW instruction. 4243 */ 4244static boolean 4245emit_pow(struct svga_shader_emitter_v10 *emit, 4246 const struct tgsi_full_instruction *inst) 4247{ 4248 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 4249 * src1.x while VGPU10 computes four values. 4250 * 4251 * dst = POW(src0, src1): 4252 * dst.xyzw = src0.x ^ src1.x 4253 */ 4254 unsigned tmp = get_temp_index(emit); 4255 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4256 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4257 struct tgsi_full_src_register src0_xxxx = 4258 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4259 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4260 struct tgsi_full_src_register src1_xxxx = 4261 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4262 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4263 4264 /* LOG tmp, s0.xxxx */ 4265 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, 4266 FALSE); 4267 4268 /* MUL tmp, tmp, s1.xxxx */ 4269 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, 4270 &src1_xxxx, FALSE); 4271 4272 /* EXP tmp, s0.xxxx */ 4273 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], 4274 &tmp_src, inst->Instruction.Saturate); 4275 4276 /* free tmp */ 4277 free_temp_indexes(emit); 4278 4279 return TRUE; 4280} 4281 4282 4283/** 4284 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 4285 */ 4286static boolean 4287emit_rcp(struct svga_shader_emitter_v10 *emit, 4288 const struct tgsi_full_instruction *inst) 4289{ 4290 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4291 4292 unsigned tmp = get_temp_index(emit); 4293 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4294 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4295 4296 struct tgsi_full_dst_register tmp_dst_x = 4297 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4298 struct tgsi_full_src_register tmp_src_xxxx = 4299 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4300 4301 /* DIV tmp.x, 1.0, s0 */ 4302 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, 4303 &inst->Src[0], FALSE); 4304 4305 /* MOV dst, tmp.xxxx */ 4306 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4307 &tmp_src_xxxx, inst->Instruction.Saturate); 4308 4309 free_temp_indexes(emit); 4310 4311 return TRUE; 4312} 4313 4314 4315/** 4316 * Emit code for TGSI_OPCODE_RSQ instruction. 4317 */ 4318static boolean 4319emit_rsq(struct svga_shader_emitter_v10 *emit, 4320 const struct tgsi_full_instruction *inst) 4321{ 4322 /* dst = RSQ(src): 4323 * dst.xyzw = 1 / sqrt(src.x) 4324 * Translates into: 4325 * RSQ tmp, src.x 4326 * MOV dst, tmp.xxxx 4327 */ 4328 4329 unsigned tmp = get_temp_index(emit); 4330 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4331 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4332 4333 struct tgsi_full_dst_register tmp_dst_x = 4334 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4335 struct tgsi_full_src_register tmp_src_xxxx = 4336 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4337 4338 /* RSQ tmp, src.x */ 4339 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, 4340 &inst->Src[0], FALSE); 4341 4342 /* MOV dst, tmp.xxxx */ 4343 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4344 &tmp_src_xxxx, inst->Instruction.Saturate); 4345 4346 /* free tmp */ 4347 free_temp_indexes(emit); 4348 4349 return TRUE; 4350} 4351 4352 4353/** 4354 * Emit code for TGSI_OPCODE_SCS instruction. 4355 */ 4356static boolean 4357emit_scs(struct svga_shader_emitter_v10 *emit, 4358 const struct tgsi_full_instruction *inst) 4359{ 4360 /* dst.x = cos(src.x) 4361 * dst.y = sin(src.x) 4362 * dst.z = 0.0 4363 * dst.w = 1.0 4364 */ 4365 struct tgsi_full_dst_register dst_x = 4366 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4367 struct tgsi_full_dst_register dst_y = 4368 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4369 struct tgsi_full_dst_register dst_zw = 4370 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); 4371 4372 struct tgsi_full_src_register zero_one = 4373 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); 4374 4375 begin_emit_instruction(emit); 4376 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); 4377 emit_dst_register(emit, &dst_y); 4378 emit_dst_register(emit, &dst_x); 4379 emit_src_register(emit, &inst->Src[0]); 4380 end_emit_instruction(emit); 4381 4382 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 4383 &dst_zw, &zero_one, inst->Instruction.Saturate); 4384 4385 return TRUE; 4386} 4387 4388 4389/** 4390 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 4391 */ 4392static boolean 4393emit_seq(struct svga_shader_emitter_v10 *emit, 4394 const struct tgsi_full_instruction *inst) 4395{ 4396 /* dst = SEQ(s0, s1): 4397 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 4398 * Translates into: 4399 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4400 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4401 */ 4402 unsigned tmp = get_temp_index(emit); 4403 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4404 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4405 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4406 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4407 4408 /* EQ tmp, s0, s1 */ 4409 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 4410 &inst->Src[1], FALSE); 4411 4412 /* MOVC dst, tmp, one, zero */ 4413 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4414 &one, &zero, FALSE); 4415 4416 free_temp_indexes(emit); 4417 4418 return TRUE; 4419} 4420 4421 4422/** 4423 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 4424 */ 4425static boolean 4426emit_sge(struct svga_shader_emitter_v10 *emit, 4427 const struct tgsi_full_instruction *inst) 4428{ 4429 /* dst = SGE(s0, s1): 4430 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 4431 * Translates into: 4432 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 4433 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4434 */ 4435 unsigned tmp = get_temp_index(emit); 4436 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4437 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4438 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4439 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4440 4441 /* GE tmp, s0, s1 */ 4442 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 4443 &inst->Src[1], FALSE); 4444 4445 /* MOVC dst, tmp, one, zero */ 4446 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4447 &one, &zero, FALSE); 4448 4449 free_temp_indexes(emit); 4450 4451 return TRUE; 4452} 4453 4454 4455/** 4456 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 4457 */ 4458static boolean 4459emit_sgt(struct svga_shader_emitter_v10 *emit, 4460 const struct tgsi_full_instruction *inst) 4461{ 4462 /* dst = SGT(s0, s1): 4463 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4464 * Translates into: 4465 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4466 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4467 */ 4468 unsigned tmp = get_temp_index(emit); 4469 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4470 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4471 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4472 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4473 4474 /* LT tmp, s1, s0 */ 4475 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4476 &inst->Src[0], FALSE); 4477 4478 /* MOVC dst, tmp, one, zero */ 4479 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4480 &one, &zero, FALSE); 4481 4482 free_temp_indexes(emit); 4483 4484 return TRUE; 4485} 4486 4487 4488/** 4489 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 4490 */ 4491static boolean 4492emit_sincos(struct svga_shader_emitter_v10 *emit, 4493 const struct tgsi_full_instruction *inst) 4494{ 4495 unsigned tmp = get_temp_index(emit); 4496 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4497 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4498 4499 struct tgsi_full_src_register tmp_src_xxxx = 4500 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4501 struct tgsi_full_dst_register tmp_dst_x = 4502 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4503 4504 begin_emit_instruction(emit); 4505 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 4506 4507 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 4508 { 4509 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 4510 emit_null_dst_register(emit); /* second destination register */ 4511 } 4512 else { 4513 emit_null_dst_register(emit); 4514 emit_dst_register(emit, &tmp_dst_x); 4515 } 4516 4517 emit_src_register(emit, &inst->Src[0]); 4518 end_emit_instruction(emit); 4519 4520 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4521 &tmp_src_xxxx, inst->Instruction.Saturate); 4522 4523 free_temp_indexes(emit); 4524 4525 return TRUE; 4526} 4527 4528 4529/** 4530 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 4531 */ 4532static boolean 4533emit_sle(struct svga_shader_emitter_v10 *emit, 4534 const struct tgsi_full_instruction *inst) 4535{ 4536 /* dst = SLE(s0, s1): 4537 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 4538 * Translates into: 4539 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 4540 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4541 */ 4542 unsigned tmp = get_temp_index(emit); 4543 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4544 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4545 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4546 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4547 4548 /* GE tmp, s1, s0 */ 4549 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 4550 &inst->Src[0], FALSE); 4551 4552 /* MOVC dst, tmp, one, zero */ 4553 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4554 &one, &zero, FALSE); 4555 4556 free_temp_indexes(emit); 4557 4558 return TRUE; 4559} 4560 4561 4562/** 4563 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 4564 */ 4565static boolean 4566emit_slt(struct svga_shader_emitter_v10 *emit, 4567 const struct tgsi_full_instruction *inst) 4568{ 4569 /* dst = SLT(s0, s1): 4570 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 4571 * Translates into: 4572 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 4573 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4574 */ 4575 unsigned tmp = get_temp_index(emit); 4576 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4577 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4578 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4579 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4580 4581 /* LT tmp, s0, s1 */ 4582 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 4583 &inst->Src[1], FALSE); 4584 4585 /* MOVC dst, tmp, one, zero */ 4586 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4587 &one, &zero, FALSE); 4588 4589 free_temp_indexes(emit); 4590 4591 return TRUE; 4592} 4593 4594 4595/** 4596 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 4597 */ 4598static boolean 4599emit_sne(struct svga_shader_emitter_v10 *emit, 4600 const struct tgsi_full_instruction *inst) 4601{ 4602 /* dst = SNE(s0, s1): 4603 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4604 * Translates into: 4605 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4606 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4607 */ 4608 unsigned tmp = get_temp_index(emit); 4609 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4610 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4611 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4612 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4613 4614 /* NE tmp, s0, s1 */ 4615 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4616 &inst->Src[1], FALSE); 4617 4618 /* MOVC dst, tmp, one, zero */ 4619 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4620 &one, &zero, FALSE); 4621 4622 free_temp_indexes(emit); 4623 4624 return TRUE; 4625} 4626 4627 4628/** 4629 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4630 */ 4631static boolean 4632emit_ssg(struct svga_shader_emitter_v10 *emit, 4633 const struct tgsi_full_instruction *inst) 4634{ 4635 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4636 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4637 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4638 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4639 * Translates into: 4640 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4641 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4642 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4643 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) 4644 */ 4645 struct tgsi_full_src_register zero = 4646 make_immediate_reg_float(emit, 0.0f); 4647 struct tgsi_full_src_register one = 4648 make_immediate_reg_float(emit, 1.0f); 4649 struct tgsi_full_src_register neg_one = 4650 make_immediate_reg_float(emit, -1.0f); 4651 4652 unsigned tmp1 = get_temp_index(emit); 4653 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4654 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4655 4656 unsigned tmp2 = get_temp_index(emit); 4657 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4658 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4659 4660 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4661 &zero, FALSE); 4662 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4663 &neg_one, &zero, FALSE); 4664 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4665 &inst->Src[0], FALSE); 4666 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4667 &one, &tmp2_src, FALSE); 4668 4669 free_temp_indexes(emit); 4670 4671 return TRUE; 4672} 4673 4674 4675/** 4676 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4677 */ 4678static boolean 4679emit_issg(struct svga_shader_emitter_v10 *emit, 4680 const struct tgsi_full_instruction *inst) 4681{ 4682 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4683 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4684 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4685 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4686 * Translates into: 4687 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4688 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) 4689 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4690 */ 4691 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4692 4693 unsigned tmp1 = get_temp_index(emit); 4694 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4695 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4696 4697 unsigned tmp2 = get_temp_index(emit); 4698 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4699 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4700 4701 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4702 4703 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4704 &inst->Src[0], &zero, FALSE); 4705 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4706 &zero, &inst->Src[0], FALSE); 4707 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4708 &tmp1_src, &neg_tmp2, FALSE); 4709 4710 free_temp_indexes(emit); 4711 4712 return TRUE; 4713} 4714 4715 4716/** 4717 * Emit code for TGSI_OPCODE_SUB instruction. 4718 */ 4719static boolean 4720emit_sub(struct svga_shader_emitter_v10 *emit, 4721 const struct tgsi_full_instruction *inst) 4722{ 4723 /* dst = SUB(s0, s1): 4724 * dst = s0 - s1 4725 * Translates into: 4726 * ADD dst, s0, neg(s1) 4727 */ 4728 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); 4729 4730 /* ADD dst, s0, neg(s1) */ 4731 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], 4732 &inst->Src[0], &neg_src1, 4733 inst->Instruction.Saturate); 4734 4735 return TRUE; 4736} 4737 4738 4739/** 4740 * Emit a comparison instruction. The dest register will get 4741 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 4742 */ 4743static void 4744emit_comparison(struct svga_shader_emitter_v10 *emit, 4745 SVGA3dCmpFunc func, 4746 const struct tgsi_full_dst_register *dst, 4747 const struct tgsi_full_src_register *src0, 4748 const struct tgsi_full_src_register *src1) 4749{ 4750 struct tgsi_full_src_register immediate; 4751 VGPU10OpcodeToken0 opcode0; 4752 boolean swapSrc = FALSE; 4753 4754 /* Sanity checks for svga vs. gallium enums */ 4755 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 4756 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 4757 4758 opcode0.value = 0; 4759 4760 switch (func) { 4761 case SVGA3D_CMP_NEVER: 4762 immediate = make_immediate_reg_int(emit, 0); 4763 /* MOV dst, {0} */ 4764 begin_emit_instruction(emit); 4765 emit_dword(emit, VGPU10_OPCODE_MOV); 4766 emit_dst_register(emit, dst); 4767 emit_src_register(emit, &immediate); 4768 end_emit_instruction(emit); 4769 return; 4770 case SVGA3D_CMP_ALWAYS: 4771 immediate = make_immediate_reg_int(emit, -1); 4772 /* MOV dst, {-1} */ 4773 begin_emit_instruction(emit); 4774 emit_dword(emit, VGPU10_OPCODE_MOV); 4775 emit_dst_register(emit, dst); 4776 emit_src_register(emit, &immediate); 4777 end_emit_instruction(emit); 4778 return; 4779 case SVGA3D_CMP_LESS: 4780 opcode0.opcodeType = VGPU10_OPCODE_LT; 4781 break; 4782 case SVGA3D_CMP_EQUAL: 4783 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4784 break; 4785 case SVGA3D_CMP_LESSEQUAL: 4786 opcode0.opcodeType = VGPU10_OPCODE_GE; 4787 swapSrc = TRUE; 4788 break; 4789 case SVGA3D_CMP_GREATER: 4790 opcode0.opcodeType = VGPU10_OPCODE_LT; 4791 swapSrc = TRUE; 4792 break; 4793 case SVGA3D_CMP_NOTEQUAL: 4794 opcode0.opcodeType = VGPU10_OPCODE_NE; 4795 break; 4796 case SVGA3D_CMP_GREATEREQUAL: 4797 opcode0.opcodeType = VGPU10_OPCODE_GE; 4798 break; 4799 default: 4800 assert(!"Unexpected comparison mode"); 4801 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4802 } 4803 4804 begin_emit_instruction(emit); 4805 emit_dword(emit, opcode0.value); 4806 emit_dst_register(emit, dst); 4807 if (swapSrc) { 4808 emit_src_register(emit, src1); 4809 emit_src_register(emit, src0); 4810 } 4811 else { 4812 emit_src_register(emit, src0); 4813 emit_src_register(emit, src1); 4814 } 4815 end_emit_instruction(emit); 4816} 4817 4818 4819/** 4820 * Get texel/address offsets for a texture instruction. 4821 */ 4822static void 4823get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 4824 const struct tgsi_full_instruction *inst, int offsets[3]) 4825{ 4826 if (inst->Texture.NumOffsets == 1) { 4827 /* According to OpenGL Shader Language spec the offsets are only 4828 * fetched from a previously-declared immediate/literal. 4829 */ 4830 const struct tgsi_texture_offset *off = inst->TexOffsets; 4831 const unsigned index = off[0].Index; 4832 const unsigned swizzleX = off[0].SwizzleX; 4833 const unsigned swizzleY = off[0].SwizzleY; 4834 const unsigned swizzleZ = off[0].SwizzleZ; 4835 const union tgsi_immediate_data *imm = emit->immediates[index]; 4836 4837 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 4838 4839 offsets[0] = imm[swizzleX].Int; 4840 offsets[1] = imm[swizzleY].Int; 4841 offsets[2] = imm[swizzleZ].Int; 4842 } 4843 else { 4844 offsets[0] = offsets[1] = offsets[2] = 0; 4845 } 4846} 4847 4848 4849/** 4850 * Set up the coordinate register for texture sampling. 4851 * When we're sampling from a RECT texture we have to scale the 4852 * unnormalized coordinate to a normalized coordinate. 4853 * We do that by multiplying the coordinate by an "extra" constant. 4854 * An alternative would be to use the RESINFO instruction to query the 4855 * texture's size. 4856 */ 4857static struct tgsi_full_src_register 4858setup_texcoord(struct svga_shader_emitter_v10 *emit, 4859 unsigned unit, 4860 const struct tgsi_full_src_register *coord) 4861{ 4862 if (emit->key.tex[unit].unnormalized) { 4863 unsigned scale_index = emit->texcoord_scale_index[unit]; 4864 unsigned tmp = get_temp_index(emit); 4865 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4866 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4867 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 4868 4869 /* MUL tmp, coord, const[] */ 4870 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 4871 coord, &scale_src, FALSE); 4872 return tmp_src; 4873 } 4874 else { 4875 /* use texcoord as-is */ 4876 return *coord; 4877 } 4878} 4879 4880 4881/** 4882 * For SAMPLE_C instructions, emit the extra src register which indicates 4883 * the reference/comparision value. 4884 */ 4885static void 4886emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 4887 unsigned target, 4888 const struct tgsi_full_src_register *coord) 4889{ 4890 struct tgsi_full_src_register coord_src_ref; 4891 unsigned component; 4892 4893 assert(tgsi_is_shadow_target(target)); 4894 4895 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ 4896 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || 4897 target == TGSI_TEXTURE_SHADOWCUBE) 4898 component = TGSI_SWIZZLE_W; 4899 else 4900 component = TGSI_SWIZZLE_Z; 4901 4902 coord_src_ref = scalar_src(coord, component); 4903 4904 emit_src_register(emit, &coord_src_ref); 4905} 4906 4907 4908/** 4909 * Info for implementing texture swizzles. 4910 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 4911 * functions use this to encapsulate the extra steps needed to perform 4912 * a texture swizzle, or shadow/depth comparisons. 4913 * The shadow/depth comparison is only done here if for the cases where 4914 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). 4915 */ 4916struct tex_swizzle_info 4917{ 4918 boolean swizzled; 4919 boolean shadow_compare; 4920 unsigned unit; 4921 unsigned texture_target; /**< TGSI_TEXTURE_x */ 4922 struct tgsi_full_src_register tmp_src; 4923 struct tgsi_full_dst_register tmp_dst; 4924 const struct tgsi_full_dst_register *inst_dst; 4925 const struct tgsi_full_src_register *coord_src; 4926}; 4927 4928 4929/** 4930 * Do setup for handling texture swizzles or shadow compares. 4931 * \param unit the texture unit 4932 * \param inst the TGSI texture instruction 4933 * \param shadow_compare do shadow/depth comparison? 4934 * \param swz returns the swizzle info 4935 */ 4936static void 4937begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4938 unsigned unit, 4939 const struct tgsi_full_instruction *inst, 4940 boolean shadow_compare, 4941 struct tex_swizzle_info *swz) 4942{ 4943 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 4944 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 4945 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 4946 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 4947 4948 swz->shadow_compare = shadow_compare; 4949 swz->texture_target = inst->Texture.Texture; 4950 4951 if (swz->swizzled || shadow_compare) { 4952 /* Allocate temp register for the result of the SAMPLE instruction 4953 * and the source of the MOV/compare/swizzle instructions. 4954 */ 4955 unsigned tmp = get_temp_index(emit); 4956 swz->tmp_src = make_src_temp_reg(tmp); 4957 swz->tmp_dst = make_dst_temp_reg(tmp); 4958 4959 swz->unit = unit; 4960 } 4961 swz->inst_dst = &inst->Dst[0]; 4962 swz->coord_src = &inst->Src[0]; 4963} 4964 4965 4966/** 4967 * Returns the register to put the SAMPLE instruction results into. 4968 * This will either be the original instruction dst reg (if no swizzle 4969 * and no shadow comparison) or a temporary reg if there is a swizzle. 4970 */ 4971static const struct tgsi_full_dst_register * 4972get_tex_swizzle_dst(const struct tex_swizzle_info *swz) 4973{ 4974 return (swz->swizzled || swz->shadow_compare) 4975 ? &swz->tmp_dst : swz->inst_dst; 4976} 4977 4978 4979/** 4980 * This emits the MOV instruction that actually implements a texture swizzle 4981 * and/or shadow comparison. 4982 */ 4983static void 4984end_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4985 const struct tex_swizzle_info *swz) 4986{ 4987 if (swz->shadow_compare) { 4988 /* Emit extra instructions to compare the fetched texel value against 4989 * a texture coordinate component. The result of the comparison 4990 * is 0.0 or 1.0. 4991 */ 4992 struct tgsi_full_src_register coord_src; 4993 struct tgsi_full_src_register texel_src = 4994 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); 4995 struct tgsi_full_src_register one = 4996 make_immediate_reg_float(emit, 1.0f); 4997 /* convert gallium comparison func to SVGA comparison func */ 4998 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; 4999 5000 assert(emit->unit == PIPE_SHADER_FRAGMENT); 5001 5002 switch (swz->texture_target) { 5003 case TGSI_TEXTURE_SHADOW2D: 5004 case TGSI_TEXTURE_SHADOWRECT: 5005 case TGSI_TEXTURE_SHADOW1D_ARRAY: 5006 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 5007 break; 5008 case TGSI_TEXTURE_SHADOW1D: 5009 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); 5010 break; 5011 case TGSI_TEXTURE_SHADOWCUBE: 5012 case TGSI_TEXTURE_SHADOW2D_ARRAY: 5013 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); 5014 break; 5015 default: 5016 assert(!"Unexpected texture target in end_tex_swizzle()"); 5017 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 5018 } 5019 5020 /* COMPARE tmp, coord, texel */ 5021 /* XXX it would seem that the texel and coord arguments should 5022 * be transposed here, but piglit tests indicate otherwise. 5023 */ 5024 emit_comparison(emit, compare_func, 5025 &swz->tmp_dst, &texel_src, &coord_src); 5026 5027 /* AND dest, tmp, {1.0} */ 5028 begin_emit_instruction(emit); 5029 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); 5030 if (swz->swizzled) { 5031 emit_dst_register(emit, &swz->tmp_dst); 5032 } 5033 else { 5034 emit_dst_register(emit, swz->inst_dst); 5035 } 5036 emit_src_register(emit, &swz->tmp_src); 5037 emit_src_register(emit, &one); 5038 end_emit_instruction(emit); 5039 } 5040 5041 if (swz->swizzled) { 5042 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; 5043 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; 5044 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; 5045 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; 5046 unsigned writemask_0 = 0, writemask_1 = 0; 5047 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); 5048 5049 /* Swizzle w/out zero/one terms */ 5050 struct tgsi_full_src_register src_swizzled = 5051 swizzle_src(&swz->tmp_src, 5052 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, 5053 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, 5054 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, 5055 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); 5056 5057 /* MOV dst, color(tmp).<swizzle> */ 5058 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5059 swz->inst_dst, &src_swizzled, FALSE); 5060 5061 /* handle swizzle zero terms */ 5062 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | 5063 ((swz_g == PIPE_SWIZZLE_0) << 1) | 5064 ((swz_b == PIPE_SWIZZLE_0) << 2) | 5065 ((swz_a == PIPE_SWIZZLE_0) << 3)); 5066 5067 if (writemask_0) { 5068 struct tgsi_full_src_register zero = int_tex ? 5069 make_immediate_reg_int(emit, 0) : 5070 make_immediate_reg_float(emit, 0.0f); 5071 struct tgsi_full_dst_register dst = 5072 writemask_dst(swz->inst_dst, writemask_0); 5073 5074 /* MOV dst.writemask_0, {0,0,0,0} */ 5075 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5076 &dst, &zero, FALSE); 5077 } 5078 5079 /* handle swizzle one terms */ 5080 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | 5081 ((swz_g == PIPE_SWIZZLE_1) << 1) | 5082 ((swz_b == PIPE_SWIZZLE_1) << 2) | 5083 ((swz_a == PIPE_SWIZZLE_1) << 3)); 5084 5085 if (writemask_1) { 5086 struct tgsi_full_src_register one = int_tex ? 5087 make_immediate_reg_int(emit, 1) : 5088 make_immediate_reg_float(emit, 1.0f); 5089 struct tgsi_full_dst_register dst = 5090 writemask_dst(swz->inst_dst, writemask_1); 5091 5092 /* MOV dst.writemask_1, {1,1,1,1} */ 5093 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); 5094 } 5095 } 5096} 5097 5098 5099/** 5100 * Emit code for TGSI_OPCODE_SAMPLE instruction. 5101 */ 5102static boolean 5103emit_sample(struct svga_shader_emitter_v10 *emit, 5104 const struct tgsi_full_instruction *inst) 5105{ 5106 const unsigned resource_unit = inst->Src[1].Register.Index; 5107 const unsigned sampler_unit = inst->Src[2].Register.Index; 5108 struct tgsi_full_src_register coord; 5109 int offsets[3]; 5110 struct tex_swizzle_info swz_info; 5111 5112 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); 5113 5114 get_texel_offsets(emit, inst, offsets); 5115 5116 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); 5117 5118 /* SAMPLE dst, coord(s0), resource, sampler */ 5119 begin_emit_instruction(emit); 5120 5121 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, 5122 inst->Instruction.Saturate, offsets); 5123 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5124 emit_src_register(emit, &coord); 5125 emit_resource_register(emit, resource_unit); 5126 emit_sampler_register(emit, sampler_unit); 5127 end_emit_instruction(emit); 5128 5129 end_tex_swizzle(emit, &swz_info); 5130 5131 free_temp_indexes(emit); 5132 5133 return TRUE; 5134} 5135 5136 5137/** 5138 * Check if a texture instruction is valid. 5139 * An example of an invalid texture instruction is doing shadow comparison 5140 * with an integer-valued texture. 5141 * If we detect an invalid texture instruction, we replace it with: 5142 * MOV dst, {1,1,1,1}; 5143 * \return TRUE if valid, FALSE if invalid. 5144 */ 5145static boolean 5146is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 5147 const struct tgsi_full_instruction *inst) 5148{ 5149 const unsigned unit = inst->Src[1].Register.Index; 5150 const unsigned target = inst->Texture.Texture; 5151 boolean valid = TRUE; 5152 5153 if (tgsi_is_shadow_target(target) && 5154 is_integer_type(emit->sampler_return_type[unit])) { 5155 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 5156 valid = FALSE; 5157 } 5158 /* XXX might check for other conditions in the future here */ 5159 5160 if (!valid) { 5161 /* emit a MOV dst, {1,1,1,1} instruction. */ 5162 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 5163 begin_emit_instruction(emit); 5164 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5165 emit_dst_register(emit, &inst->Dst[0]); 5166 emit_src_register(emit, &one); 5167 end_emit_instruction(emit); 5168 } 5169 5170 return valid; 5171} 5172 5173 5174/** 5175 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 5176 */ 5177static boolean 5178emit_tex(struct svga_shader_emitter_v10 *emit, 5179 const struct tgsi_full_instruction *inst) 5180{ 5181 const uint unit = inst->Src[1].Register.Index; 5182 unsigned target = inst->Texture.Texture; 5183 unsigned opcode; 5184 struct tgsi_full_src_register coord; 5185 int offsets[3]; 5186 struct tex_swizzle_info swz_info; 5187 5188 /* check that the sampler returns a float */ 5189 if (!is_valid_tex_instruction(emit, inst)) 5190 return TRUE; 5191 5192 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5193 5194 get_texel_offsets(emit, inst, offsets); 5195 5196 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5197 5198 /* SAMPLE dst, coord(s0), resource, sampler */ 5199 begin_emit_instruction(emit); 5200 5201 if (tgsi_is_shadow_target(target)) 5202 opcode = VGPU10_OPCODE_SAMPLE_C; 5203 else 5204 opcode = VGPU10_OPCODE_SAMPLE; 5205 5206 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5207 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5208 emit_src_register(emit, &coord); 5209 emit_resource_register(emit, unit); 5210 emit_sampler_register(emit, unit); 5211 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5212 emit_tex_compare_refcoord(emit, target, &coord); 5213 } 5214 end_emit_instruction(emit); 5215 5216 end_tex_swizzle(emit, &swz_info); 5217 5218 free_temp_indexes(emit); 5219 5220 return TRUE; 5221} 5222 5223 5224/** 5225 * Emit code for TGSI_OPCODE_TXP (projective texture) 5226 */ 5227static boolean 5228emit_txp(struct svga_shader_emitter_v10 *emit, 5229 const struct tgsi_full_instruction *inst) 5230{ 5231 const uint unit = inst->Src[1].Register.Index; 5232 unsigned target = inst->Texture.Texture; 5233 unsigned opcode; 5234 int offsets[3]; 5235 unsigned tmp = get_temp_index(emit); 5236 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 5237 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 5238 struct tgsi_full_src_register src0_wwww = 5239 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5240 struct tgsi_full_src_register coord; 5241 struct tex_swizzle_info swz_info; 5242 5243 /* check that the sampler returns a float */ 5244 if (!is_valid_tex_instruction(emit, inst)) 5245 return TRUE; 5246 5247 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5248 5249 get_texel_offsets(emit, inst, offsets); 5250 5251 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5252 5253 /* DIV tmp, coord, coord.wwww */ 5254 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 5255 &coord, &src0_wwww, FALSE); 5256 5257 /* SAMPLE dst, coord(tmp), resource, sampler */ 5258 begin_emit_instruction(emit); 5259 5260 if (tgsi_is_shadow_target(target)) 5261 opcode = VGPU10_OPCODE_SAMPLE_C; 5262 else 5263 opcode = VGPU10_OPCODE_SAMPLE; 5264 5265 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5266 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5267 emit_src_register(emit, &tmp_src); /* projected coord */ 5268 emit_resource_register(emit, unit); 5269 emit_sampler_register(emit, unit); 5270 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5271 emit_tex_compare_refcoord(emit, target, &tmp_src); 5272 } 5273 end_emit_instruction(emit); 5274 5275 end_tex_swizzle(emit, &swz_info); 5276 5277 free_temp_indexes(emit); 5278 5279 return TRUE; 5280} 5281 5282 5283/* 5284 * Emit code for TGSI_OPCODE_XPD instruction. 5285 */ 5286static boolean 5287emit_xpd(struct svga_shader_emitter_v10 *emit, 5288 const struct tgsi_full_instruction *inst) 5289{ 5290 /* dst.x = src0.y * src1.z - src1.y * src0.z 5291 * dst.y = src0.z * src1.x - src1.z * src0.x 5292 * dst.z = src0.x * src1.y - src1.x * src0.y 5293 * dst.w = 1 5294 */ 5295 struct tgsi_full_src_register s0_xxxx = 5296 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 5297 struct tgsi_full_src_register s0_yyyy = 5298 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 5299 struct tgsi_full_src_register s0_zzzz = 5300 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 5301 5302 struct tgsi_full_src_register s1_xxxx = 5303 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5304 struct tgsi_full_src_register s1_yyyy = 5305 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 5306 struct tgsi_full_src_register s1_zzzz = 5307 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z); 5308 5309 unsigned tmp1 = get_temp_index(emit); 5310 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 5311 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 5312 5313 unsigned tmp2 = get_temp_index(emit); 5314 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 5315 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 5316 struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src); 5317 5318 unsigned tmp3 = get_temp_index(emit); 5319 struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3); 5320 struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3); 5321 struct tgsi_full_dst_register tmp3_dst_x = 5322 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X); 5323 struct tgsi_full_dst_register tmp3_dst_y = 5324 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y); 5325 struct tgsi_full_dst_register tmp3_dst_z = 5326 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z); 5327 struct tgsi_full_dst_register tmp3_dst_w = 5328 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W); 5329 5330 /* Note: we put all the intermediate computations into tmp3 in case 5331 * the XPD dest register is that same as one of the src regs (in which 5332 * case we could clobber a src reg before we're done with it) . 5333 * 5334 * Note: we could get by with just one temp register instead of three 5335 * since we're doing scalar operations and there's enough room in one 5336 * temp for everything. 5337 */ 5338 5339 /* MUL tmp1, src0.y, src1.z */ 5340 /* MUL tmp2, src1.y, src0.z */ 5341 /* ADD tmp3.x, tmp1, -tmp2 */ 5342 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 5343 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, 5344 &s0_yyyy, &s1_zzzz, FALSE); 5345 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, 5346 &s1_yyyy, &s0_zzzz, FALSE); 5347 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, 5348 &tmp1_src, &neg_tmp2_src, FALSE); 5349 } 5350 5351 /* MUL tmp1, src0.z, src1.x */ 5352 /* MUL tmp2, src1.z, src0.x */ 5353 /* ADD tmp3.y, tmp1, -tmp2 */ 5354 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 5355 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, 5356 &s1_xxxx, FALSE); 5357 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, 5358 &s0_xxxx, FALSE); 5359 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, 5360 &tmp1_src, &neg_tmp2_src, FALSE); 5361 } 5362 5363 /* MUL tmp1, src0.x, src1.y */ 5364 /* MUL tmp2, src1.x, src0.y */ 5365 /* ADD tmp3.z, tmp1, -tmp2 */ 5366 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 5367 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, 5368 &s1_yyyy, FALSE); 5369 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, 5370 &s0_yyyy, FALSE); 5371 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, 5372 &tmp1_src, &neg_tmp2_src, FALSE); 5373 } 5374 5375 /* MOV tmp3.w, 1.0 */ 5376 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 5377 struct tgsi_full_src_register one = 5378 make_immediate_reg_float(emit, 1.0f); 5379 5380 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE); 5381 } 5382 5383 /* MOV dst, tmp3 */ 5384 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src, 5385 inst->Instruction.Saturate); 5386 5387 5388 free_temp_indexes(emit); 5389 5390 return TRUE; 5391} 5392 5393 5394/** 5395 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 5396 */ 5397static boolean 5398emit_txd(struct svga_shader_emitter_v10 *emit, 5399 const struct tgsi_full_instruction *inst) 5400{ 5401 const uint unit = inst->Src[3].Register.Index; 5402 unsigned target = inst->Texture.Texture; 5403 int offsets[3]; 5404 struct tgsi_full_src_register coord; 5405 struct tex_swizzle_info swz_info; 5406 5407 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5408 &swz_info); 5409 5410 get_texel_offsets(emit, inst, offsets); 5411 5412 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5413 5414 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 5415 begin_emit_instruction(emit); 5416 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 5417 inst->Instruction.Saturate, offsets); 5418 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5419 emit_src_register(emit, &coord); 5420 emit_resource_register(emit, unit); 5421 emit_sampler_register(emit, unit); 5422 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 5423 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 5424 end_emit_instruction(emit); 5425 5426 end_tex_swizzle(emit, &swz_info); 5427 5428 free_temp_indexes(emit); 5429 5430 return TRUE; 5431} 5432 5433 5434/** 5435 * Emit code for TGSI_OPCODE_TXF (texel fetch) 5436 */ 5437static boolean 5438emit_txf(struct svga_shader_emitter_v10 *emit, 5439 const struct tgsi_full_instruction *inst) 5440{ 5441 const uint unit = inst->Src[1].Register.Index; 5442 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture); 5443 int offsets[3]; 5444 struct tex_swizzle_info swz_info; 5445 5446 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5447 5448 get_texel_offsets(emit, inst, offsets); 5449 5450 if (msaa) { 5451 /* Fetch one sample from an MSAA texture */ 5452 struct tgsi_full_src_register sampleIndex = 5453 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5454 /* LD_MS dst, coord(s0), resource, sampleIndex */ 5455 begin_emit_instruction(emit); 5456 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 5457 inst->Instruction.Saturate, offsets); 5458 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5459 emit_src_register(emit, &inst->Src[0]); 5460 emit_resource_register(emit, unit); 5461 emit_src_register(emit, &sampleIndex); 5462 end_emit_instruction(emit); 5463 } 5464 else { 5465 /* Fetch one texel specified by integer coordinate */ 5466 /* LD dst, coord(s0), resource */ 5467 begin_emit_instruction(emit); 5468 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 5469 inst->Instruction.Saturate, offsets); 5470 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5471 emit_src_register(emit, &inst->Src[0]); 5472 emit_resource_register(emit, unit); 5473 end_emit_instruction(emit); 5474 } 5475 5476 end_tex_swizzle(emit, &swz_info); 5477 5478 free_temp_indexes(emit); 5479 5480 return TRUE; 5481} 5482 5483 5484/** 5485 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 5486 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 5487 */ 5488static boolean 5489emit_txl_txb(struct svga_shader_emitter_v10 *emit, 5490 const struct tgsi_full_instruction *inst) 5491{ 5492 unsigned target = inst->Texture.Texture; 5493 unsigned opcode, unit; 5494 int offsets[3]; 5495 struct tgsi_full_src_register coord, lod_bias; 5496 struct tex_swizzle_info swz_info; 5497 5498 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5499 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5500 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 5501 5502 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 5503 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5504 unit = inst->Src[2].Register.Index; 5505 } 5506 else { 5507 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5508 unit = inst->Src[1].Register.Index; 5509 } 5510 5511 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5512 &swz_info); 5513 5514 get_texel_offsets(emit, inst, offsets); 5515 5516 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5517 5518 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 5519 begin_emit_instruction(emit); 5520 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 5521 opcode = VGPU10_OPCODE_SAMPLE_L; 5522 } 5523 else { 5524 opcode = VGPU10_OPCODE_SAMPLE_B; 5525 } 5526 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5527 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5528 emit_src_register(emit, &coord); 5529 emit_resource_register(emit, unit); 5530 emit_sampler_register(emit, unit); 5531 emit_src_register(emit, &lod_bias); 5532 end_emit_instruction(emit); 5533 5534 end_tex_swizzle(emit, &swz_info); 5535 5536 free_temp_indexes(emit); 5537 5538 return TRUE; 5539} 5540 5541 5542/** 5543 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 5544 */ 5545static boolean 5546emit_txq(struct svga_shader_emitter_v10 *emit, 5547 const struct tgsi_full_instruction *inst) 5548{ 5549 const uint unit = inst->Src[1].Register.Index; 5550 5551 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { 5552 /* RESINFO does not support querying texture buffers, so we instead 5553 * store texture buffer sizes in shader constants, then copy them to 5554 * implement TXQ instead of emitting RESINFO. 5555 * MOV dst, const[texture_buffer_size_index[unit]] 5556 */ 5557 struct tgsi_full_src_register size_src = 5558 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5559 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5560 FALSE); 5561 } else { 5562 /* RESINFO dst, srcMipLevel, resource */ 5563 begin_emit_instruction(emit); 5564 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5565 emit_dst_register(emit, &inst->Dst[0]); 5566 emit_src_register(emit, &inst->Src[0]); 5567 emit_resource_register(emit, unit); 5568 end_emit_instruction(emit); 5569 } 5570 5571 free_temp_indexes(emit); 5572 5573 return TRUE; 5574} 5575 5576 5577/** 5578 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5579 */ 5580static boolean 5581emit_simple(struct svga_shader_emitter_v10 *emit, 5582 const struct tgsi_full_instruction *inst) 5583{ 5584 const unsigned opcode = inst->Instruction.Opcode; 5585 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5586 unsigned i; 5587 5588 begin_emit_instruction(emit); 5589 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5590 inst->Instruction.Saturate); 5591 for (i = 0; i < op->num_dst; i++) { 5592 emit_dst_register(emit, &inst->Dst[i]); 5593 } 5594 for (i = 0; i < op->num_src; i++) { 5595 emit_src_register(emit, &inst->Src[i]); 5596 } 5597 end_emit_instruction(emit); 5598 5599 return TRUE; 5600} 5601 5602 5603/** 5604 * We only special case the MOV instruction to try to detect constant 5605 * color writes in the fragment shader. 5606 */ 5607static boolean 5608emit_mov(struct svga_shader_emitter_v10 *emit, 5609 const struct tgsi_full_instruction *inst) 5610{ 5611 const struct tgsi_full_src_register *src = &inst->Src[0]; 5612 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5613 5614 if (emit->unit == PIPE_SHADER_FRAGMENT && 5615 dst->Register.File == TGSI_FILE_OUTPUT && 5616 dst->Register.Index == 0 && 5617 src->Register.File == TGSI_FILE_CONSTANT && 5618 !src->Register.Indirect) { 5619 emit->constant_color_output = TRUE; 5620 } 5621 5622 return emit_simple(emit, inst); 5623} 5624 5625 5626/** 5627 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5628 * where TGSI only uses one dest register. 5629 */ 5630static boolean 5631emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5632 const struct tgsi_full_instruction *inst, 5633 unsigned dst_count, 5634 unsigned dst_index) 5635{ 5636 const unsigned opcode = inst->Instruction.Opcode; 5637 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5638 unsigned i; 5639 5640 begin_emit_instruction(emit); 5641 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5642 inst->Instruction.Saturate); 5643 5644 for (i = 0; i < dst_count; i++) { 5645 if (i == dst_index) { 5646 emit_dst_register(emit, &inst->Dst[0]); 5647 } else { 5648 emit_null_dst_register(emit); 5649 } 5650 } 5651 5652 for (i = 0; i < op->num_src; i++) { 5653 emit_src_register(emit, &inst->Src[i]); 5654 } 5655 end_emit_instruction(emit); 5656 5657 return TRUE; 5658} 5659 5660 5661/** 5662 * Translate a single TGSI instruction to VGPU10. 5663 */ 5664static boolean 5665emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 5666 unsigned inst_number, 5667 const struct tgsi_full_instruction *inst) 5668{ 5669 const unsigned opcode = inst->Instruction.Opcode; 5670 5671 switch (opcode) { 5672 case TGSI_OPCODE_ADD: 5673 case TGSI_OPCODE_AND: 5674 case TGSI_OPCODE_BGNLOOP: 5675 case TGSI_OPCODE_BRK: 5676 case TGSI_OPCODE_CEIL: 5677 case TGSI_OPCODE_CONT: 5678 case TGSI_OPCODE_DDX: 5679 case TGSI_OPCODE_DDY: 5680 case TGSI_OPCODE_DIV: 5681 case TGSI_OPCODE_DP2: 5682 case TGSI_OPCODE_DP3: 5683 case TGSI_OPCODE_DP4: 5684 case TGSI_OPCODE_ELSE: 5685 case TGSI_OPCODE_ENDIF: 5686 case TGSI_OPCODE_ENDLOOP: 5687 case TGSI_OPCODE_ENDSUB: 5688 case TGSI_OPCODE_F2I: 5689 case TGSI_OPCODE_F2U: 5690 case TGSI_OPCODE_FLR: 5691 case TGSI_OPCODE_FRC: 5692 case TGSI_OPCODE_FSEQ: 5693 case TGSI_OPCODE_FSGE: 5694 case TGSI_OPCODE_FSLT: 5695 case TGSI_OPCODE_FSNE: 5696 case TGSI_OPCODE_I2F: 5697 case TGSI_OPCODE_IMAX: 5698 case TGSI_OPCODE_IMIN: 5699 case TGSI_OPCODE_INEG: 5700 case TGSI_OPCODE_ISGE: 5701 case TGSI_OPCODE_ISHR: 5702 case TGSI_OPCODE_ISLT: 5703 case TGSI_OPCODE_MAD: 5704 case TGSI_OPCODE_MAX: 5705 case TGSI_OPCODE_MIN: 5706 case TGSI_OPCODE_MUL: 5707 case TGSI_OPCODE_NOP: 5708 case TGSI_OPCODE_NOT: 5709 case TGSI_OPCODE_OR: 5710 case TGSI_OPCODE_RET: 5711 case TGSI_OPCODE_UADD: 5712 case TGSI_OPCODE_USEQ: 5713 case TGSI_OPCODE_USGE: 5714 case TGSI_OPCODE_USLT: 5715 case TGSI_OPCODE_UMIN: 5716 case TGSI_OPCODE_UMAD: 5717 case TGSI_OPCODE_UMAX: 5718 case TGSI_OPCODE_ROUND: 5719 case TGSI_OPCODE_SQRT: 5720 case TGSI_OPCODE_SHL: 5721 case TGSI_OPCODE_TRUNC: 5722 case TGSI_OPCODE_U2F: 5723 case TGSI_OPCODE_UCMP: 5724 case TGSI_OPCODE_USHR: 5725 case TGSI_OPCODE_USNE: 5726 case TGSI_OPCODE_XOR: 5727 /* simple instructions */ 5728 return emit_simple(emit, inst); 5729 5730 case TGSI_OPCODE_MOV: 5731 return emit_mov(emit, inst); 5732 case TGSI_OPCODE_EMIT: 5733 return emit_vertex(emit, inst); 5734 case TGSI_OPCODE_ENDPRIM: 5735 return emit_endprim(emit, inst); 5736 case TGSI_OPCODE_ABS: 5737 return emit_abs(emit, inst); 5738 case TGSI_OPCODE_IABS: 5739 return emit_iabs(emit, inst); 5740 case TGSI_OPCODE_ARL: 5741 /* fall-through */ 5742 case TGSI_OPCODE_UARL: 5743 return emit_arl_uarl(emit, inst); 5744 case TGSI_OPCODE_BGNSUB: 5745 /* no-op */ 5746 return TRUE; 5747 case TGSI_OPCODE_CAL: 5748 return emit_cal(emit, inst); 5749 case TGSI_OPCODE_CMP: 5750 return emit_cmp(emit, inst); 5751 case TGSI_OPCODE_COS: 5752 return emit_sincos(emit, inst); 5753 case TGSI_OPCODE_DP2A: 5754 return emit_dp2a(emit, inst); 5755 case TGSI_OPCODE_DPH: 5756 return emit_dph(emit, inst); 5757 case TGSI_OPCODE_DST: 5758 return emit_dst(emit, inst); 5759 case TGSI_OPCODE_EX2: 5760 return emit_ex2(emit, inst); 5761 case TGSI_OPCODE_EXP: 5762 return emit_exp(emit, inst); 5763 case TGSI_OPCODE_IF: 5764 return emit_if(emit, inst); 5765 case TGSI_OPCODE_KILL: 5766 return emit_kill(emit, inst); 5767 case TGSI_OPCODE_KILL_IF: 5768 return emit_kill_if(emit, inst); 5769 case TGSI_OPCODE_LG2: 5770 return emit_lg2(emit, inst); 5771 case TGSI_OPCODE_LIT: 5772 return emit_lit(emit, inst); 5773 case TGSI_OPCODE_LOG: 5774 return emit_log(emit, inst); 5775 case TGSI_OPCODE_LRP: 5776 return emit_lrp(emit, inst); 5777 case TGSI_OPCODE_POW: 5778 return emit_pow(emit, inst); 5779 case TGSI_OPCODE_RCP: 5780 return emit_rcp(emit, inst); 5781 case TGSI_OPCODE_RSQ: 5782 return emit_rsq(emit, inst); 5783 case TGSI_OPCODE_SAMPLE: 5784 return emit_sample(emit, inst); 5785 case TGSI_OPCODE_SCS: 5786 return emit_scs(emit, inst); 5787 case TGSI_OPCODE_SEQ: 5788 return emit_seq(emit, inst); 5789 case TGSI_OPCODE_SGE: 5790 return emit_sge(emit, inst); 5791 case TGSI_OPCODE_SGT: 5792 return emit_sgt(emit, inst); 5793 case TGSI_OPCODE_SIN: 5794 return emit_sincos(emit, inst); 5795 case TGSI_OPCODE_SLE: 5796 return emit_sle(emit, inst); 5797 case TGSI_OPCODE_SLT: 5798 return emit_slt(emit, inst); 5799 case TGSI_OPCODE_SNE: 5800 return emit_sne(emit, inst); 5801 case TGSI_OPCODE_SSG: 5802 return emit_ssg(emit, inst); 5803 case TGSI_OPCODE_ISSG: 5804 return emit_issg(emit, inst); 5805 case TGSI_OPCODE_SUB: 5806 return emit_sub(emit, inst); 5807 case TGSI_OPCODE_TEX: 5808 return emit_tex(emit, inst); 5809 case TGSI_OPCODE_TXP: 5810 return emit_txp(emit, inst); 5811 case TGSI_OPCODE_TXB: 5812 case TGSI_OPCODE_TXB2: 5813 case TGSI_OPCODE_TXL: 5814 return emit_txl_txb(emit, inst); 5815 case TGSI_OPCODE_TXD: 5816 return emit_txd(emit, inst); 5817 case TGSI_OPCODE_TXF: 5818 return emit_txf(emit, inst); 5819 case TGSI_OPCODE_TXQ: 5820 return emit_txq(emit, inst); 5821 case TGSI_OPCODE_UIF: 5822 return emit_if(emit, inst); 5823 case TGSI_OPCODE_XPD: 5824 return emit_xpd(emit, inst); 5825 case TGSI_OPCODE_UMUL_HI: 5826 case TGSI_OPCODE_IMUL_HI: 5827 case TGSI_OPCODE_UDIV: 5828 case TGSI_OPCODE_IDIV: 5829 /* These cases use only the FIRST of two destination registers */ 5830 return emit_simple_1dst(emit, inst, 2, 0); 5831 case TGSI_OPCODE_UMUL: 5832 case TGSI_OPCODE_UMOD: 5833 case TGSI_OPCODE_MOD: 5834 /* These cases use only the SECOND of two destination registers */ 5835 return emit_simple_1dst(emit, inst, 2, 1); 5836 case TGSI_OPCODE_END: 5837 if (!emit_post_helpers(emit)) 5838 return FALSE; 5839 return emit_simple(emit, inst); 5840 5841 default: 5842 debug_printf("Unimplemented tgsi instruction %s\n", 5843 tgsi_get_opcode_name(opcode)); 5844 return FALSE; 5845 } 5846 5847 return TRUE; 5848} 5849 5850 5851/** 5852 * Emit the extra instructions to adjust the vertex position. 5853 * There are two possible adjustments: 5854 * 1. Converting from Gallium to VGPU10 coordinate space by applying the 5855 * "prescale" and "pretranslate" values. 5856 * 2. Undoing the viewport transformation when we use the swtnl/draw path. 5857 * \param vs_pos_tmp_index which temporary register contains the vertex pos. 5858 */ 5859static void 5860emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, 5861 unsigned vs_pos_tmp_index) 5862{ 5863 struct tgsi_full_src_register tmp_pos_src; 5864 struct tgsi_full_dst_register pos_dst; 5865 5866 /* Don't bother to emit any extra vertex instructions if vertex position is 5867 * not written out 5868 */ 5869 if (emit->vposition.out_index == INVALID_INDEX) 5870 return; 5871 5872 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 5873 pos_dst = make_dst_output_reg(emit->vposition.out_index); 5874 5875 /* If non-adjusted vertex position register index 5876 * is valid, copy the vertex position from the temporary 5877 * vertex position register before it is modified by the 5878 * prescale computation. 5879 */ 5880 if (emit->vposition.so_index != INVALID_INDEX) { 5881 struct tgsi_full_dst_register pos_so_dst = 5882 make_dst_output_reg(emit->vposition.so_index); 5883 5884 /* MOV pos_so, tmp_pos */ 5885 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, 5886 &tmp_pos_src, FALSE); 5887 } 5888 5889 if (emit->vposition.need_prescale) { 5890 /* This code adjusts the vertex position to match the VGPU10 convention. 5891 * If p is the position computed by the shader (usually by applying the 5892 * modelview and projection matrices), the new position q is computed by: 5893 * 5894 * q.x = p.w * trans.x + p.x * scale.x 5895 * q.y = p.w * trans.y + p.y * scale.y 5896 * q.z = p.w * trans.z + p.z * scale.z; 5897 * q.w = p.w * trans.w + p.w; 5898 */ 5899 struct tgsi_full_src_register tmp_pos_src_w = 5900 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5901 struct tgsi_full_dst_register tmp_pos_dst = 5902 make_dst_temp_reg(vs_pos_tmp_index); 5903 struct tgsi_full_dst_register tmp_pos_dst_xyz = 5904 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 5905 5906 struct tgsi_full_src_register prescale_scale = 5907 make_src_const_reg(emit->vposition.prescale_scale_index); 5908 struct tgsi_full_src_register prescale_trans = 5909 make_src_const_reg(emit->vposition.prescale_trans_index); 5910 5911 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 5912 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 5913 &tmp_pos_src, &prescale_scale, FALSE); 5914 5915 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 5916 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 5917 &prescale_trans, &tmp_pos_src, FALSE); 5918 } 5919 else if (emit->key.vs.undo_viewport) { 5920 /* This code computes the final vertex position from the temporary 5921 * vertex position by undoing the viewport transformation and the 5922 * divide-by-W operation (we convert window coords back to clip coords). 5923 * This is needed when we use the 'draw' module for fallbacks. 5924 * If p is the temp pos in window coords, then the NDC coord q is: 5925 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 5926 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 5927 * q.z = p.z * p.w 5928 * q.w = p.w 5929 * CONST[vs_viewport_index] contains: 5930 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 5931 */ 5932 struct tgsi_full_dst_register tmp_pos_dst = 5933 make_dst_temp_reg(vs_pos_tmp_index); 5934 struct tgsi_full_dst_register tmp_pos_dst_xy = 5935 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 5936 struct tgsi_full_src_register tmp_pos_src_wwww = 5937 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5938 5939 struct tgsi_full_dst_register pos_dst_xyz = 5940 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 5941 struct tgsi_full_dst_register pos_dst_w = 5942 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 5943 5944 struct tgsi_full_src_register vp_xyzw = 5945 make_src_const_reg(emit->vs.viewport_index); 5946 struct tgsi_full_src_register vp_zwww = 5947 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 5948 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 5949 5950 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 5951 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 5952 &tmp_pos_src, &vp_zwww, FALSE); 5953 5954 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 5955 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 5956 &tmp_pos_src, &vp_xyzw, FALSE); 5957 5958 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 5959 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 5960 &tmp_pos_src, &tmp_pos_src_wwww, FALSE); 5961 5962 /* MOV pos.w, tmp_pos.w */ 5963 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, 5964 &tmp_pos_src, FALSE); 5965 } 5966 else if (vs_pos_tmp_index != INVALID_INDEX) { 5967 /* This code is to handle the case where the temporary vertex 5968 * position register is created when the vertex shader has stream 5969 * output and prescale is disabled because rasterization is to be 5970 * discarded. 5971 */ 5972 struct tgsi_full_dst_register pos_dst = 5973 make_dst_output_reg(emit->vposition.out_index); 5974 5975 /* MOV pos, tmp_pos */ 5976 begin_emit_instruction(emit); 5977 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5978 emit_dst_register(emit, &pos_dst); 5979 emit_src_register(emit, &tmp_pos_src); 5980 end_emit_instruction(emit); 5981 } 5982} 5983 5984static void 5985emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 5986{ 5987 if (emit->clip_mode == CLIP_DISTANCE) { 5988 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 5989 emit_clip_distance_instructions(emit); 5990 5991 } else if (emit->clip_mode == CLIP_VERTEX) { 5992 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 5993 emit_clip_vertex_instructions(emit); 5994 } 5995 5996 /** 5997 * Emit vertex position and take care of legacy user planes only if 5998 * there is a valid vertex position register index. 5999 * This is to take care of the case 6000 * where the shader doesn't output vertex position. Then in 6001 * this case, don't bother to emit more vertex instructions. 6002 */ 6003 if (emit->vposition.out_index == INVALID_INDEX) 6004 return; 6005 6006 /** 6007 * Emit per-vertex clipping instructions for legacy user defined clip planes. 6008 * NOTE: we must emit the clip distance instructions before the 6009 * emit_vpos_instructions() call since the later function will change 6010 * the TEMP[vs_pos_tmp_index] value. 6011 */ 6012 if (emit->clip_mode == CLIP_LEGACY) { 6013 /* Emit CLIPDIST for legacy user defined clip planes */ 6014 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 6015 } 6016} 6017 6018 6019/** 6020 * Emit extra per-vertex instructions. This includes clip-coordinate 6021 * space conversion and computing clip distances. This is called for 6022 * each GS emit-vertex instruction and at the end of VS translation. 6023 */ 6024static void 6025emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6026{ 6027 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 6028 6029 /* Emit clipping instructions based on clipping mode */ 6030 emit_clipping_instructions(emit); 6031 6032 /** 6033 * Reset the temporary vertex position register index 6034 * so that emit_dst_register() will use the real vertex position output 6035 */ 6036 emit->vposition.tmp_index = INVALID_INDEX; 6037 6038 /* Emit vertex position instructions */ 6039 emit_vpos_instructions(emit, vs_pos_tmp_index); 6040 6041 /* Restore original vposition.tmp_index value for the next GS vertex. 6042 * It doesn't matter for VS. 6043 */ 6044 emit->vposition.tmp_index = vs_pos_tmp_index; 6045} 6046 6047/** 6048 * Translate the TGSI_OPCODE_EMIT GS instruction. 6049 */ 6050static boolean 6051emit_vertex(struct svga_shader_emitter_v10 *emit, 6052 const struct tgsi_full_instruction *inst) 6053{ 6054 unsigned ret = TRUE; 6055 6056 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6057 6058 emit_vertex_instructions(emit); 6059 6060 /* We can't use emit_simple() because the TGSI instruction has one 6061 * operand (vertex stream number) which we must ignore for VGPU10. 6062 */ 6063 begin_emit_instruction(emit); 6064 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 6065 end_emit_instruction(emit); 6066 6067 return ret; 6068} 6069 6070 6071/** 6072 * Emit the extra code to convert from VGPU10's boolean front-face 6073 * register to TGSI's signed front-face register. 6074 * 6075 * TODO: Make temporary front-face register a scalar. 6076 */ 6077static void 6078emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 6079{ 6080 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6081 6082 if (emit->fs.face_input_index != INVALID_INDEX) { 6083 /* convert vgpu10 boolean face register to gallium +/-1 value */ 6084 struct tgsi_full_dst_register tmp_dst = 6085 make_dst_temp_reg(emit->fs.face_tmp_index); 6086 struct tgsi_full_src_register one = 6087 make_immediate_reg_float(emit, 1.0f); 6088 struct tgsi_full_src_register neg_one = 6089 make_immediate_reg_float(emit, -1.0f); 6090 6091 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 6092 begin_emit_instruction(emit); 6093 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 6094 emit_dst_register(emit, &tmp_dst); 6095 emit_face_register(emit); 6096 emit_src_register(emit, &one); 6097 emit_src_register(emit, &neg_one); 6098 end_emit_instruction(emit); 6099 } 6100} 6101 6102 6103/** 6104 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 6105 */ 6106static void 6107emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 6108{ 6109 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6110 6111 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 6112 struct tgsi_full_dst_register tmp_dst = 6113 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 6114 struct tgsi_full_dst_register tmp_dst_xyz = 6115 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 6116 struct tgsi_full_dst_register tmp_dst_w = 6117 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6118 struct tgsi_full_src_register one = 6119 make_immediate_reg_float(emit, 1.0f); 6120 struct tgsi_full_src_register fragcoord = 6121 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 6122 6123 /* save the input index */ 6124 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 6125 /* set to invalid to prevent substitution in emit_src_register() */ 6126 emit->fs.fragcoord_input_index = INVALID_INDEX; 6127 6128 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 6129 begin_emit_instruction(emit); 6130 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 6131 emit_dst_register(emit, &tmp_dst_xyz); 6132 emit_src_register(emit, &fragcoord); 6133 end_emit_instruction(emit); 6134 6135 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 6136 begin_emit_instruction(emit); 6137 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 6138 emit_dst_register(emit, &tmp_dst_w); 6139 emit_src_register(emit, &one); 6140 emit_src_register(emit, &fragcoord); 6141 end_emit_instruction(emit); 6142 6143 /* restore saved value */ 6144 emit->fs.fragcoord_input_index = fragcoord_input_index; 6145 } 6146} 6147 6148 6149/** 6150 * Emit extra instructions to adjust VS inputs/attributes. This can 6151 * mean casting a vertex attribute from int to float or setting the 6152 * W component to 1, or both. 6153 */ 6154static void 6155emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 6156{ 6157 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 6158 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 6159 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 6160 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 6161 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 6162 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 6163 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 6164 6165 unsigned adjust_mask = (save_w_1_mask | 6166 save_itof_mask | 6167 save_utof_mask | 6168 save_is_bgra_mask | 6169 save_puint_to_snorm_mask | 6170 save_puint_to_uscaled_mask | 6171 save_puint_to_sscaled_mask); 6172 6173 assert(emit->unit == PIPE_SHADER_VERTEX); 6174 6175 if (adjust_mask) { 6176 struct tgsi_full_src_register one = 6177 make_immediate_reg_float(emit, 1.0f); 6178 6179 struct tgsi_full_src_register one_int = 6180 make_immediate_reg_int(emit, 1); 6181 6182 /* We need to turn off these bitmasks while emitting the 6183 * instructions below, then restore them afterward. 6184 */ 6185 emit->key.vs.adjust_attrib_w_1 = 0; 6186 emit->key.vs.adjust_attrib_itof = 0; 6187 emit->key.vs.adjust_attrib_utof = 0; 6188 emit->key.vs.attrib_is_bgra = 0; 6189 emit->key.vs.attrib_puint_to_snorm = 0; 6190 emit->key.vs.attrib_puint_to_uscaled = 0; 6191 emit->key.vs.attrib_puint_to_sscaled = 0; 6192 6193 while (adjust_mask) { 6194 unsigned index = u_bit_scan(&adjust_mask); 6195 6196 /* skip the instruction if this vertex attribute is not being used */ 6197 if (emit->info.input_usage_mask[index] == 0) 6198 continue; 6199 6200 unsigned tmp = emit->vs.adjusted_input[index]; 6201 struct tgsi_full_src_register input_src = 6202 make_src_reg(TGSI_FILE_INPUT, index); 6203 6204 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6205 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6206 struct tgsi_full_dst_register tmp_dst_w = 6207 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6208 6209 /* ITOF/UTOF/MOV tmp, input[index] */ 6210 if (save_itof_mask & (1 << index)) { 6211 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, 6212 &tmp_dst, &input_src, FALSE); 6213 } 6214 else if (save_utof_mask & (1 << index)) { 6215 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, 6216 &tmp_dst, &input_src, FALSE); 6217 } 6218 else if (save_puint_to_snorm_mask & (1 << index)) { 6219 emit_puint_to_snorm(emit, &tmp_dst, &input_src); 6220 } 6221 else if (save_puint_to_uscaled_mask & (1 << index)) { 6222 emit_puint_to_uscaled(emit, &tmp_dst, &input_src); 6223 } 6224 else if (save_puint_to_sscaled_mask & (1 << index)) { 6225 emit_puint_to_sscaled(emit, &tmp_dst, &input_src); 6226 } 6227 else { 6228 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); 6229 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6230 &tmp_dst, &input_src, FALSE); 6231 } 6232 6233 if (save_is_bgra_mask & (1 << index)) { 6234 emit_swap_r_b(emit, &tmp_dst, &tmp_src); 6235 } 6236 6237 if (save_w_1_mask & (1 << index)) { 6238 /* MOV tmp.w, 1.0 */ 6239 if (emit->key.vs.attrib_is_pure_int & (1 << index)) { 6240 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6241 &tmp_dst_w, &one_int, FALSE); 6242 } 6243 else { 6244 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6245 &tmp_dst_w, &one, FALSE); 6246 } 6247 } 6248 } 6249 6250 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; 6251 emit->key.vs.adjust_attrib_itof = save_itof_mask; 6252 emit->key.vs.adjust_attrib_utof = save_utof_mask; 6253 emit->key.vs.attrib_is_bgra = save_is_bgra_mask; 6254 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; 6255 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; 6256 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; 6257 } 6258} 6259 6260 6261/** 6262 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed 6263 * to implement some instructions. We pre-allocate those values here 6264 * in the immediate constant buffer. 6265 */ 6266static void 6267alloc_common_immediates(struct svga_shader_emitter_v10 *emit) 6268{ 6269 unsigned n = 0; 6270 6271 emit->common_immediate_pos[n++] = 6272 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); 6273 6274 emit->common_immediate_pos[n++] = 6275 alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f); 6276 6277 emit->common_immediate_pos[n++] = 6278 alloc_immediate_int4(emit, 0, 1, 0, -1); 6279 6280 if (emit->key.vs.attrib_puint_to_snorm) { 6281 emit->common_immediate_pos[n++] = 6282 alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 6283 } 6284 6285 if (emit->key.vs.attrib_puint_to_uscaled) { 6286 emit->common_immediate_pos[n++] = 6287 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); 6288 } 6289 6290 if (emit->key.vs.attrib_puint_to_sscaled) { 6291 emit->common_immediate_pos[n++] = 6292 alloc_immediate_int4(emit, 22, 12, 2, 0); 6293 6294 emit->common_immediate_pos[n++] = 6295 alloc_immediate_int4(emit, 22, 30, 0, 0); 6296 } 6297 6298 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 6299 emit->num_common_immediates = n; 6300} 6301 6302 6303/** 6304 * Emit any extra/helper declarations/code that we might need between 6305 * the declaration section and code section. 6306 */ 6307static boolean 6308emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 6309{ 6310 /* Properties */ 6311 if (emit->unit == PIPE_SHADER_GEOMETRY) 6312 emit_property_instructions(emit); 6313 6314 /* Declare inputs */ 6315 if (!emit_input_declarations(emit)) 6316 return FALSE; 6317 6318 /* Declare outputs */ 6319 if (!emit_output_declarations(emit)) 6320 return FALSE; 6321 6322 /* Declare temporary registers */ 6323 emit_temporaries_declaration(emit); 6324 6325 /* Declare constant registers */ 6326 emit_constant_declaration(emit); 6327 6328 /* Declare samplers and resources */ 6329 emit_sampler_declarations(emit); 6330 emit_resource_declarations(emit); 6331 6332 /* Declare clip distance output registers */ 6333 if (emit->unit == PIPE_SHADER_VERTEX || 6334 emit->unit == PIPE_SHADER_GEOMETRY) { 6335 emit_clip_distance_declarations(emit); 6336 } 6337 6338 alloc_common_immediates(emit); 6339 6340 if (emit->unit == PIPE_SHADER_FRAGMENT && 6341 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6342 float alpha = emit->key.fs.alpha_ref; 6343 emit->fs.alpha_ref_index = 6344 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 6345 } 6346 6347 /* Now, emit the constant block containing all the immediates 6348 * declared by shader, as well as the extra ones seen above. 6349 */ 6350 emit_vgpu10_immediates_block(emit); 6351 6352 if (emit->unit == PIPE_SHADER_FRAGMENT) { 6353 emit_frontface_instructions(emit); 6354 emit_fragcoord_instructions(emit); 6355 } 6356 else if (emit->unit == PIPE_SHADER_VERTEX) { 6357 emit_vertex_attrib_instructions(emit); 6358 } 6359 6360 return TRUE; 6361} 6362 6363 6364/** 6365 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 6366 * against the alpha reference value and discards the fragment if the 6367 * comparison fails. 6368 */ 6369static void 6370emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, 6371 unsigned fs_color_tmp_index) 6372{ 6373 /* compare output color's alpha to alpha ref and kill */ 6374 unsigned tmp = get_temp_index(emit); 6375 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6376 struct tgsi_full_src_register tmp_src_x = 6377 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6378 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6379 struct tgsi_full_src_register color_src = 6380 make_src_temp_reg(fs_color_tmp_index); 6381 struct tgsi_full_src_register color_src_w = 6382 scalar_src(&color_src, TGSI_SWIZZLE_W); 6383 struct tgsi_full_src_register ref_src = 6384 make_src_immediate_reg(emit->fs.alpha_ref_index); 6385 struct tgsi_full_dst_register color_dst = 6386 make_dst_output_reg(emit->fs.color_out_index[0]); 6387 6388 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6389 6390 /* dst = src0 'alpha_func' src1 */ 6391 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, 6392 &color_src_w, &ref_src); 6393 6394 /* DISCARD if dst.x == 0 */ 6395 begin_emit_instruction(emit); 6396 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ 6397 emit_src_register(emit, &tmp_src_x); 6398 end_emit_instruction(emit); 6399 6400 /* If we don't need to broadcast the color below or set fragments to 6401 * white, emit final color here. 6402 */ 6403 if (emit->key.fs.write_color0_to_n_cbufs <= 1 && 6404 !emit->key.fs.white_fragments) { 6405 /* MOV output.color, tempcolor */ 6406 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6407 &color_src, FALSE); /* XXX saturate? */ 6408 } 6409 6410 free_temp_indexes(emit); 6411} 6412 6413 6414/** 6415 * When we need to emit white for all fragments (for emulating XOR logicop 6416 * mode), this function copies white into the temporary color output register. 6417 */ 6418static void 6419emit_set_color_white(struct svga_shader_emitter_v10 *emit, 6420 unsigned fs_color_tmp_index) 6421{ 6422 struct tgsi_full_dst_register color_dst = 6423 make_dst_temp_reg(fs_color_tmp_index); 6424 struct tgsi_full_src_register white = 6425 make_immediate_reg_float(emit, 1.0f); 6426 6427 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); 6428} 6429 6430 6431/** 6432 * Emit instructions for writing a single color output to multiple 6433 * color buffers. 6434 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or 6435 * when key.fs.white_fragments is true). 6436 * property is set and the number of render targets is greater than one. 6437 * \param fs_color_tmp_index index of the temp register that holds the 6438 * color to broadcast. 6439 */ 6440static void 6441emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, 6442 unsigned fs_color_tmp_index) 6443{ 6444 const unsigned n = emit->key.fs.write_color0_to_n_cbufs; 6445 unsigned i; 6446 struct tgsi_full_src_register color_src = 6447 make_src_temp_reg(fs_color_tmp_index); 6448 6449 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6450 6451 for (i = 0; i < n; i++) { 6452 unsigned output_reg = emit->fs.color_out_index[i]; 6453 struct tgsi_full_dst_register color_dst = 6454 make_dst_output_reg(output_reg); 6455 6456 /* Fill in this semantic here since we'll use it later in 6457 * emit_dst_register(). 6458 */ 6459 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; 6460 6461 /* MOV output.color[i], tempcolor */ 6462 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6463 &color_src, FALSE); /* XXX saturate? */ 6464 } 6465} 6466 6467 6468/** 6469 * Emit extra helper code after the original shader code, but before the 6470 * last END/RET instruction. 6471 * For vertex shaders this means emitting the extra code to apply the 6472 * prescale scale/translation. 6473 */ 6474static boolean 6475emit_post_helpers(struct svga_shader_emitter_v10 *emit) 6476{ 6477 if (emit->unit == PIPE_SHADER_VERTEX) { 6478 emit_vertex_instructions(emit); 6479 } 6480 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 6481 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; 6482 6483 /* We no longer want emit_dst_register() to substitute the 6484 * temporary fragment color register for the real color output. 6485 */ 6486 emit->fs.color_tmp_index = INVALID_INDEX; 6487 6488 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6489 emit_alpha_test_instructions(emit, fs_color_tmp_index); 6490 } 6491 if (emit->key.fs.white_fragments) { 6492 emit_set_color_white(emit, fs_color_tmp_index); 6493 } 6494 if (emit->key.fs.write_color0_to_n_cbufs > 1 || 6495 emit->key.fs.white_fragments) { 6496 emit_broadcast_color_instructions(emit, fs_color_tmp_index); 6497 } 6498 } 6499 6500 return TRUE; 6501} 6502 6503 6504/** 6505 * Translate the TGSI tokens into VGPU10 tokens. 6506 */ 6507static boolean 6508emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, 6509 const struct tgsi_token *tokens) 6510{ 6511 struct tgsi_parse_context parse; 6512 boolean ret = TRUE; 6513 boolean pre_helpers_emitted = FALSE; 6514 unsigned inst_number = 0; 6515 6516 tgsi_parse_init(&parse, tokens); 6517 6518 while (!tgsi_parse_end_of_tokens(&parse)) { 6519 tgsi_parse_token(&parse); 6520 6521 switch (parse.FullToken.Token.Type) { 6522 case TGSI_TOKEN_TYPE_IMMEDIATE: 6523 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); 6524 if (!ret) 6525 goto done; 6526 break; 6527 6528 case TGSI_TOKEN_TYPE_DECLARATION: 6529 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); 6530 if (!ret) 6531 goto done; 6532 break; 6533 6534 case TGSI_TOKEN_TYPE_INSTRUCTION: 6535 if (!pre_helpers_emitted) { 6536 ret = emit_pre_helpers(emit); 6537 if (!ret) 6538 goto done; 6539 pre_helpers_emitted = TRUE; 6540 } 6541 ret = emit_vgpu10_instruction(emit, inst_number++, 6542 &parse.FullToken.FullInstruction); 6543 if (!ret) 6544 goto done; 6545 break; 6546 6547 case TGSI_TOKEN_TYPE_PROPERTY: 6548 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); 6549 if (!ret) 6550 goto done; 6551 break; 6552 6553 default: 6554 break; 6555 } 6556 } 6557 6558done: 6559 tgsi_parse_free(&parse); 6560 return ret; 6561} 6562 6563 6564/** 6565 * Emit the first VGPU10 shader tokens. 6566 */ 6567static boolean 6568emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) 6569{ 6570 VGPU10ProgramToken ptoken; 6571 6572 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ 6573 ptoken.majorVersion = 4; 6574 ptoken.minorVersion = 0; 6575 ptoken.programType = translate_shader_type(emit->unit); 6576 if (!emit_dword(emit, ptoken.value)) 6577 return FALSE; 6578 6579 /* Second token: total length of shader, in tokens. We can't fill this 6580 * in until we're all done. Emit zero for now. 6581 */ 6582 return emit_dword(emit, 0); 6583} 6584 6585 6586static boolean 6587emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) 6588{ 6589 VGPU10ProgramToken *tokens; 6590 6591 /* Replace the second token with total shader length */ 6592 tokens = (VGPU10ProgramToken *) emit->buf; 6593 tokens[1].value = emit_get_num_tokens(emit); 6594 6595 return TRUE; 6596} 6597 6598 6599/** 6600 * Modify the FS to read the BCOLORs and use the FACE register 6601 * to choose between the front/back colors. 6602 */ 6603static const struct tgsi_token * 6604transform_fs_twoside(const struct tgsi_token *tokens) 6605{ 6606 if (0) { 6607 debug_printf("Before tgsi_add_two_side ------------------\n"); 6608 tgsi_dump(tokens,0); 6609 } 6610 tokens = tgsi_add_two_side(tokens); 6611 if (0) { 6612 debug_printf("After tgsi_add_two_side ------------------\n"); 6613 tgsi_dump(tokens, 0); 6614 } 6615 return tokens; 6616} 6617 6618 6619/** 6620 * Modify the FS to do polygon stipple. 6621 */ 6622static const struct tgsi_token * 6623transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, 6624 const struct tgsi_token *tokens) 6625{ 6626 const struct tgsi_token *new_tokens; 6627 unsigned unit; 6628 6629 if (0) { 6630 debug_printf("Before pstipple ------------------\n"); 6631 tgsi_dump(tokens,0); 6632 } 6633 6634 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, 6635 TGSI_FILE_INPUT); 6636 6637 emit->fs.pstipple_sampler_unit = unit; 6638 6639 /* Setup texture state for stipple */ 6640 emit->sampler_target[unit] = TGSI_TEXTURE_2D; 6641 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 6642 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 6643 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 6644 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 6645 6646 if (0) { 6647 debug_printf("After pstipple ------------------\n"); 6648 tgsi_dump(new_tokens, 0); 6649 } 6650 6651 return new_tokens; 6652} 6653 6654/** 6655 * Modify the FS to support anti-aliasing point. 6656 */ 6657static const struct tgsi_token * 6658transform_fs_aapoint(const struct tgsi_token *tokens, 6659 int aa_coord_index) 6660{ 6661 if (0) { 6662 debug_printf("Before tgsi_add_aa_point ------------------\n"); 6663 tgsi_dump(tokens,0); 6664 } 6665 tokens = tgsi_add_aa_point(tokens, aa_coord_index); 6666 if (0) { 6667 debug_printf("After tgsi_add_aa_point ------------------\n"); 6668 tgsi_dump(tokens, 0); 6669 } 6670 return tokens; 6671} 6672 6673/** 6674 * This is the main entrypoint for the TGSI -> VPGU10 translator. 6675 */ 6676struct svga_shader_variant * 6677svga_tgsi_vgpu10_translate(struct svga_context *svga, 6678 const struct svga_shader *shader, 6679 const struct svga_compile_key *key, 6680 unsigned unit) 6681{ 6682 struct svga_shader_variant *variant = NULL; 6683 struct svga_shader_emitter_v10 *emit; 6684 const struct tgsi_token *tokens = shader->tokens; 6685 struct svga_vertex_shader *vs = svga->curr.vs; 6686 struct svga_geometry_shader *gs = svga->curr.gs; 6687 6688 assert(unit == PIPE_SHADER_VERTEX || 6689 unit == PIPE_SHADER_GEOMETRY || 6690 unit == PIPE_SHADER_FRAGMENT); 6691 6692 /* These two flags cannot be used together */ 6693 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6694 6695 /* 6696 * Setup the code emitter 6697 */ 6698 emit = alloc_emitter(); 6699 if (!emit) 6700 return NULL; 6701 6702 emit->unit = unit; 6703 emit->key = *key; 6704 6705 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6706 emit->key.gs.need_prescale); 6707 emit->vposition.tmp_index = INVALID_INDEX; 6708 emit->vposition.so_index = INVALID_INDEX; 6709 emit->vposition.out_index = INVALID_INDEX; 6710 6711 emit->fs.color_tmp_index = INVALID_INDEX; 6712 emit->fs.face_input_index = INVALID_INDEX; 6713 emit->fs.fragcoord_input_index = INVALID_INDEX; 6714 6715 emit->gs.prim_id_index = INVALID_INDEX; 6716 6717 emit->clip_dist_out_index = INVALID_INDEX; 6718 emit->clip_dist_tmp_index = INVALID_INDEX; 6719 emit->clip_dist_so_index = INVALID_INDEX; 6720 emit->clip_vertex_out_index = INVALID_INDEX; 6721 6722 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6723 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6724 } 6725 6726 if (unit == PIPE_SHADER_FRAGMENT) { 6727 if (key->fs.light_twoside) { 6728 tokens = transform_fs_twoside(tokens); 6729 } 6730 if (key->fs.pstipple) { 6731 const struct tgsi_token *new_tokens = 6732 transform_fs_pstipple(emit, tokens); 6733 if (tokens != shader->tokens) { 6734 /* free the two-sided shader tokens */ 6735 tgsi_free_tokens(tokens); 6736 } 6737 tokens = new_tokens; 6738 } 6739 if (key->fs.aa_point) { 6740 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6741 } 6742 } 6743 6744 if (SVGA_DEBUG & DEBUG_TGSI) { 6745 debug_printf("#####################################\n"); 6746 debug_printf("### TGSI Shader %u\n", shader->id); 6747 tgsi_dump(tokens, 0); 6748 } 6749 6750 /** 6751 * Rescan the header if the token string is different from the one 6752 * included in the shader; otherwise, the header info is already up-to-date 6753 */ 6754 if (tokens != shader->tokens) { 6755 tgsi_scan_shader(tokens, &emit->info); 6756 } else { 6757 emit->info = shader->info; 6758 } 6759 6760 emit->num_outputs = emit->info.num_outputs; 6761 6762 if (unit == PIPE_SHADER_FRAGMENT) { 6763 /* Compute FS input remapping to match the output from VS/GS */ 6764 if (gs) { 6765 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6766 } else { 6767 assert(vs); 6768 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6769 } 6770 } else if (unit == PIPE_SHADER_GEOMETRY) { 6771 assert(vs); 6772 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6773 } 6774 6775 determine_clipping_mode(emit); 6776 6777 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6778 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6779 /* if there is stream output declarations associated 6780 * with this shader or the shader writes to ClipDistance 6781 * then reserve extra registers for the non-adjusted vertex position 6782 * and the ClipDistance shadow copy 6783 */ 6784 emit->vposition.so_index = emit->num_outputs++; 6785 6786 if (emit->clip_mode == CLIP_DISTANCE) { 6787 emit->clip_dist_so_index = emit->num_outputs++; 6788 if (emit->info.num_written_clipdistance > 4) 6789 emit->num_outputs++; 6790 } 6791 } 6792 } 6793 6794 /* 6795 * Do actual shader translation. 6796 */ 6797 if (!emit_vgpu10_header(emit)) { 6798 debug_printf("svga: emit VGPU10 header failed\n"); 6799 goto cleanup; 6800 } 6801 6802 if (!emit_vgpu10_instructions(emit, tokens)) { 6803 debug_printf("svga: emit VGPU10 instructions failed\n"); 6804 goto cleanup; 6805 } 6806 6807 if (!emit_vgpu10_tail(emit)) { 6808 debug_printf("svga: emit VGPU10 tail failed\n"); 6809 goto cleanup; 6810 } 6811 6812 if (emit->register_overflow) { 6813 goto cleanup; 6814 } 6815 6816 /* 6817 * Create, initialize the 'variant' object. 6818 */ 6819 variant = svga_new_shader_variant(svga); 6820 if (!variant) 6821 goto cleanup; 6822 6823 variant->shader = shader; 6824 variant->nr_tokens = emit_get_num_tokens(emit); 6825 variant->tokens = (const unsigned *)emit->buf; 6826 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6827 memcpy(&variant->key, key, sizeof(*key)); 6828 variant->id = UTIL_BITMASK_INVALID_INDEX; 6829 6830 /* The extra constant starting offset starts with the number of 6831 * shader constants declared in the shader. 6832 */ 6833 variant->extra_const_start = emit->num_shader_consts[0]; 6834 if (key->gs.wide_point) { 6835 /** 6836 * The extra constant added in the transformed shader 6837 * for inverse viewport scale is to be supplied by the driver. 6838 * So the extra constant starting offset needs to be reduced by 1. 6839 */ 6840 assert(variant->extra_const_start > 0); 6841 variant->extra_const_start--; 6842 } 6843 6844 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6845 6846 /* If there was exactly one write to a fragment shader output register 6847 * and it came from a constant buffer, we know all fragments will have 6848 * the same color (except for blending). 6849 */ 6850 variant->constant_color_output = 6851 emit->constant_color_output && emit->num_output_writes == 1; 6852 6853 /** keep track in the variant if flat interpolation is used 6854 * for any of the varyings. 6855 */ 6856 variant->uses_flat_interp = emit->uses_flat_interp; 6857 6858 if (tokens != shader->tokens) { 6859 tgsi_free_tokens(tokens); 6860 } 6861 6862cleanup: 6863 free_emitter(emit); 6864 6865 return variant; 6866} 6867