svga_tgsi_vgpu10.c revision a4ace98a9733b3e83d971f4871c2908749c0e5c8
1/********************************************************** 2 * Copyright 1998-2013 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26/** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 
30 * 31 * \author Mingcheng Chen 32 * \author Brian Paul 33 */ 34 35#include "pipe/p_compiler.h" 36#include "pipe/p_shader_tokens.h" 37#include "pipe/p_defines.h" 38#include "tgsi/tgsi_build.h" 39#include "tgsi/tgsi_dump.h" 40#include "tgsi/tgsi_info.h" 41#include "tgsi/tgsi_parse.h" 42#include "tgsi/tgsi_scan.h" 43#include "tgsi/tgsi_two_side.h" 44#include "tgsi/tgsi_aa_point.h" 45#include "tgsi/tgsi_util.h" 46#include "util/u_math.h" 47#include "util/u_memory.h" 48#include "util/u_bitmask.h" 49#include "util/u_debug.h" 50#include "util/u_pstipple.h" 51 52#include "svga_context.h" 53#include "svga_debug.h" 54#include "svga_link.h" 55#include "svga_shader.h" 56#include "svga_tgsi.h" 57 58#include "VGPU10ShaderTokens.h" 59 60 61#define INVALID_INDEX 99999 62#define MAX_INTERNAL_TEMPS 3 63#define MAX_SYSTEM_VALUES 4 64#define MAX_IMMEDIATE_COUNT \ 65 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) 66#define MAX_TEMP_ARRAYS 64 /* Enough? */ 67 68 69/** 70 * Clipping is complicated. There's four different cases which we 71 * handle during VS/GS shader translation: 72 */ 73enum clipping_mode 74{ 75 CLIP_NONE, /**< No clipping enabled */ 76 CLIP_LEGACY, /**< The shader has no clipping declarations or code but 77 * one or more user-defined clip planes are enabled. We 78 * generate extra code to emit clip distances. 79 */ 80 CLIP_DISTANCE, /**< The shader already declares clip distance output 81 * registers and has code to write to them. 82 */ 83 CLIP_VERTEX /**< The shader declares a clip vertex output register and 84 * has code that writes to the register. We convert the 85 * clipvertex position into one or more clip distances. 
86 */ 87}; 88 89 90struct svga_shader_emitter_v10 91{ 92 /* The token output buffer */ 93 unsigned size; 94 char *buf; 95 char *ptr; 96 97 /* Information about the shader and state (does not change) */ 98 struct svga_compile_key key; 99 struct tgsi_shader_info info; 100 unsigned unit; 101 102 unsigned inst_start_token; 103 boolean discard_instruction; /**< throw away current instruction? */ 104 105 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; 106 unsigned num_immediates; /**< Number of immediates emitted */ 107 unsigned common_immediate_pos[8]; /**< literals for common immediates */ 108 unsigned num_common_immediates; 109 boolean immediates_emitted; 110 111 unsigned num_outputs; /**< include any extra outputs */ 112 /** The first extra output is reserved for 113 * non-adjusted vertex position for 114 * stream output purpose 115 */ 116 117 /* Temporary Registers */ 118 unsigned num_shader_temps; /**< num of temps used by original shader */ 119 unsigned internal_temp_count; /**< currently allocated internal temps */ 120 struct { 121 unsigned start, size; 122 } temp_arrays[MAX_TEMP_ARRAYS]; 123 unsigned num_temp_arrays; 124 125 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ 126 struct { 127 unsigned arrayId, index; 128 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ 129 130 /** Number of constants used by original shader for each constant buffer. 131 * The size should probably always match with that of svga_state.constbufs. 
132 */ 133 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; 134 135 /* Samplers */ 136 unsigned num_samplers; 137 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ 138 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ 139 140 /* Address regs (really implemented with temps) */ 141 unsigned num_address_regs; 142 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; 143 144 /* Output register usage masks */ 145 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; 146 147 /* To map TGSI system value index to VGPU shader input indexes */ 148 ubyte system_value_indexes[MAX_SYSTEM_VALUES]; 149 150 struct { 151 /* vertex position scale/translation */ 152 unsigned out_index; /**< the real position output reg */ 153 unsigned tmp_index; /**< the fake/temp position output reg */ 154 unsigned so_index; /**< the non-adjusted position output reg */ 155 unsigned prescale_scale_index, prescale_trans_index; 156 boolean need_prescale; 157 } vposition; 158 159 /* For vertex shaders only */ 160 struct { 161 /* viewport constant */ 162 unsigned viewport_index; 163 164 /* temp index of adjusted vertex attributes */ 165 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; 166 } vs; 167 168 /* For fragment shaders only */ 169 struct { 170 /* apha test */ 171 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ 172 unsigned color_tmp_index; /**< fake/temp color output reg */ 173 unsigned alpha_ref_index; /**< immediate constant for alpha ref */ 174 175 /* front-face */ 176 unsigned face_input_index; /**< real fragment shader face reg (bool) */ 177 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ 178 179 unsigned pstipple_sampler_unit; 180 181 unsigned fragcoord_input_index; /**< real fragment position input reg */ 182 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ 183 } fs; 184 185 /* For geometry shaders only */ 186 struct { 187 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ 188 
VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ 189 unsigned input_size; /**< size of input arrays */ 190 unsigned prim_id_index; /**< primitive id register index */ 191 unsigned max_out_vertices; /**< maximum number of output vertices */ 192 } gs; 193 194 /* For vertex or geometry shaders */ 195 enum clipping_mode clip_mode; 196 unsigned clip_dist_out_index; /**< clip distance output register index */ 197 unsigned clip_dist_tmp_index; /**< clip distance temporary register */ 198 unsigned clip_dist_so_index; /**< clip distance shadow copy */ 199 200 /** Index of temporary holding the clipvertex coordinate */ 201 unsigned clip_vertex_out_index; /**< clip vertex output register index */ 202 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ 203 204 /* user clip plane constant slot indexes */ 205 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; 206 207 unsigned num_output_writes; 208 boolean constant_color_output; 209 210 boolean uses_flat_interp; 211 212 /* For all shaders: const reg index for RECT coord scaling */ 213 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; 214 215 /* For all shaders: const reg index for texture buffer size */ 216 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; 217 218 /* VS/GS/FS Linkage info */ 219 struct shader_linkage linkage; 220 221 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ 222}; 223 224 225static boolean 226emit_post_helpers(struct svga_shader_emitter_v10 *emit); 227 228static boolean 229emit_vertex(struct svga_shader_emitter_v10 *emit, 230 const struct tgsi_full_instruction *inst); 231 232static char err_buf[128]; 233 234static boolean 235expand(struct svga_shader_emitter_v10 *emit) 236{ 237 char *new_buf; 238 unsigned newsize = emit->size * 2; 239 240 if (emit->buf != err_buf) 241 new_buf = REALLOC(emit->buf, emit->size, newsize); 242 else 243 new_buf = NULL; 244 245 if (!new_buf) { 246 emit->ptr = err_buf; 247 emit->buf = err_buf; 248 emit->size = 
sizeof(err_buf); 249 return FALSE; 250 } 251 252 emit->size = newsize; 253 emit->ptr = new_buf + (emit->ptr - emit->buf); 254 emit->buf = new_buf; 255 return TRUE; 256} 257 258/** 259 * Create and initialize a new svga_shader_emitter_v10 object. 260 */ 261static struct svga_shader_emitter_v10 * 262alloc_emitter(void) 263{ 264 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); 265 266 if (!emit) 267 return NULL; 268 269 /* to initialize the output buffer */ 270 emit->size = 512; 271 if (!expand(emit)) { 272 FREE(emit); 273 return NULL; 274 } 275 return emit; 276} 277 278/** 279 * Free an svga_shader_emitter_v10 object. 280 */ 281static void 282free_emitter(struct svga_shader_emitter_v10 *emit) 283{ 284 assert(emit); 285 FREE(emit->buf); /* will be NULL if translation succeeded */ 286 FREE(emit); 287} 288 289static inline boolean 290reserve(struct svga_shader_emitter_v10 *emit, 291 unsigned nr_dwords) 292{ 293 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { 294 if (!expand(emit)) 295 return FALSE; 296 } 297 298 return TRUE; 299} 300 301static boolean 302emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) 303{ 304 if (!reserve(emit, 1)) 305 return FALSE; 306 307 *(uint32 *)emit->ptr = dword; 308 emit->ptr += sizeof dword; 309 return TRUE; 310} 311 312static boolean 313emit_dwords(struct svga_shader_emitter_v10 *emit, 314 const uint32 *dwords, 315 unsigned nr) 316{ 317 if (!reserve(emit, nr)) 318 return FALSE; 319 320 memcpy(emit->ptr, dwords, nr * sizeof *dwords); 321 emit->ptr += nr * sizeof *dwords; 322 return TRUE; 323} 324 325/** Return the number of tokens in the emitter's buffer */ 326static unsigned 327emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) 328{ 329 return (emit->ptr - emit->buf) / sizeof(unsigned); 330} 331 332 333/** 334 * Check for register overflow. If we overflow we'll set an 335 * error flag. 
This function can be called for register declarations 336 * or use as src/dst instruction operands. 337 * \param type register type. One of VGPU10_OPERAND_TYPE_x 338 or VGPU10_OPCODE_DCL_x 339 * \param index the register index 340 */ 341static void 342check_register_index(struct svga_shader_emitter_v10 *emit, 343 unsigned operandType, unsigned index) 344{ 345 bool overflow_before = emit->register_overflow; 346 347 switch (operandType) { 348 case VGPU10_OPERAND_TYPE_TEMP: 349 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: 350 case VGPU10_OPCODE_DCL_TEMPS: 351 if (index >= VGPU10_MAX_TEMPS) { 352 emit->register_overflow = TRUE; 353 } 354 break; 355 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: 356 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: 357 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 358 emit->register_overflow = TRUE; 359 } 360 break; 361 case VGPU10_OPERAND_TYPE_INPUT: 362 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: 363 case VGPU10_OPCODE_DCL_INPUT: 364 case VGPU10_OPCODE_DCL_INPUT_SGV: 365 case VGPU10_OPCODE_DCL_INPUT_SIV: 366 case VGPU10_OPCODE_DCL_INPUT_PS: 367 case VGPU10_OPCODE_DCL_INPUT_PS_SGV: 368 case VGPU10_OPCODE_DCL_INPUT_PS_SIV: 369 if ((emit->unit == PIPE_SHADER_VERTEX && 370 index >= VGPU10_MAX_VS_INPUTS) || 371 (emit->unit == PIPE_SHADER_GEOMETRY && 372 index >= VGPU10_MAX_GS_INPUTS) || 373 (emit->unit == PIPE_SHADER_FRAGMENT && 374 index >= VGPU10_MAX_FS_INPUTS)) { 375 emit->register_overflow = TRUE; 376 } 377 break; 378 case VGPU10_OPERAND_TYPE_OUTPUT: 379 case VGPU10_OPCODE_DCL_OUTPUT: 380 case VGPU10_OPCODE_DCL_OUTPUT_SGV: 381 case VGPU10_OPCODE_DCL_OUTPUT_SIV: 382 if ((emit->unit == PIPE_SHADER_VERTEX && 383 index >= VGPU10_MAX_VS_OUTPUTS) || 384 (emit->unit == PIPE_SHADER_GEOMETRY && 385 index >= VGPU10_MAX_GS_OUTPUTS) || 386 (emit->unit == PIPE_SHADER_FRAGMENT && 387 index >= VGPU10_MAX_FS_OUTPUTS)) { 388 emit->register_overflow = TRUE; 389 } 390 break; 391 case VGPU10_OPERAND_TYPE_SAMPLER: 392 case VGPU10_OPCODE_DCL_SAMPLER: 393 if (index 
>= VGPU10_MAX_SAMPLERS) { 394 emit->register_overflow = TRUE; 395 } 396 break; 397 case VGPU10_OPERAND_TYPE_RESOURCE: 398 case VGPU10_OPCODE_DCL_RESOURCE: 399 if (index >= VGPU10_MAX_RESOURCES) { 400 emit->register_overflow = TRUE; 401 } 402 break; 403 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: 404 if (index >= MAX_IMMEDIATE_COUNT) { 405 emit->register_overflow = TRUE; 406 } 407 break; 408 default: 409 assert(0); 410 ; /* nothing */ 411 } 412 413 if (emit->register_overflow && !overflow_before) { 414 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", 415 operandType, index); 416 } 417} 418 419 420/** 421 * Examine misc state to determine the clipping mode. 422 */ 423static void 424determine_clipping_mode(struct svga_shader_emitter_v10 *emit) 425{ 426 if (emit->info.num_written_clipdistance > 0) { 427 emit->clip_mode = CLIP_DISTANCE; 428 } 429 else if (emit->info.writes_clipvertex) { 430 emit->clip_mode = CLIP_VERTEX; 431 } 432 else if (emit->key.clip_plane_enable) { 433 emit->clip_mode = CLIP_LEGACY; 434 } 435 else { 436 emit->clip_mode = CLIP_NONE; 437 } 438} 439 440 441/** 442 * For clip distance register declarations and clip distance register 443 * writes we need to mask the declaration usage or instruction writemask 444 * (respectively) against the set of the really-enabled clipping planes. 445 * 446 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables 447 * has a VS that writes to all 8 clip distance registers, but the plane enable 448 * flags are a subset of that. 449 * 450 * This function is used to apply the plane enable flags to the register 451 * declaration or instruction writemask. 452 * 453 * \param writemask the declaration usage mask or instruction writemask 454 * \param clip_reg_index which clip plane register is being declared/written. 
455 * The legal values are 0 and 1 (two clip planes per 456 * register, for a total of 8 clip planes) 457 */ 458static unsigned 459apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 460 unsigned writemask, unsigned clip_reg_index) 461{ 462 unsigned shift; 463 464 assert(clip_reg_index < 2); 465 466 /* four clip planes per clip register: */ 467 shift = clip_reg_index * 4; 468 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 469 470 return writemask; 471} 472 473 474/** 475 * Translate gallium shader type into VGPU10 type. 476 */ 477static VGPU10_PROGRAM_TYPE 478translate_shader_type(unsigned type) 479{ 480 switch (type) { 481 case PIPE_SHADER_VERTEX: 482 return VGPU10_VERTEX_SHADER; 483 case PIPE_SHADER_GEOMETRY: 484 return VGPU10_GEOMETRY_SHADER; 485 case PIPE_SHADER_FRAGMENT: 486 return VGPU10_PIXEL_SHADER; 487 default: 488 assert(!"Unexpected shader type"); 489 return VGPU10_VERTEX_SHADER; 490 } 491} 492 493 494/** 495 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 496 * Note: we only need to translate the opcodes for "simple" instructions, 497 * as seen below. All other opcodes are handled/translated specially. 
498 */ 499static VGPU10_OPCODE_TYPE 500translate_opcode(unsigned opcode) 501{ 502 switch (opcode) { 503 case TGSI_OPCODE_MOV: 504 return VGPU10_OPCODE_MOV; 505 case TGSI_OPCODE_MUL: 506 return VGPU10_OPCODE_MUL; 507 case TGSI_OPCODE_ADD: 508 return VGPU10_OPCODE_ADD; 509 case TGSI_OPCODE_DP3: 510 return VGPU10_OPCODE_DP3; 511 case TGSI_OPCODE_DP4: 512 return VGPU10_OPCODE_DP4; 513 case TGSI_OPCODE_MIN: 514 return VGPU10_OPCODE_MIN; 515 case TGSI_OPCODE_MAX: 516 return VGPU10_OPCODE_MAX; 517 case TGSI_OPCODE_MAD: 518 return VGPU10_OPCODE_MAD; 519 case TGSI_OPCODE_SQRT: 520 return VGPU10_OPCODE_SQRT; 521 case TGSI_OPCODE_FRC: 522 return VGPU10_OPCODE_FRC; 523 case TGSI_OPCODE_FLR: 524 return VGPU10_OPCODE_ROUND_NI; 525 case TGSI_OPCODE_FSEQ: 526 return VGPU10_OPCODE_EQ; 527 case TGSI_OPCODE_FSGE: 528 return VGPU10_OPCODE_GE; 529 case TGSI_OPCODE_FSNE: 530 return VGPU10_OPCODE_NE; 531 case TGSI_OPCODE_DDX: 532 return VGPU10_OPCODE_DERIV_RTX; 533 case TGSI_OPCODE_DDY: 534 return VGPU10_OPCODE_DERIV_RTY; 535 case TGSI_OPCODE_RET: 536 return VGPU10_OPCODE_RET; 537 case TGSI_OPCODE_DIV: 538 return VGPU10_OPCODE_DIV; 539 case TGSI_OPCODE_IDIV: 540 return VGPU10_OPCODE_IDIV; 541 case TGSI_OPCODE_DP2: 542 return VGPU10_OPCODE_DP2; 543 case TGSI_OPCODE_BRK: 544 return VGPU10_OPCODE_BREAK; 545 case TGSI_OPCODE_IF: 546 return VGPU10_OPCODE_IF; 547 case TGSI_OPCODE_ELSE: 548 return VGPU10_OPCODE_ELSE; 549 case TGSI_OPCODE_ENDIF: 550 return VGPU10_OPCODE_ENDIF; 551 case TGSI_OPCODE_CEIL: 552 return VGPU10_OPCODE_ROUND_PI; 553 case TGSI_OPCODE_I2F: 554 return VGPU10_OPCODE_ITOF; 555 case TGSI_OPCODE_NOT: 556 return VGPU10_OPCODE_NOT; 557 case TGSI_OPCODE_TRUNC: 558 return VGPU10_OPCODE_ROUND_Z; 559 case TGSI_OPCODE_SHL: 560 return VGPU10_OPCODE_ISHL; 561 case TGSI_OPCODE_AND: 562 return VGPU10_OPCODE_AND; 563 case TGSI_OPCODE_OR: 564 return VGPU10_OPCODE_OR; 565 case TGSI_OPCODE_XOR: 566 return VGPU10_OPCODE_XOR; 567 case TGSI_OPCODE_CONT: 568 return VGPU10_OPCODE_CONTINUE; 569 
case TGSI_OPCODE_EMIT: 570 return VGPU10_OPCODE_EMIT; 571 case TGSI_OPCODE_ENDPRIM: 572 return VGPU10_OPCODE_CUT; 573 case TGSI_OPCODE_BGNLOOP: 574 return VGPU10_OPCODE_LOOP; 575 case TGSI_OPCODE_ENDLOOP: 576 return VGPU10_OPCODE_ENDLOOP; 577 case TGSI_OPCODE_ENDSUB: 578 return VGPU10_OPCODE_RET; 579 case TGSI_OPCODE_NOP: 580 return VGPU10_OPCODE_NOP; 581 case TGSI_OPCODE_BREAKC: 582 return VGPU10_OPCODE_BREAKC; 583 case TGSI_OPCODE_END: 584 return VGPU10_OPCODE_RET; 585 case TGSI_OPCODE_F2I: 586 return VGPU10_OPCODE_FTOI; 587 case TGSI_OPCODE_IMAX: 588 return VGPU10_OPCODE_IMAX; 589 case TGSI_OPCODE_IMIN: 590 return VGPU10_OPCODE_IMIN; 591 case TGSI_OPCODE_UDIV: 592 case TGSI_OPCODE_UMOD: 593 case TGSI_OPCODE_MOD: 594 return VGPU10_OPCODE_UDIV; 595 case TGSI_OPCODE_IMUL_HI: 596 return VGPU10_OPCODE_IMUL; 597 case TGSI_OPCODE_INEG: 598 return VGPU10_OPCODE_INEG; 599 case TGSI_OPCODE_ISHR: 600 return VGPU10_OPCODE_ISHR; 601 case TGSI_OPCODE_ISGE: 602 return VGPU10_OPCODE_IGE; 603 case TGSI_OPCODE_ISLT: 604 return VGPU10_OPCODE_ILT; 605 case TGSI_OPCODE_F2U: 606 return VGPU10_OPCODE_FTOU; 607 case TGSI_OPCODE_UADD: 608 return VGPU10_OPCODE_IADD; 609 case TGSI_OPCODE_U2F: 610 return VGPU10_OPCODE_UTOF; 611 case TGSI_OPCODE_UCMP: 612 return VGPU10_OPCODE_MOVC; 613 case TGSI_OPCODE_UMAD: 614 return VGPU10_OPCODE_UMAD; 615 case TGSI_OPCODE_UMAX: 616 return VGPU10_OPCODE_UMAX; 617 case TGSI_OPCODE_UMIN: 618 return VGPU10_OPCODE_UMIN; 619 case TGSI_OPCODE_UMUL: 620 case TGSI_OPCODE_UMUL_HI: 621 return VGPU10_OPCODE_UMUL; 622 case TGSI_OPCODE_USEQ: 623 return VGPU10_OPCODE_IEQ; 624 case TGSI_OPCODE_USGE: 625 return VGPU10_OPCODE_UGE; 626 case TGSI_OPCODE_USHR: 627 return VGPU10_OPCODE_USHR; 628 case TGSI_OPCODE_USLT: 629 return VGPU10_OPCODE_ULT; 630 case TGSI_OPCODE_USNE: 631 return VGPU10_OPCODE_INE; 632 case TGSI_OPCODE_SWITCH: 633 return VGPU10_OPCODE_SWITCH; 634 case TGSI_OPCODE_CASE: 635 return VGPU10_OPCODE_CASE; 636 case TGSI_OPCODE_DEFAULT: 637 return 
VGPU10_OPCODE_DEFAULT; 638 case TGSI_OPCODE_ENDSWITCH: 639 return VGPU10_OPCODE_ENDSWITCH; 640 case TGSI_OPCODE_FSLT: 641 return VGPU10_OPCODE_LT; 642 case TGSI_OPCODE_ROUND: 643 return VGPU10_OPCODE_ROUND_NE; 644 default: 645 assert(!"Unexpected TGSI opcode in translate_opcode()"); 646 return VGPU10_OPCODE_NOP; 647 } 648} 649 650 651/** 652 * Translate a TGSI register file type into a VGPU10 operand type. 653 * \param array is the TGSI_FILE_TEMPORARY register an array? 654 */ 655static VGPU10_OPERAND_TYPE 656translate_register_file(enum tgsi_file_type file, boolean array) 657{ 658 switch (file) { 659 case TGSI_FILE_CONSTANT: 660 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 661 case TGSI_FILE_INPUT: 662 return VGPU10_OPERAND_TYPE_INPUT; 663 case TGSI_FILE_OUTPUT: 664 return VGPU10_OPERAND_TYPE_OUTPUT; 665 case TGSI_FILE_TEMPORARY: 666 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP 667 : VGPU10_OPERAND_TYPE_TEMP; 668 case TGSI_FILE_IMMEDIATE: 669 /* all immediates are 32-bit values at this time so 670 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 671 */ 672 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; 673 case TGSI_FILE_SAMPLER: 674 return VGPU10_OPERAND_TYPE_SAMPLER; 675 case TGSI_FILE_SYSTEM_VALUE: 676 return VGPU10_OPERAND_TYPE_INPUT; 677 678 /* XXX TODO more cases to finish */ 679 680 default: 681 assert(!"Bad tgsi register file!"); 682 return VGPU10_OPERAND_TYPE_NULL; 683 } 684} 685 686 687/** 688 * Emit a null dst register 689 */ 690static void 691emit_null_dst_register(struct svga_shader_emitter_v10 *emit) 692{ 693 VGPU10OperandToken0 operand; 694 695 operand.value = 0; 696 operand.operandType = VGPU10_OPERAND_TYPE_NULL; 697 operand.numComponents = VGPU10_OPERAND_0_COMPONENT; 698 699 emit_dword(emit, operand.value); 700} 701 702 703/** 704 * If the given register is a temporary, return the array ID. 705 * Else return zero. 
706 */ 707static unsigned 708get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 709 unsigned file, unsigned index) 710{ 711 if (file == TGSI_FILE_TEMPORARY) { 712 return emit->temp_map[index].arrayId; 713 } 714 else { 715 return 0; 716 } 717} 718 719 720/** 721 * If the given register is a temporary, convert the index from a TGSI 722 * TEMPORARY index to a VGPU10 temp index. 723 */ 724static unsigned 725remap_temp_index(const struct svga_shader_emitter_v10 *emit, 726 unsigned file, unsigned index) 727{ 728 if (file == TGSI_FILE_TEMPORARY) { 729 return emit->temp_map[index].index; 730 } 731 else { 732 return index; 733 } 734} 735 736 737/** 738 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 739 * Note: the operandType field must already be initialized. 740 */ 741static VGPU10OperandToken0 742setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 743 VGPU10OperandToken0 operand0, 744 unsigned file, 745 boolean indirect, boolean index2D, 746 unsigned tempArrayID) 747{ 748 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 749 750 /* 751 * Compute index dimensions 752 */ 753 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 754 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 755 /* there's no swizzle for in-line immediates */ 756 indexDim = VGPU10_OPERAND_INDEX_0D; 757 assert(operand0.selectionMode == 0); 758 } 759 else { 760 if (index2D || 761 tempArrayID > 0 || 762 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 763 indexDim = VGPU10_OPERAND_INDEX_2D; 764 } 765 else { 766 indexDim = VGPU10_OPERAND_INDEX_1D; 767 } 768 } 769 770 /* 771 * Compute index representations (immediate, relative, etc). 
772 */ 773 if (tempArrayID > 0) { 774 assert(file == TGSI_FILE_TEMPORARY); 775 /* First index is the array ID, second index is the array element */ 776 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 777 if (indirect) { 778 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 779 } 780 else { 781 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 782 } 783 } 784 else if (indirect) { 785 if (file == TGSI_FILE_CONSTANT) { 786 /* index[0] indicates which constant buffer while index[1] indicates 787 * the position in the constant buffer. 788 */ 789 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 790 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 791 } 792 else { 793 /* All other register files are 1-dimensional */ 794 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 795 } 796 } 797 else { 798 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 799 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 800 } 801 802 operand0.indexDimension = indexDim; 803 operand0.index0Representation = index0Rep; 804 operand0.index1Representation = index1Rep; 805 806 return operand0; 807} 808 809 810/** 811 * Emit the operand for expressing an address register for indirect indexing. 812 * Note that the address register is really just a temp register. 
813 * \param addr_reg_index which address register to use 814 */ 815static void 816emit_indirect_register(struct svga_shader_emitter_v10 *emit, 817 unsigned addr_reg_index) 818{ 819 unsigned tmp_reg_index; 820 VGPU10OperandToken0 operand0; 821 822 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 823 824 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 825 826 /* operand0 is a simple temporary register, selecting one component */ 827 operand0.value = 0; 828 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 829 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 830 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 831 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 832 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 833 operand0.swizzleX = 0; 834 operand0.swizzleY = 1; 835 operand0.swizzleZ = 2; 836 operand0.swizzleW = 3; 837 838 emit_dword(emit, operand0.value); 839 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 840} 841 842 843/** 844 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
845 * \param emit the emitter context 846 * \param reg the TGSI dst register to translate 847 */ 848static void 849emit_dst_register(struct svga_shader_emitter_v10 *emit, 850 const struct tgsi_full_dst_register *reg) 851{ 852 unsigned file = reg->Register.File; 853 unsigned index = reg->Register.Index; 854 const unsigned sem_name = emit->info.output_semantic_name[index]; 855 const unsigned sem_index = emit->info.output_semantic_index[index]; 856 unsigned writemask = reg->Register.WriteMask; 857 const unsigned indirect = reg->Register.Indirect; 858 const unsigned tempArrayId = get_temp_array_id(emit, file, index); 859 const unsigned index2d = reg->Register.Dimension; 860 VGPU10OperandToken0 operand0; 861 862 if (file == TGSI_FILE_OUTPUT) { 863 if (emit->unit == PIPE_SHADER_VERTEX || 864 emit->unit == PIPE_SHADER_GEOMETRY) { 865 if (index == emit->vposition.out_index && 866 emit->vposition.tmp_index != INVALID_INDEX) { 867 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the 868 * vertex position result in a temporary so that we can modify 869 * it in the post_helper() code. 870 */ 871 file = TGSI_FILE_TEMPORARY; 872 index = emit->vposition.tmp_index; 873 } 874 else if (sem_name == TGSI_SEMANTIC_CLIPDIST && 875 emit->clip_dist_tmp_index != INVALID_INDEX) { 876 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. 877 * We store the clip distance in a temporary first, then 878 * we'll copy it to the shadow copy and to CLIPDIST with the 879 * enabled planes mask in emit_clip_distance_instructions(). 
880 */ 881 file = TGSI_FILE_TEMPORARY; 882 index = emit->clip_dist_tmp_index + sem_index; 883 } 884 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && 885 emit->clip_vertex_tmp_index != INVALID_INDEX) { 886 /* replace the CLIPVERTEX output register with a temporary */ 887 assert(emit->clip_mode == CLIP_VERTEX); 888 assert(sem_index == 0); 889 file = TGSI_FILE_TEMPORARY; 890 index = emit->clip_vertex_tmp_index; 891 } 892 } 893 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 894 if (sem_name == TGSI_SEMANTIC_POSITION) { 895 /* Fragment depth output register */ 896 operand0.value = 0; 897 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 898 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 899 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 900 emit_dword(emit, operand0.value); 901 return; 902 } 903 else if (index == emit->fs.color_out_index[0] && 904 emit->fs.color_tmp_index != INVALID_INDEX) { 905 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the 906 * fragment color result in a temporary so that we can read it 907 * it in the post_helper() code. 908 */ 909 file = TGSI_FILE_TEMPORARY; 910 index = emit->fs.color_tmp_index; 911 } 912 else { 913 /* Typically, for fragment shaders, the output register index 914 * matches the color semantic index. But not when we write to 915 * the fragment depth register. In that case, OUT[0] will be 916 * fragdepth and OUT[1] will be the 0th color output. We need 917 * to use the semantic index for color outputs. 918 */ 919 assert(sem_name == TGSI_SEMANTIC_COLOR); 920 index = emit->info.output_semantic_index[index]; 921 922 emit->num_output_writes++; 923 } 924 } 925 } 926 927 /* init operand tokens to all zero */ 928 operand0.value = 0; 929 930 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 931 932 /* the operand has a writemask */ 933 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 934 935 /* Which of the four dest components to write to. 
Note that we can use a 936 * simple assignment here since TGSI writemasks match VGPU10 writemasks. 937 */ 938 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); 939 operand0.mask = writemask; 940 941 /* translate TGSI register file type to VGPU10 operand type */ 942 operand0.operandType = translate_register_file(file, tempArrayId > 0); 943 944 check_register_index(emit, operand0.operandType, index); 945 946 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 947 index2d, tempArrayId); 948 949 /* Emit tokens */ 950 emit_dword(emit, operand0.value); 951 if (tempArrayId > 0) { 952 emit_dword(emit, tempArrayId); 953 } 954 955 emit_dword(emit, remap_temp_index(emit, file, index)); 956 957 if (indirect) { 958 emit_indirect_register(emit, reg->Indirect.Index); 959 } 960} 961 962 963/** 964 * Translate a src register of a TGSI instruction and emit VGPU10 tokens. 965 */ 966static void 967emit_src_register(struct svga_shader_emitter_v10 *emit, 968 const struct tgsi_full_src_register *reg) 969{ 970 unsigned file = reg->Register.File; 971 unsigned index = reg->Register.Index; 972 const unsigned indirect = reg->Register.Indirect; 973 const unsigned tempArrayId = get_temp_array_id(emit, file, index); 974 const unsigned index2d = reg->Register.Dimension; 975 const unsigned swizzleX = reg->Register.SwizzleX; 976 const unsigned swizzleY = reg->Register.SwizzleY; 977 const unsigned swizzleZ = reg->Register.SwizzleZ; 978 const unsigned swizzleW = reg->Register.SwizzleW; 979 const unsigned absolute = reg->Register.Absolute; 980 const unsigned negate = reg->Register.Negate; 981 bool is_prim_id = FALSE; 982 983 VGPU10OperandToken0 operand0; 984 VGPU10OperandToken1 operand1; 985 986 if (emit->unit == PIPE_SHADER_FRAGMENT && 987 file == TGSI_FILE_INPUT) { 988 if (index == emit->fs.face_input_index) { 989 /* Replace INPUT[FACE] with TEMP[FACE] */ 990 file = TGSI_FILE_TEMPORARY; 991 index = emit->fs.face_tmp_index; 992 } 993 else if (index == 
emit->fs.fragcoord_input_index) { 994 /* Replace INPUT[POSITION] with TEMP[POSITION] */ 995 file = TGSI_FILE_TEMPORARY; 996 index = emit->fs.fragcoord_tmp_index; 997 } 998 else { 999 /* We remap fragment shader inputs to that FS input indexes 1000 * match up with VS/GS output indexes. 1001 */ 1002 index = emit->linkage.input_map[index]; 1003 } 1004 } 1005 else if (emit->unit == PIPE_SHADER_GEOMETRY && 1006 file == TGSI_FILE_INPUT) { 1007 is_prim_id = (index == emit->gs.prim_id_index); 1008 index = emit->linkage.input_map[index]; 1009 } 1010 else if (emit->unit == PIPE_SHADER_VERTEX) { 1011 if (file == TGSI_FILE_INPUT) { 1012 /* if input is adjusted... */ 1013 if ((emit->key.vs.adjust_attrib_w_1 | 1014 emit->key.vs.adjust_attrib_itof | 1015 emit->key.vs.adjust_attrib_utof | 1016 emit->key.vs.attrib_is_bgra | 1017 emit->key.vs.attrib_puint_to_snorm | 1018 emit->key.vs.attrib_puint_to_uscaled | 1019 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { 1020 file = TGSI_FILE_TEMPORARY; 1021 index = emit->vs.adjusted_input[index]; 1022 } 1023 } 1024 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1025 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1026 index = emit->system_value_indexes[index]; 1027 } 1028 } 1029 1030 operand0.value = operand1.value = 0; 1031 1032 if (is_prim_id) { 1033 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 1034 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1035 } 1036 else { 1037 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1038 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1039 } 1040 1041 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1042 index2d, tempArrayId); 1043 1044 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && 1045 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 1046 /* there's no swizzle for in-line immediates */ 1047 if (swizzleX == swizzleY && 1048 swizzleX == swizzleZ && 1049 swizzleX == swizzleW) { 1050 
operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1051 } 1052 else { 1053 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1054 } 1055 1056 operand0.swizzleX = swizzleX; 1057 operand0.swizzleY = swizzleY; 1058 operand0.swizzleZ = swizzleZ; 1059 operand0.swizzleW = swizzleW; 1060 1061 if (absolute || negate) { 1062 operand0.extended = 1; 1063 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; 1064 if (absolute && !negate) 1065 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; 1066 if (!absolute && negate) 1067 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; 1068 if (absolute && negate) 1069 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; 1070 } 1071 } 1072 1073 /* Emit the operand tokens */ 1074 emit_dword(emit, operand0.value); 1075 if (operand0.extended) 1076 emit_dword(emit, operand1.value); 1077 1078 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { 1079 /* Emit the four float/int in-line immediate values */ 1080 unsigned *c; 1081 assert(index < ARRAY_SIZE(emit->immediates)); 1082 assert(file == TGSI_FILE_IMMEDIATE); 1083 assert(swizzleX < 4); 1084 assert(swizzleY < 4); 1085 assert(swizzleZ < 4); 1086 assert(swizzleW < 4); 1087 c = (unsigned *) emit->immediates[index]; 1088 emit_dword(emit, c[swizzleX]); 1089 emit_dword(emit, c[swizzleY]); 1090 emit_dword(emit, c[swizzleZ]); 1091 emit_dword(emit, c[swizzleW]); 1092 } 1093 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { 1094 /* Emit the register index(es) */ 1095 if (index2d || 1096 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 1097 emit_dword(emit, reg->Dimension.Index); 1098 } 1099 1100 if (tempArrayId > 0) { 1101 emit_dword(emit, tempArrayId); 1102 } 1103 1104 emit_dword(emit, remap_temp_index(emit, file, index)); 1105 1106 if (indirect) { 1107 emit_indirect_register(emit, reg->Indirect.Index); 1108 } 1109 } 1110} 1111 1112 1113/** 1114 * Emit a resource operand (for use with a SAMPLE 
instruction). 1115 */ 1116static void 1117emit_resource_register(struct svga_shader_emitter_v10 *emit, 1118 unsigned resource_number) 1119{ 1120 VGPU10OperandToken0 operand0; 1121 1122 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); 1123 1124 /* init */ 1125 operand0.value = 0; 1126 1127 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 1128 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1129 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1130 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1131 operand0.swizzleX = VGPU10_COMPONENT_X; 1132 operand0.swizzleY = VGPU10_COMPONENT_Y; 1133 operand0.swizzleZ = VGPU10_COMPONENT_Z; 1134 operand0.swizzleW = VGPU10_COMPONENT_W; 1135 1136 emit_dword(emit, operand0.value); 1137 emit_dword(emit, resource_number); 1138} 1139 1140 1141/** 1142 * Emit a sampler operand (for use with a SAMPLE instruction). 1143 */ 1144static void 1145emit_sampler_register(struct svga_shader_emitter_v10 *emit, 1146 unsigned sampler_number) 1147{ 1148 VGPU10OperandToken0 operand0; 1149 1150 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); 1151 1152 /* init */ 1153 operand0.value = 0; 1154 1155 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 1156 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1157 1158 emit_dword(emit, operand0.value); 1159 emit_dword(emit, sampler_number); 1160} 1161 1162 1163/** 1164 * Emit an operand which reads the IS_FRONT_FACING register. 
1165 */ 1166static void 1167emit_face_register(struct svga_shader_emitter_v10 *emit) 1168{ 1169 VGPU10OperandToken0 operand0; 1170 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 1171 1172 /* init */ 1173 operand0.value = 0; 1174 1175 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 1176 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1177 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1178 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1179 1180 operand0.swizzleX = VGPU10_COMPONENT_X; 1181 operand0.swizzleY = VGPU10_COMPONENT_X; 1182 operand0.swizzleZ = VGPU10_COMPONENT_X; 1183 operand0.swizzleW = VGPU10_COMPONENT_X; 1184 1185 emit_dword(emit, operand0.value); 1186 emit_dword(emit, index); 1187} 1188 1189 1190/** 1191 * Emit the token for a VGPU10 opcode. 1192 * \param saturate clamp result to [0,1]? 1193 */ 1194static void 1195emit_opcode(struct svga_shader_emitter_v10 *emit, 1196 unsigned vgpu10_opcode, boolean saturate) 1197{ 1198 VGPU10OpcodeToken0 token0; 1199 1200 token0.value = 0; /* init all fields to zero */ 1201 token0.opcodeType = vgpu10_opcode; 1202 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1203 token0.saturate = saturate; 1204 1205 emit_dword(emit, token0.value); 1206} 1207 1208 1209/** 1210 * Emit the token for a VGPU10 resinfo instruction. 1211 * \param modifier return type modifier, _uint or _rcpFloat. 1212 * TODO: We may want to remove this parameter if it will 1213 * only ever be used as _uint. 
1214 */ 1215static void 1216emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 1217 VGPU10_RESINFO_RETURN_TYPE modifier) 1218{ 1219 VGPU10OpcodeToken0 token0; 1220 1221 token0.value = 0; /* init all fields to zero */ 1222 token0.opcodeType = VGPU10_OPCODE_RESINFO; 1223 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1224 token0.resinfoReturnType = modifier; 1225 1226 emit_dword(emit, token0.value); 1227} 1228 1229 1230/** 1231 * Emit opcode tokens for a texture sample instruction. Texture instructions 1232 * can be rather complicated (texel offsets, etc) so we have this specialized 1233 * function. 1234 */ 1235static void 1236emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 1237 unsigned vgpu10_opcode, boolean saturate, 1238 const int offsets[3]) 1239{ 1240 VGPU10OpcodeToken0 token0; 1241 VGPU10OpcodeToken1 token1; 1242 1243 token0.value = 0; /* init all fields to zero */ 1244 token0.opcodeType = vgpu10_opcode; 1245 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1246 token0.saturate = saturate; 1247 1248 if (offsets[0] || offsets[1] || offsets[2]) { 1249 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1250 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1251 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1252 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1253 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1254 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1255 1256 token0.extended = 1; 1257 token1.value = 0; 1258 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 1259 token1.offsetU = offsets[0]; 1260 token1.offsetV = offsets[1]; 1261 token1.offsetW = offsets[2]; 1262 } 1263 1264 emit_dword(emit, token0.value); 1265 if (token0.extended) { 1266 emit_dword(emit, token1.value); 1267 } 1268} 1269 1270 1271/** 1272 * Emit a DISCARD opcode token. 1273 * If nonzero is set, we'll discard the fragment if the X component is not 0. 
1274 * Otherwise, we'll discard the fragment if the X component is 0. 1275 */ 1276static void 1277emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) 1278{ 1279 VGPU10OpcodeToken0 opcode0; 1280 1281 opcode0.value = 0; 1282 opcode0.opcodeType = VGPU10_OPCODE_DISCARD; 1283 if (nonzero) 1284 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 1285 1286 emit_dword(emit, opcode0.value); 1287} 1288 1289 1290/** 1291 * We need to call this before we begin emitting a VGPU10 instruction. 1292 */ 1293static void 1294begin_emit_instruction(struct svga_shader_emitter_v10 *emit) 1295{ 1296 assert(emit->inst_start_token == 0); 1297 /* Save location of the instruction's VGPU10OpcodeToken0 token. 1298 * Note, we can't save a pointer because it would become invalid if 1299 * we have to realloc the output buffer. 1300 */ 1301 emit->inst_start_token = emit_get_num_tokens(emit); 1302} 1303 1304 1305/** 1306 * We need to call this after we emit the last token of a VGPU10 instruction. 1307 * This function patches in the opcode token's instructionLength field. 1308 */ 1309static void 1310end_emit_instruction(struct svga_shader_emitter_v10 *emit) 1311{ 1312 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; 1313 unsigned inst_length; 1314 1315 assert(emit->inst_start_token > 0); 1316 1317 if (emit->discard_instruction) { 1318 /* Back up the emit->ptr to where this instruction started so 1319 * that we discard the current instruction. 1320 */ 1321 emit->ptr = (char *) (tokens + emit->inst_start_token); 1322 } 1323 else { 1324 /* Compute instruction length and patch that into the start of 1325 * the instruction. 
1326 */ 1327 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; 1328 1329 assert(inst_length > 0); 1330 1331 tokens[emit->inst_start_token].instructionLength = inst_length; 1332 } 1333 1334 emit->inst_start_token = 0; /* reset to zero for error checking */ 1335 emit->discard_instruction = FALSE; 1336} 1337 1338 1339/** 1340 * Return index for a free temporary register. 1341 */ 1342static unsigned 1343get_temp_index(struct svga_shader_emitter_v10 *emit) 1344{ 1345 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); 1346 return emit->num_shader_temps + emit->internal_temp_count++; 1347} 1348 1349 1350/** 1351 * Release the temporaries which were generated by get_temp_index(). 1352 */ 1353static void 1354free_temp_indexes(struct svga_shader_emitter_v10 *emit) 1355{ 1356 emit->internal_temp_count = 0; 1357} 1358 1359 1360/** 1361 * Create a tgsi_full_src_register. 1362 */ 1363static struct tgsi_full_src_register 1364make_src_reg(unsigned file, unsigned index) 1365{ 1366 struct tgsi_full_src_register reg; 1367 1368 memset(®, 0, sizeof(reg)); 1369 reg.Register.File = file; 1370 reg.Register.Index = index; 1371 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 1372 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 1373 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1374 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 1375 return reg; 1376} 1377 1378 1379/** 1380 * Create a tgsi_full_src_register for a temporary. 1381 */ 1382static struct tgsi_full_src_register 1383make_src_temp_reg(unsigned index) 1384{ 1385 return make_src_reg(TGSI_FILE_TEMPORARY, index); 1386} 1387 1388 1389/** 1390 * Create a tgsi_full_src_register for a constant. 1391 */ 1392static struct tgsi_full_src_register 1393make_src_const_reg(unsigned index) 1394{ 1395 return make_src_reg(TGSI_FILE_CONSTANT, index); 1396} 1397 1398 1399/** 1400 * Create a tgsi_full_src_register for an immediate constant. 
1401 */ 1402static struct tgsi_full_src_register 1403make_src_immediate_reg(unsigned index) 1404{ 1405 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1406} 1407 1408 1409/** 1410 * Create a tgsi_full_dst_register. 1411 */ 1412static struct tgsi_full_dst_register 1413make_dst_reg(unsigned file, unsigned index) 1414{ 1415 struct tgsi_full_dst_register reg; 1416 1417 memset(®, 0, sizeof(reg)); 1418 reg.Register.File = file; 1419 reg.Register.Index = index; 1420 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1421 return reg; 1422} 1423 1424 1425/** 1426 * Create a tgsi_full_dst_register for a temporary. 1427 */ 1428static struct tgsi_full_dst_register 1429make_dst_temp_reg(unsigned index) 1430{ 1431 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1432} 1433 1434 1435/** 1436 * Create a tgsi_full_dst_register for an output. 1437 */ 1438static struct tgsi_full_dst_register 1439make_dst_output_reg(unsigned index) 1440{ 1441 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1442} 1443 1444 1445/** 1446 * Create negated tgsi_full_src_register. 1447 */ 1448static struct tgsi_full_src_register 1449negate_src(const struct tgsi_full_src_register *reg) 1450{ 1451 struct tgsi_full_src_register neg = *reg; 1452 neg.Register.Negate = !reg->Register.Negate; 1453 return neg; 1454} 1455 1456/** 1457 * Create absolute value of a tgsi_full_src_register. 
1458 */ 1459static struct tgsi_full_src_register 1460absolute_src(const struct tgsi_full_src_register *reg) 1461{ 1462 struct tgsi_full_src_register absolute = *reg; 1463 absolute.Register.Absolute = 1; 1464 return absolute; 1465} 1466 1467 1468/** Return the named swizzle term from the src register */ 1469static inline unsigned 1470get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) 1471{ 1472 switch (term) { 1473 case TGSI_SWIZZLE_X: 1474 return reg->Register.SwizzleX; 1475 case TGSI_SWIZZLE_Y: 1476 return reg->Register.SwizzleY; 1477 case TGSI_SWIZZLE_Z: 1478 return reg->Register.SwizzleZ; 1479 case TGSI_SWIZZLE_W: 1480 return reg->Register.SwizzleW; 1481 default: 1482 assert(!"Bad swizzle"); 1483 return TGSI_SWIZZLE_X; 1484 } 1485} 1486 1487 1488/** 1489 * Create swizzled tgsi_full_src_register. 1490 */ 1491static struct tgsi_full_src_register 1492swizzle_src(const struct tgsi_full_src_register *reg, 1493 unsigned swizzleX, unsigned swizzleY, 1494 unsigned swizzleZ, unsigned swizzleW) 1495{ 1496 struct tgsi_full_src_register swizzled = *reg; 1497 /* Note: we swizzle the current swizzle */ 1498 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1499 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1500 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1501 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1502 return swizzled; 1503} 1504 1505 1506/** 1507 * Create swizzled tgsi_full_src_register where all the swizzle 1508 * terms are the same. 
1509 */ 1510static struct tgsi_full_src_register 1511scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) 1512{ 1513 struct tgsi_full_src_register swizzled = *reg; 1514 /* Note: we swizzle the current swizzle */ 1515 swizzled.Register.SwizzleX = 1516 swizzled.Register.SwizzleY = 1517 swizzled.Register.SwizzleZ = 1518 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1519 return swizzled; 1520} 1521 1522 1523/** 1524 * Create new tgsi_full_dst_register with writemask. 1525 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1526 */ 1527static struct tgsi_full_dst_register 1528writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1529{ 1530 struct tgsi_full_dst_register masked = *reg; 1531 masked.Register.WriteMask = mask; 1532 return masked; 1533} 1534 1535 1536/** 1537 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1538 */ 1539static boolean 1540same_swizzle_terms(const struct tgsi_full_src_register *reg) 1541{ 1542 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1543 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1544 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1545} 1546 1547 1548/** 1549 * Search the vector for the value 'x' and return its position. 1550 */ 1551static int 1552find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1553 union tgsi_immediate_data x) 1554{ 1555 unsigned i; 1556 for (i = 0; i < 4; i++) { 1557 if (vec[i].Int == x.Int) 1558 return i; 1559 } 1560 return -1; 1561} 1562 1563 1564/** 1565 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 
1566 */ 1567static int 1568find_immediate(struct svga_shader_emitter_v10 *emit, 1569 union tgsi_immediate_data x, unsigned startIndex) 1570{ 1571 const unsigned endIndex = emit->num_immediates; 1572 unsigned i; 1573 1574 assert(emit->immediates_emitted); 1575 1576 /* Search immediates for x, y, z, w */ 1577 for (i = startIndex; i < endIndex; i++) { 1578 if (x.Int == emit->immediates[i][0].Int || 1579 x.Int == emit->immediates[i][1].Int || 1580 x.Int == emit->immediates[i][2].Int || 1581 x.Int == emit->immediates[i][3].Int) { 1582 return i; 1583 } 1584 } 1585 /* Should never try to use an immediate value that wasn't pre-declared */ 1586 assert(!"find_immediate() failed!"); 1587 return -1; 1588} 1589 1590 1591/** 1592 * Return a tgsi_full_src_register for an immediate/literal 1593 * union tgsi_immediate_data[4] value. 1594 * Note: the values must have been previously declared/allocated in 1595 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same 1596 * vec4 immediate. 1597 */ 1598static struct tgsi_full_src_register 1599make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, 1600 const union tgsi_immediate_data imm[4]) 1601{ 1602 struct tgsi_full_src_register reg; 1603 unsigned i; 1604 1605 for (i = 0; i < emit->num_common_immediates; i++) { 1606 /* search for first component value */ 1607 int immpos = find_immediate(emit, imm[0], i); 1608 int x, y, z, w; 1609 1610 assert(immpos >= 0); 1611 1612 /* find remaining components within the immediate vector */ 1613 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); 1614 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); 1615 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); 1616 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); 1617 1618 if (x >=0 && y >= 0 && z >= 0 && w >= 0) { 1619 /* found them all */ 1620 memset(®, 0, sizeof(reg)); 1621 reg.Register.File = TGSI_FILE_IMMEDIATE; 1622 reg.Register.Index = immpos; 1623 reg.Register.SwizzleX = x; 1624 reg.Register.SwizzleY = y; 
1625 reg.Register.SwizzleZ = z; 1626 reg.Register.SwizzleW = w; 1627 return reg; 1628 } 1629 /* else, keep searching */ 1630 } 1631 1632 assert(!"Failed to find immediate register!"); 1633 1634 /* Just return IMM[0].xxxx */ 1635 memset(®, 0, sizeof(reg)); 1636 reg.Register.File = TGSI_FILE_IMMEDIATE; 1637 return reg; 1638} 1639 1640 1641/** 1642 * Return a tgsi_full_src_register for an immediate/literal 1643 * union tgsi_immediate_data value of the form {value, value, value, value}. 1644 * \sa make_immediate_reg_4() regarding allowed values. 1645 */ 1646static struct tgsi_full_src_register 1647make_immediate_reg(struct svga_shader_emitter_v10 *emit, 1648 union tgsi_immediate_data value) 1649{ 1650 struct tgsi_full_src_register reg; 1651 int immpos = find_immediate(emit, value, 0); 1652 1653 assert(immpos >= 0); 1654 1655 memset(®, 0, sizeof(reg)); 1656 reg.Register.File = TGSI_FILE_IMMEDIATE; 1657 reg.Register.Index = immpos; 1658 reg.Register.SwizzleX = 1659 reg.Register.SwizzleY = 1660 reg.Register.SwizzleZ = 1661 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); 1662 1663 return reg; 1664} 1665 1666 1667/** 1668 * Return a tgsi_full_src_register for an immediate/literal float[4] value. 1669 * \sa make_immediate_reg_4() regarding allowed values. 1670 */ 1671static struct tgsi_full_src_register 1672make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, 1673 float x, float y, float z, float w) 1674{ 1675 union tgsi_immediate_data imm[4]; 1676 imm[0].Float = x; 1677 imm[1].Float = y; 1678 imm[2].Float = z; 1679 imm[3].Float = w; 1680 return make_immediate_reg_4(emit, imm); 1681} 1682 1683 1684/** 1685 * Return a tgsi_full_src_register for an immediate/literal float value 1686 * of the form {value, value, value, value}. 1687 * \sa make_immediate_reg_4() regarding allowed values. 
1688 */ 1689static struct tgsi_full_src_register 1690make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1691{ 1692 union tgsi_immediate_data imm; 1693 imm.Float = value; 1694 return make_immediate_reg(emit, imm); 1695} 1696 1697 1698/** 1699 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 1700 */ 1701static struct tgsi_full_src_register 1702make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1703 int x, int y, int z, int w) 1704{ 1705 union tgsi_immediate_data imm[4]; 1706 imm[0].Int = x; 1707 imm[1].Int = y; 1708 imm[2].Int = z; 1709 imm[3].Int = w; 1710 return make_immediate_reg_4(emit, imm); 1711} 1712 1713 1714/** 1715 * Return a tgsi_full_src_register for an immediate/literal int value 1716 * of the form {value, value, value, value}. 1717 * \sa make_immediate_reg_4() regarding allowed values. 1718 */ 1719static struct tgsi_full_src_register 1720make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1721{ 1722 union tgsi_immediate_data imm; 1723 imm.Int = value; 1724 return make_immediate_reg(emit, imm); 1725} 1726 1727 1728/** 1729 * Allocate space for a union tgsi_immediate_data[4] immediate. 1730 * \return the index/position of the immediate. 1731 */ 1732static unsigned 1733alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1734 const union tgsi_immediate_data imm[4]) 1735{ 1736 unsigned n = emit->num_immediates++; 1737 assert(!emit->immediates_emitted); 1738 assert(n < ARRAY_SIZE(emit->immediates)); 1739 emit->immediates[n][0] = imm[0]; 1740 emit->immediates[n][1] = imm[1]; 1741 emit->immediates[n][2] = imm[2]; 1742 emit->immediates[n][3] = imm[3]; 1743 return n; 1744} 1745 1746 1747/** 1748 * Allocate space for a float[4] immediate. 1749 * \return the index/position of the immediate. 
1750 */ 1751static unsigned 1752alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1753 float x, float y, float z, float w) 1754{ 1755 union tgsi_immediate_data imm[4]; 1756 imm[0].Float = x; 1757 imm[1].Float = y; 1758 imm[2].Float = z; 1759 imm[3].Float = w; 1760 return alloc_immediate_4(emit, imm); 1761} 1762 1763 1764/** 1765 * Allocate space for an int[4] immediate. 1766 * \return the index/position of the immediate. 1767 */ 1768static unsigned 1769alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1770 int x, int y, int z, int w) 1771{ 1772 union tgsi_immediate_data imm[4]; 1773 imm[0].Int = x; 1774 imm[1].Int = y; 1775 imm[2].Int = z; 1776 imm[3].Int = w; 1777 return alloc_immediate_4(emit, imm); 1778} 1779 1780 1781/** 1782 * Allocate a shader input to store a system value. 1783 */ 1784static unsigned 1785alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1786{ 1787 const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; 1788 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1789 emit->system_value_indexes[index] = n; 1790 return n; 1791} 1792 1793 1794/** 1795 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1796 */ 1797static boolean 1798emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1799 const struct tgsi_full_immediate *imm) 1800{ 1801 /* We don't actually emit any code here. We just save the 1802 * immediate values and emit them later. 1803 */ 1804 alloc_immediate_4(emit, imm->u); 1805 return TRUE; 1806} 1807 1808 1809/** 1810 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1811 * containing all the immediate values previously allocated 1812 * with alloc_immediate_4(). 
1813 */ 1814static boolean 1815emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 1816{ 1817 VGPU10OpcodeToken0 token; 1818 1819 assert(!emit->immediates_emitted); 1820 1821 token.value = 0; 1822 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 1823 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 1824 1825 /* Note: no begin/end_emit_instruction() calls */ 1826 emit_dword(emit, token.value); 1827 emit_dword(emit, 2 + 4 * emit->num_immediates); 1828 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 1829 1830 emit->immediates_emitted = TRUE; 1831 1832 return TRUE; 1833} 1834 1835 1836/** 1837 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 1838 * interpolation mode. 1839 * \return a VGPU10_INTERPOLATION_x value 1840 */ 1841static unsigned 1842translate_interpolation(const struct svga_shader_emitter_v10 *emit, 1843 unsigned interp, unsigned interpolate_loc) 1844{ 1845 if (interp == TGSI_INTERPOLATE_COLOR) { 1846 interp = emit->key.fs.flatshade ? 1847 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 1848 } 1849 1850 switch (interp) { 1851 case TGSI_INTERPOLATE_CONSTANT: 1852 return VGPU10_INTERPOLATION_CONSTANT; 1853 case TGSI_INTERPOLATE_LINEAR: 1854 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1855 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : 1856 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 1857 case TGSI_INTERPOLATE_PERSPECTIVE: 1858 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1859 VGPU10_INTERPOLATION_LINEAR_CENTROID : 1860 VGPU10_INTERPOLATION_LINEAR; 1861 default: 1862 assert(!"Unexpected interpolation mode"); 1863 return VGPU10_INTERPOLATION_CONSTANT; 1864 } 1865} 1866 1867 1868/** 1869 * Translate a TGSI property to VGPU10. 1870 * Don't emit any instructions yet, only need to gather the primitive property information. 1871 * The output primitive topology might be changed later. 
The final property instructions 1872 * will be emitted as part of the pre-helper code. 1873 */ 1874static boolean 1875emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 1876 const struct tgsi_full_property *prop) 1877{ 1878 static const VGPU10_PRIMITIVE primType[] = { 1879 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 1880 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 1881 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 1882 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 1883 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 1884 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 1885 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 1886 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 1887 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1888 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1889 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1890 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1891 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1892 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1893 }; 1894 1895 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 1896 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 1897 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 1898 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 1899 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 1900 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 1901 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 1902 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 1903 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 1904 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1905 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1906 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1907 
VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1908 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1909 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1910 }; 1911 1912 static const unsigned inputArraySize[] = { 1913 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 1914 1, /* VGPU10_PRIMITIVE_POINT */ 1915 2, /* VGPU10_PRIMITIVE_LINE */ 1916 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 1917 0, 1918 0, 1919 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 1920 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 1921 }; 1922 1923 switch (prop->Property.PropertyName) { 1924 case TGSI_PROPERTY_GS_INPUT_PRIM: 1925 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 1926 emit->gs.prim_type = primType[prop->u[0].Data]; 1927 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 1928 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 1929 break; 1930 1931 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1932 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 1933 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 1934 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 1935 break; 1936 1937 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1938 emit->gs.max_out_vertices = prop->u[0].Data; 1939 break; 1940 1941 default: 1942 break; 1943 } 1944 1945 return TRUE; 1946} 1947 1948 1949static void 1950emit_property_instruction(struct svga_shader_emitter_v10 *emit, 1951 VGPU10OpcodeToken0 opcode0, unsigned nData, 1952 unsigned data) 1953{ 1954 begin_emit_instruction(emit); 1955 emit_dword(emit, opcode0.value); 1956 if (nData) 1957 emit_dword(emit, data); 1958 end_emit_instruction(emit); 1959} 1960 1961 1962/** 1963 * Emit property instructions 1964 */ 1965static void 1966emit_property_instructions(struct svga_shader_emitter_v10 *emit) 1967{ 1968 VGPU10OpcodeToken0 opcode0; 1969 1970 assert(emit->unit == PIPE_SHADER_GEOMETRY); 1971 1972 /* emit input primitive type declaration */ 1973 opcode0.value = 0; 1974 
opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; 1975 opcode0.primitive = emit->gs.prim_type; 1976 emit_property_instruction(emit, opcode0, 0, 0); 1977 1978 /* emit output primitive topology declaration */ 1979 opcode0.value = 0; 1980 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; 1981 opcode0.primitiveTopology = emit->gs.prim_topology; 1982 emit_property_instruction(emit, opcode0, 0, 0); 1983 1984 /* emit max output vertices */ 1985 opcode0.value = 0; 1986 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; 1987 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); 1988} 1989 1990 1991/** 1992 * Emit a vgpu10 declaration "instruction". 1993 * \param index the register index 1994 * \param size array size of the operand. In most cases, it is 1, 1995 * but for inputs to geometry shader, the array size varies 1996 * depending on the primitive type. 1997 */ 1998static void 1999emit_decl_instruction(struct svga_shader_emitter_v10 *emit, 2000 VGPU10OpcodeToken0 opcode0, 2001 VGPU10OperandToken0 operand0, 2002 VGPU10NameToken name_token, 2003 unsigned index, unsigned size) 2004{ 2005 assert(opcode0.opcodeType); 2006 assert(operand0.mask); 2007 2008 begin_emit_instruction(emit); 2009 emit_dword(emit, opcode0.value); 2010 2011 emit_dword(emit, operand0.value); 2012 2013 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { 2014 /* Next token is the index of the register to declare */ 2015 emit_dword(emit, index); 2016 } 2017 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { 2018 /* Next token is the size of the register */ 2019 emit_dword(emit, size); 2020 2021 /* Followed by the index of the register */ 2022 emit_dword(emit, index); 2023 } 2024 2025 if (name_token.value) { 2026 emit_dword(emit, name_token.value); 2027 } 2028 2029 end_emit_instruction(emit); 2030} 2031 2032 2033/** 2034 * Emit the declaration for a shader input. 
2035 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx 2036 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x 2037 * \param dim index dimension 2038 * \param index the input register index 2039 * \param size array size of the operand. In most cases, it is 1, 2040 * but for inputs to geometry shader, the array size varies 2041 * depending on the primitive type. 2042 * \param name one of VGPU10_NAME_x 2043 * \parma numComp number of components 2044 * \param selMode component selection mode 2045 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2046 * \param interpMode interpolation mode 2047 */ 2048static void 2049emit_input_declaration(struct svga_shader_emitter_v10 *emit, 2050 unsigned opcodeType, unsigned operandType, 2051 unsigned dim, unsigned index, unsigned size, 2052 unsigned name, unsigned numComp, 2053 unsigned selMode, unsigned usageMask, 2054 unsigned interpMode) 2055{ 2056 VGPU10OpcodeToken0 opcode0; 2057 VGPU10OperandToken0 operand0; 2058 VGPU10NameToken name_token; 2059 2060 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2061 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || 2062 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || 2063 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || 2064 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); 2065 assert(operandType == VGPU10_OPERAND_TYPE_INPUT || 2066 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); 2067 assert(numComp <= VGPU10_OPERAND_4_COMPONENT); 2068 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); 2069 assert(dim <= VGPU10_OPERAND_INDEX_3D); 2070 assert(name == VGPU10_NAME_UNDEFINED || 2071 name == VGPU10_NAME_POSITION || 2072 name == VGPU10_NAME_INSTANCE_ID || 2073 name == VGPU10_NAME_VERTEX_ID || 2074 name == VGPU10_NAME_PRIMITIVE_ID || 2075 name == VGPU10_NAME_IS_FRONT_FACE); 2076 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || 2077 interpMode == VGPU10_INTERPOLATION_CONSTANT || 2078 interpMode == VGPU10_INTERPOLATION_LINEAR 
|| 2079 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || 2080 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || 2081 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID); 2082 2083 check_register_index(emit, opcodeType, index); 2084 2085 opcode0.value = operand0.value = name_token.value = 0; 2086 2087 opcode0.opcodeType = opcodeType; 2088 opcode0.interpolationMode = interpMode; 2089 2090 operand0.operandType = operandType; 2091 operand0.numComponents = numComp; 2092 operand0.selectionMode = selMode; 2093 operand0.mask = usageMask; 2094 operand0.indexDimension = dim; 2095 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2096 if (dim == VGPU10_OPERAND_INDEX_2D) 2097 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2098 2099 name_token.name = name; 2100 2101 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); 2102} 2103 2104 2105/** 2106 * Emit the declaration for a shader output. 2107 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx 2108 * \param index the output register index 2109 * \param name one of VGPU10_NAME_x 2110 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2111 */ 2112static void 2113emit_output_declaration(struct svga_shader_emitter_v10 *emit, 2114 unsigned type, unsigned index, 2115 unsigned name, unsigned usageMask) 2116{ 2117 VGPU10OpcodeToken0 opcode0; 2118 VGPU10OperandToken0 operand0; 2119 VGPU10NameToken name_token; 2120 2121 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2122 assert(type == VGPU10_OPCODE_DCL_OUTPUT || 2123 type == VGPU10_OPCODE_DCL_OUTPUT_SGV || 2124 type == VGPU10_OPCODE_DCL_OUTPUT_SIV); 2125 assert(name == VGPU10_NAME_UNDEFINED || 2126 name == VGPU10_NAME_POSITION || 2127 name == VGPU10_NAME_PRIMITIVE_ID || 2128 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 2129 name == VGPU10_NAME_CLIP_DISTANCE); 2130 2131 check_register_index(emit, type, index); 2132 2133 opcode0.value = operand0.value = name_token.value = 0; 
2134 2135 opcode0.opcodeType = type; 2136 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 2137 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2138 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2139 operand0.mask = usageMask; 2140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2141 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2142 2143 name_token.name = name; 2144 2145 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 2146} 2147 2148 2149/** 2150 * Emit the declaration for the fragment depth output. 2151 */ 2152static void 2153emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 2154{ 2155 VGPU10OpcodeToken0 opcode0; 2156 VGPU10OperandToken0 operand0; 2157 VGPU10NameToken name_token; 2158 2159 assert(emit->unit == PIPE_SHADER_FRAGMENT); 2160 2161 opcode0.value = operand0.value = name_token.value = 0; 2162 2163 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 2164 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 2165 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 2166 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2167 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2168 2169 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 2170} 2171 2172 2173/** 2174 * Emit the declaration for a system value input/output. 
2175 */ 2176static void 2177emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 2178 unsigned semantic_name, unsigned index) 2179{ 2180 switch (semantic_name) { 2181 case TGSI_SEMANTIC_INSTANCEID: 2182 index = alloc_system_value_index(emit, index); 2183 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2184 VGPU10_OPERAND_TYPE_INPUT, 2185 VGPU10_OPERAND_INDEX_1D, 2186 index, 1, 2187 VGPU10_NAME_INSTANCE_ID, 2188 VGPU10_OPERAND_4_COMPONENT, 2189 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2190 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2191 VGPU10_INTERPOLATION_UNDEFINED); 2192 break; 2193 case TGSI_SEMANTIC_VERTEXID: 2194 index = alloc_system_value_index(emit, index); 2195 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2196 VGPU10_OPERAND_TYPE_INPUT, 2197 VGPU10_OPERAND_INDEX_1D, 2198 index, 1, 2199 VGPU10_NAME_VERTEX_ID, 2200 VGPU10_OPERAND_4_COMPONENT, 2201 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2202 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2203 VGPU10_INTERPOLATION_UNDEFINED); 2204 break; 2205 default: 2206 ; /* XXX */ 2207 } 2208} 2209 2210/** 2211 * Translate a TGSI declaration to VGPU10. 2212 */ 2213static boolean 2214emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 2215 const struct tgsi_full_declaration *decl) 2216{ 2217 switch (decl->Declaration.File) { 2218 case TGSI_FILE_INPUT: 2219 /* do nothing - see emit_input_declarations() */ 2220 return TRUE; 2221 2222 case TGSI_FILE_OUTPUT: 2223 assert(decl->Range.First == decl->Range.Last); 2224 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 2225 return TRUE; 2226 2227 case TGSI_FILE_TEMPORARY: 2228 /* Don't declare the temps here. Just keep track of how many 2229 * and emit the declaration later. 2230 */ 2231 if (decl->Declaration.Array) { 2232 /* Indexed temporary array. Save the start index of the array 2233 * and the size of the array. 
2234 */ 2235 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 2236 unsigned i; 2237 2238 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 2239 2240 /* Save this array so we can emit the declaration for it later */ 2241 emit->temp_arrays[arrayID].start = decl->Range.First; 2242 emit->temp_arrays[arrayID].size = 2243 decl->Range.Last - decl->Range.First + 1; 2244 2245 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 2246 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 2247 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 2248 2249 /* Fill in the temp_map entries for this array */ 2250 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2251 emit->temp_map[i].arrayId = arrayID; 2252 emit->temp_map[i].index = i - decl->Range.First; 2253 } 2254 } 2255 2256 /* for all temps, indexed or not, keep track of highest index */ 2257 emit->num_shader_temps = MAX2(emit->num_shader_temps, 2258 decl->Range.Last + 1); 2259 return TRUE; 2260 2261 case TGSI_FILE_CONSTANT: 2262 /* Don't declare constants here. Just keep track and emit later. */ 2263 { 2264 unsigned constbuf = 0, num_consts; 2265 if (decl->Declaration.Dimension) { 2266 constbuf = decl->Dim.Index2D; 2267 } 2268 /* We throw an assertion here when, in fact, the shader should never 2269 * have linked due to constbuf index out of bounds, so we shouldn't 2270 * have reached here. 
2271 */ 2272 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); 2273 2274 num_consts = MAX2(emit->num_shader_consts[constbuf], 2275 decl->Range.Last + 1); 2276 2277 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 2278 debug_printf("Warning: constant buffer is declared to size [%u]" 2279 " but [%u] is the limit.\n", 2280 num_consts, 2281 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2282 } 2283 /* The linker doesn't enforce the max UBO size so we clamp here */ 2284 emit->num_shader_consts[constbuf] = 2285 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2286 } 2287 return TRUE; 2288 2289 case TGSI_FILE_IMMEDIATE: 2290 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 2291 return FALSE; 2292 2293 case TGSI_FILE_SYSTEM_VALUE: 2294 emit_system_value_declaration(emit, decl->Semantic.Name, 2295 decl->Range.First); 2296 return TRUE; 2297 2298 case TGSI_FILE_SAMPLER: 2299 /* Don't declare samplers here. Just keep track and emit later. */ 2300 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 2301 return TRUE; 2302 2303#if 0 2304 case TGSI_FILE_RESOURCE: 2305 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 2306 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 2307 assert(!"TGSI_FILE_RESOURCE not handled yet"); 2308 return FALSE; 2309#endif 2310 2311 case TGSI_FILE_ADDRESS: 2312 emit->num_address_regs = MAX2(emit->num_address_regs, 2313 decl->Range.Last + 1); 2314 return TRUE; 2315 2316 case TGSI_FILE_SAMPLER_VIEW: 2317 { 2318 unsigned unit = decl->Range.First; 2319 assert(decl->Range.First == decl->Range.Last); 2320 emit->sampler_target[unit] = decl->SamplerView.Resource; 2321 /* Note: we can ignore YZW return types for now */ 2322 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; 2323 } 2324 return TRUE; 2325 2326 default: 2327 assert(!"Unexpected type of declaration"); 2328 return FALSE; 2329 } 2330} 2331 2332 2333 2334/** 2335 * Emit all input declarations. 
*/
static boolean
emit_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   if (emit->unit == PIPE_SHADER_FRAGMENT) {

      /* Fragment shader: walk the linked inputs and declare each used
       * one at its remapped (linkage.input_map) register index.
       */
      for (i = 0; i < emit->linkage.num_inputs; i++) {
         unsigned semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned type, interpolationMode, name;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* fragment position input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
            name = VGPU10_NAME_POSITION;
            if (usage_mask & TGSI_WRITEMASK_W) {
               /* we need to replace use of 'w' with '1/w' */
               emit->fs.fragcoord_input_index = i;
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_FACE) {
            /* fragment front-facing input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_IS_FRONT_FACE;
            /* remember the TGSI input index of the face register */
            emit->fs.face_input_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* primitive ID */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_PRIMITIVE_ID;
         }
         else {
            /* general fragment input */
            type = VGPU10_OPCODE_DCL_INPUT_PS;
            interpolationMode =
               translate_interpolation(emit,
                                       emit->info.input_interpolate[i],
                                       emit->info.input_interpolate_loc[i]);

            /* keeps track if flat interpolation mode is being used */
            emit->uses_flat_interp = emit->uses_flat_interp ||
               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);

            name = VGPU10_NAME_UNDEFINED;
         }

         /* All four components are declared regardless of usage_mask */
         emit_input_declaration(emit, type,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                name,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                interpolationMode);
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY) {

      /* Geometry shader: note this loop is bounded by info.num_inputs
       * (not linkage.num_inputs as in the fragment shader case) while
       * the register index still comes from linkage.input_map.
       */
      for (i = 0; i < emit->info.num_inputs; i++) {
         unsigned semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned opcodeType, operandType;
         unsigned numComp, selMode;
         unsigned name;
         unsigned dim;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         /* Defaults for a generic input; overridden below as needed */
         opcodeType = VGPU10_OPCODE_DCL_INPUT;
         operandType = VGPU10_OPERAND_TYPE_INPUT;
         numComp = VGPU10_OPERAND_4_COMPONENT;
         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
         name = VGPU10_NAME_UNDEFINED;

         /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
         dim = VGPU10_OPERAND_INDEX_2D;

         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* Primitive ID */
            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
            dim = VGPU10_OPERAND_INDEX_0D;
            numComp = VGPU10_OPERAND_0_COMPONENT;
            selMode = 0;

            /* also save the register index so we can check for
             * primitive id when emit src register.  We need to modify the
             * operand type, index dimension when emit primitive id src reg.
             */
            emit->gs.prim_id_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* vertex position input */
            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
            name = VGPU10_NAME_POSITION;
         }

         /* The operand's array size is emit->gs.input_size for every
          * input declared here.
          */
         emit_input_declaration(emit, opcodeType, operandType,
                                dim, index,
                                emit->gs.input_size,
                                name,
                                numComp, selMode,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);

      /* Vertex shader: inputs are declared at their TGSI index, no
       * remapping.  The loop covers indices 0..file_max[INPUT].
       */
      for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = i;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                VGPU10_NAME_UNDEFINED,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }

   /* This function has no failure path; it always returns TRUE */
   return TRUE;
}


/**
 * Emit all output declarations.
*/
static boolean
emit_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->info.num_outputs; i++) {
      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
      const unsigned semantic_name = emit->info.output_semantic_name[i];
      const unsigned semantic_index = emit->info.output_semantic_index[i];
      unsigned index = i;

      if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (semantic_name == TGSI_SEMANTIC_COLOR) {
            assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));

            /* Remember which TGSI output register holds this color */
            emit->fs.color_out_index[semantic_index] = index;

            /* The semantic index is the shader's color output/buffer index */
            emit_output_declaration(emit,
                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
                                    VGPU10_NAME_UNDEFINED,
                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);

            if (semantic_index == 0) {
               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
                  /* Emit declarations for the additional color outputs
                   * for broadcasting.
                   */
                  unsigned j;
                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
                     /* Allocate a new output index */
                     unsigned idx = emit->info.num_outputs + j - 1;
                     emit->fs.color_out_index[j] = idx;
                     emit_output_declaration(emit,
                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
                                        VGPU10_NAME_UNDEFINED,
                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
                     /* Record the new output's semantic index in the
                      * shader info as well.
                      */
                     emit->info.output_semantic_index[idx] = j;
                  }
               }
            }
            else {
               /* Color outputs other than color 0 are not expected
                * together with color0 broadcasting.
                */
               assert(!emit->key.fs.write_color0_to_n_cbufs);
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output */
            emit_fragdepth_output_declaration(emit);
         }
         else {
            assert(!"Bad output semantic name");
         }
      }
      else {
         /* VS or GS */
         unsigned name, type;
         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

         switch (semantic_name) {
         case TGSI_SEMANTIC_POSITION:
            assert(emit->unit != PIPE_SHADER_FRAGMENT);
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_POSITION;
            /* Save the index of the vertex position output register */
            emit->vposition.out_index = index;
            break;
         case TGSI_SEMANTIC_CLIPDIST:
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_CLIP_DISTANCE;
            /* save the starting index of the clip distance output register */
            if (semantic_index == 0)
               emit->clip_dist_out_index = index;
            /* Start from the mask recorded from the TGSI declaration
             * and let apply_clip_plane_mask() adjust it.
             */
            writemask = emit->output_usage_mask[index];
            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
            if (writemask == 0x0) {
               /* 'continue' applies to the enclosing for loop */
               continue; /* discard this do-nothing declaration */
            }
            break;
         case TGSI_SEMANTIC_PRIMID:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_PRIMITIVE_ID;
            break;
         case TGSI_SEMANTIC_LAYER:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
            /* Save the index of the clip vertex output register */
            emit->clip_vertex_out_index = index;
            break;
         default:
            /* generic output */
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
         }

         emit_output_declaration(emit, type, index, name, writemask);
      }
   }

   /* Extra output: only when both a stream-output position index and a
    * vertex position output register are known.
    */
   if (emit->vposition.so_index != INVALID_INDEX &&
       emit->vposition.out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the non-adjusted vertex position
       * for stream output purpose
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->vposition.so_index,
                              VGPU10_NAME_UNDEFINED,
                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   }

   /* Extra output(s): shadow copies of the clip distance registers */
   if (emit->clip_dist_so_index != INVALID_INDEX &&
       emit->clip_dist_out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the clip distance shadow copy which
       * will be used for stream output purpose and for clip distance
       * varying variable
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->clip_dist_so_index,
                              VGPU10_NAME_UNDEFINED,
                              emit->output_usage_mask[emit->clip_dist_out_index]);

      if (emit->info.num_written_clipdistance > 4) {
         /* for the second clip distance register, each handles 4 planes */
         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                                 emit->clip_dist_so_index + 1,
                                 VGPU10_NAME_UNDEFINED,
                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
      }
   }

   /* This function has no failure path; it always returns TRUE */
   return TRUE;
}


/**
 * Emit the declaration for the temporary registers.
*/
static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   /* Start with the number of temps declared by the TGSI shader */
   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      unsigned arrayID;

      /* Array 0 is reserved for non-indexed temps, so use array 1 */
      arrayID = 1;
      emit->num_temp_arrays = arrayID + 1;
      emit->temp_arrays[arrayID].start = 0;
      emit->temp_arrays[arrayID].size = total_temps;

      /* Fill in the temp_map entries for this temp array */
      for (i = 0; i < total_temps; i++) {
         emit->temp_map[i].arrayId = arrayID;
         emit->temp_map[i].index = i;
      }
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   /* From here on, each driver-internal temp is handed the next free
    * index, so the order of the statements below determines the
    * register numbering.
    */
   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      /* NOTE(review): key.vs.undo_viewport is read here for geometry
       * shaders too - confirm that is intended.
       */
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         /* Temp for the vertex position */
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* One temp per vertex attribute that needs any adjustment */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }

      if (emit->clip_mode == CLIP_DISTANCE) {
         /* We need to write the clip distance to a temporary register
          * first. Then it will be copied to the shadow copy for
          * the clip distance varying variable and stream output purpose.
          * It will also be copied to the actual CLIPDIST register
          * according to the enabled clip planes
          */
         emit->clip_dist_tmp_index = total_temps++;
         if (emit->info.num_written_clipdistance > 4)
            total_temps++; /* second clip register */
      }
      else if (emit->clip_mode == CLIP_VERTEX) {
         /* We need to convert the TGSI CLIPVERTEX output to one or more
          * clip distances.  Allocate a temp reg for the clipvertex here.
          */
         assert(emit->info.writes_clipvertex > 0);
         emit->clip_vertex_tmp_index = total_temps;
         total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.white_fragments ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }
   }

   /* Each TGSI address register is backed by its own temp */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* Debug dump of the temp map, disabled by default */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u ->  array %u  index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   /* total_temps now counts only the non-indexed (array 0) temps */
   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return TRUE;
}


/**
 * Emit the declarations for the constant buffers.
 *
 * Constant buffer 0 holds the shader's own constants followed by the
 * driver's "extra" constants.  The slot index of each extra constant is
 * recorded in the emitter as it is allocated, so the order of the
 * allocations below defines the layout of the extra constants.  The
 * remaining constant buffers are declared with the sizes recorded in
 * emit->num_shader_consts[].
 *
 * \return always TRUE
 */
static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   unsigned total_consts, i;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */

   /* The same operand token (with an identity XYZW swizzle) is used
    * for every constant buffer declaration below.
    */
   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   /**
    * Emit declaration for constant buffer [0].  We also allocate
    * room for the extra constants here.
    */
   total_consts = emit->num_shader_consts[0];

   /* Now, allocate constant slots for the "extra" constants */

   /* Vertex position scale/translation */
   if (emit->vposition.need_prescale) {
      emit->vposition.prescale_scale_index = total_consts++;
      emit->vposition.prescale_trans_index = total_consts++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->key.vs.undo_viewport) {
         emit->vs.viewport_index = total_consts++;
      }
   }

   /* user-defined clip planes */
   if (emit->key.clip_plane_enable) {
      /* one constant per enabled clip plane bit */
      unsigned n = util_bitcount(emit->key.clip_plane_enable);
      assert(emit->unit == PIPE_SHADER_VERTEX ||
             emit->unit == PIPE_SHADER_GEOMETRY);
      for (i = 0; i < n; i++) {
         emit->clip_plane_const[i] = total_consts++;
      }
   }

   /* Texcoord scale factors for RECT textures */
   {
      for (i = 0; i < emit->num_samplers; i++) {
         if (emit->key.tex[i].unnormalized) {
            emit->texcoord_scale_index[i] = total_consts++;
         }
      }
   }

   /* Texture buffer sizes */
   for (i = 0; i < emit->num_samplers; i++) {
      if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
         emit->texture_buffer_size_index[i] = total_consts++;
      }
   }

   /* Constant buffer 0 is only declared when it is non-empty */
   if (total_consts > 0) {
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, 0);  /* which const buffer slot */
      emit_dword(emit, total_consts);
      end_emit_instruction(emit);
   }

   /* Declare remaining constant buffers (UBOs) */
   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
      if (emit->num_shader_consts[i] > 0) {
         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, operand0.value);
         emit_dword(emit, i);  /* which const buffer slot */
         emit_dword(emit, emit->num_shader_consts[i]);
         end_emit_instruction(emit);
      }
   }

   return TRUE;
}


/**
 * Emit declarations for samplers.
 * One default-mode sampler is declared for each index in
 * [0, emit->num_samplers).
 * \return always TRUE
 */
static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->num_samplers; i++) {
      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;

      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, i);   /* the sampler index */
      end_emit_instruction(emit);
   }

   return TRUE;
}


/**
 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
 * \param target  one of TGSI_TEXTURE_x
 * \param is_array  for the 1D/2D/2D-MSAA array targets, selects between
 *                  the array and the non-array resource dimension; it is
 *                  ignored for all other targets
 * \return the resource dimension; unexpected targets assert and fall
 *         back to VGPU10_RESOURCE_DIMENSION_TEXTURE2D
 */
static unsigned
tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
{
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
      return VGPU10_RESOURCE_DIMENSION_BUFFER;
   case TGSI_TEXTURE_1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_3D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   case TGSI_TEXTURE_CUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_SHADOW1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_SHADOWCUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_2D_MSAA:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_CUBE_ARRAY:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
   default:
      assert(!"Unexpected resource type");
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   }
}


/**
 * Given a tgsi_return_type, return true iff it is an integer type.
 * SINT and UINT are the integer types; unknown values assert and are
 * reported as non-integer.
 */
static boolean
is_integer_type(enum tgsi_return_type type)
{
   switch (type) {
      case TGSI_RETURN_TYPE_SINT:
      case TGSI_RETURN_TYPE_UINT:
         return TRUE;
      case TGSI_RETURN_TYPE_FLOAT:
      case TGSI_RETURN_TYPE_UNORM:
      case TGSI_RETURN_TYPE_SNORM:
         return FALSE;
      case TGSI_RETURN_TYPE_COUNT:
      default:
         assert(!"is_integer_type: Unknown tgsi_return_type");
         return FALSE;
   }
}


/**
 * Emit declarations for resources.
 * XXX When we're sure that all TGSI shaders will be generated with
 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
 * rework this code.
3023 */ 3024static boolean 3025emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 3026{ 3027 unsigned i; 3028 3029 /* Emit resource decl for each sampler */ 3030 for (i = 0; i < emit->num_samplers; i++) { 3031 VGPU10OpcodeToken0 opcode0; 3032 VGPU10OperandToken0 operand0; 3033 VGPU10ResourceReturnTypeToken return_type; 3034 VGPU10_RESOURCE_RETURN_TYPE rt; 3035 3036 opcode0.value = 0; 3037 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 3038 opcode0.resourceDimension = 3039 tgsi_texture_to_resource_dimension(emit->sampler_target[i], 3040 emit->key.tex[i].is_array); 3041 operand0.value = 0; 3042 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 3043 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 3044 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3045 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3046 3047#if 1 3048 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 3049 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 3050 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 3051 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 3052 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 3053 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 3054 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); 3055 rt = emit->sampler_return_type[i] + 1; 3056#else 3057 switch (emit->sampler_return_type[i]) { 3058 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 3059 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 3060 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 3061 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 3062 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 3063 case TGSI_RETURN_TYPE_COUNT: 3064 default: 3065 rt = VGPU10_RETURN_TYPE_FLOAT; 3066 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 3067 } 
3068#endif 3069 3070 return_type.value = 0; 3071 return_type.component0 = rt; 3072 return_type.component1 = rt; 3073 return_type.component2 = rt; 3074 return_type.component3 = rt; 3075 3076 begin_emit_instruction(emit); 3077 emit_dword(emit, opcode0.value); 3078 emit_dword(emit, operand0.value); 3079 emit_dword(emit, i); 3080 emit_dword(emit, return_type.value); 3081 end_emit_instruction(emit); 3082 } 3083 3084 return TRUE; 3085} 3086 3087static void 3088emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 3089 unsigned opcode, 3090 const struct tgsi_full_dst_register *dst, 3091 const struct tgsi_full_src_register *src, 3092 boolean saturate) 3093{ 3094 begin_emit_instruction(emit); 3095 emit_opcode(emit, opcode, saturate); 3096 emit_dst_register(emit, dst); 3097 emit_src_register(emit, src); 3098 end_emit_instruction(emit); 3099} 3100 3101static void 3102emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 3103 unsigned opcode, 3104 const struct tgsi_full_dst_register *dst, 3105 const struct tgsi_full_src_register *src1, 3106 const struct tgsi_full_src_register *src2, 3107 boolean saturate) 3108{ 3109 begin_emit_instruction(emit); 3110 emit_opcode(emit, opcode, saturate); 3111 emit_dst_register(emit, dst); 3112 emit_src_register(emit, src1); 3113 emit_src_register(emit, src2); 3114 end_emit_instruction(emit); 3115} 3116 3117static void 3118emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 3119 unsigned opcode, 3120 const struct tgsi_full_dst_register *dst, 3121 const struct tgsi_full_src_register *src1, 3122 const struct tgsi_full_src_register *src2, 3123 const struct tgsi_full_src_register *src3, 3124 boolean saturate) 3125{ 3126 begin_emit_instruction(emit); 3127 emit_opcode(emit, opcode, saturate); 3128 emit_dst_register(emit, dst); 3129 emit_src_register(emit, src1); 3130 emit_src_register(emit, src2); 3131 emit_src_register(emit, src3); 3132 end_emit_instruction(emit); 3133} 3134 3135/** 3136 * Emit the actual clip distance instructions 
to be used for clipping 3137 * by copying the clip distance from the temporary registers to the 3138 * CLIPDIST registers written with the enabled planes mask. 3139 * Also copy the clip distance from the temporary to the clip distance 3140 * shadow copy register which will be referenced by the input shader 3141 */ 3142static void 3143emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) 3144{ 3145 struct tgsi_full_src_register tmp_clip_dist_src; 3146 struct tgsi_full_dst_register clip_dist_dst; 3147 3148 unsigned i; 3149 unsigned clip_plane_enable = emit->key.clip_plane_enable; 3150 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; 3151 int num_written_clipdist = emit->info.num_written_clipdistance; 3152 3153 assert(emit->clip_dist_out_index != INVALID_INDEX); 3154 assert(emit->clip_dist_tmp_index != INVALID_INDEX); 3155 3156 /** 3157 * Temporary reset the temporary clip dist register index so 3158 * that the copy to the real clip dist register will not 3159 * attempt to copy to the temporary register again 3160 */ 3161 emit->clip_dist_tmp_index = INVALID_INDEX; 3162 3163 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { 3164 3165 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); 3166 3167 /** 3168 * copy to the shadow copy for use by varying variable and 3169 * stream output. All clip distances 3170 * will be written regardless of the enabled clipping planes. 
3171 */ 3172 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3173 emit->clip_dist_so_index + i); 3174 3175 /* MOV clip_dist_so, tmp_clip_dist */ 3176 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3177 &tmp_clip_dist_src, FALSE); 3178 3179 /** 3180 * copy those clip distances to enabled clipping planes 3181 * to CLIPDIST registers for clipping 3182 */ 3183 if (clip_plane_enable & 0xf) { 3184 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3185 emit->clip_dist_out_index + i); 3186 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); 3187 3188 /* MOV CLIPDIST, tmp_clip_dist */ 3189 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3190 &tmp_clip_dist_src, FALSE); 3191 } 3192 /* four clip planes per clip register */ 3193 clip_plane_enable >>= 4; 3194 } 3195 /** 3196 * set the temporary clip dist register index back to the 3197 * temporary index for the next vertex 3198 */ 3199 emit->clip_dist_tmp_index = clip_dist_tmp_index; 3200} 3201 3202/* Declare clip distance output registers for user-defined clip planes 3203 * or the TGSI_CLIPVERTEX output. 3204 */ 3205static void 3206emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) 3207{ 3208 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3209 unsigned index = emit->num_outputs; 3210 unsigned plane_mask; 3211 3212 assert(emit->unit == PIPE_SHADER_VERTEX || 3213 emit->unit == PIPE_SHADER_GEOMETRY); 3214 assert(num_clip_planes <= 8); 3215 3216 if (emit->clip_mode != CLIP_LEGACY && 3217 emit->clip_mode != CLIP_VERTEX) { 3218 return; 3219 } 3220 3221 if (num_clip_planes == 0) 3222 return; 3223 3224 /* Declare one or two clip output registers. The number of components 3225 * in the mask reflects the number of clip planes. 
For example, if 5 3226 * clip planes are needed, we'll declare outputs similar to: 3227 * dcl_output_siv o2.xyzw, clip_distance 3228 * dcl_output_siv o3.x, clip_distance 3229 */ 3230 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ 3231 3232 plane_mask = (1 << num_clip_planes) - 1; 3233 if (plane_mask & 0xf) { 3234 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3235 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, 3236 VGPU10_NAME_CLIP_DISTANCE, cmask); 3237 emit->num_outputs++; 3238 } 3239 if (plane_mask & 0xf0) { 3240 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3241 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, 3242 VGPU10_NAME_CLIP_DISTANCE, cmask); 3243 emit->num_outputs++; 3244 } 3245} 3246 3247 3248/** 3249 * Emit the instructions for writing to the clip distance registers 3250 * to handle legacy/automatic clip planes. 3251 * For each clip plane, the distance is the dot product of the vertex 3252 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. 3253 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE 3254 * output registers already declared. 
3255 */ 3256static void 3257emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 3258 unsigned vpos_tmp_index) 3259{ 3260 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3261 3262 assert(emit->clip_mode == CLIP_LEGACY); 3263 assert(num_clip_planes <= 8); 3264 3265 assert(emit->unit == PIPE_SHADER_VERTEX || 3266 emit->unit == PIPE_SHADER_GEOMETRY); 3267 3268 for (i = 0; i < num_clip_planes; i++) { 3269 struct tgsi_full_dst_register dst; 3270 struct tgsi_full_src_register plane_src, vpos_src; 3271 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3272 unsigned comp = i % 4; 3273 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3274 3275 /* create dst, src regs */ 3276 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3277 dst = writemask_dst(&dst, writemask); 3278 3279 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3280 vpos_src = make_src_temp_reg(vpos_tmp_index); 3281 3282 /* DP4 clip_dist, plane, vpos */ 3283 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3284 &plane_src, &vpos_src, FALSE); 3285 } 3286} 3287 3288 3289/** 3290 * Emit the instructions for computing the clip distance results from 3291 * the clip vertex temporary. 3292 * For each clip plane, the distance is the dot product of the clip vertex 3293 * position (found in a temp reg) and the clip plane coefficients. 
3294 */ 3295static void 3296emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) 3297{ 3298 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); 3299 unsigned i; 3300 struct tgsi_full_dst_register dst; 3301 struct tgsi_full_src_register clipvert_src; 3302 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; 3303 3304 assert(emit->unit == PIPE_SHADER_VERTEX || 3305 emit->unit == PIPE_SHADER_GEOMETRY); 3306 3307 assert(emit->clip_mode == CLIP_VERTEX); 3308 3309 clipvert_src = make_src_temp_reg(clip_vertex_tmp); 3310 3311 for (i = 0; i < num_clip; i++) { 3312 struct tgsi_full_src_register plane_src; 3313 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3314 unsigned comp = i % 4; 3315 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3316 3317 /* create dst, src regs */ 3318 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3319 dst = writemask_dst(&dst, writemask); 3320 3321 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3322 3323 /* DP4 clip_dist, plane, vpos */ 3324 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3325 &plane_src, &clipvert_src, FALSE); 3326 } 3327 3328 /* copy temporary clip vertex register to the clip vertex register */ 3329 3330 assert(emit->clip_vertex_out_index != INVALID_INDEX); 3331 3332 /** 3333 * temporary reset the temporary clip vertex register index so 3334 * that copy to the clip vertex register will not attempt 3335 * to copy to the temporary register again 3336 */ 3337 emit->clip_vertex_tmp_index = INVALID_INDEX; 3338 3339 /* MOV clip_vertex, clip_vertex_tmp */ 3340 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); 3341 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 3342 &dst, &clipvert_src, FALSE); 3343 3344 /** 3345 * set the temporary clip vertex register index back to the 3346 * temporary index for the next vertex 3347 */ 3348 emit->clip_vertex_tmp_index = clip_vertex_tmp; 3349} 3350 3351/** 3352 * Emit code to convert RGBA to BGRA 3353 */ 
3354static void 3355emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 3356 const struct tgsi_full_dst_register *dst, 3357 const struct tgsi_full_src_register *src) 3358{ 3359 struct tgsi_full_src_register bgra_src = 3360 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 3361 3362 begin_emit_instruction(emit); 3363 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 3364 emit_dst_register(emit, dst); 3365 emit_src_register(emit, &bgra_src); 3366 end_emit_instruction(emit); 3367} 3368 3369 3370/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 3371static void 3372emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 3373 const struct tgsi_full_dst_register *dst, 3374 const struct tgsi_full_src_register *src) 3375{ 3376 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 3377 struct tgsi_full_src_register two = 3378 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 3379 struct tgsi_full_src_register neg_two = 3380 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 3381 3382 unsigned val_tmp = get_temp_index(emit); 3383 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 3384 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 3385 3386 unsigned bias_tmp = get_temp_index(emit); 3387 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 3388 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 3389 3390 /* val = src * 2.0 */ 3391 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, 3392 src, &two, FALSE); 3393 3394 /* bias = src > 0.5 */ 3395 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, 3396 src, &half, FALSE); 3397 3398 /* bias = bias & -2.0 */ 3399 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 3400 &bias_src, &neg_two, FALSE); 3401 3402 /* dst = val + bias */ 3403 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 3404 &val_src, &bias_src, FALSE); 3405 3406 free_temp_indexes(emit); 3407} 3408 
3409 3410/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 3411static void 3412emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 3413 const struct tgsi_full_dst_register *dst, 3414 const struct tgsi_full_src_register *src) 3415{ 3416 struct tgsi_full_src_register scale = 3417 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 3418 3419 /* dst = src * scale */ 3420 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); 3421} 3422 3423 3424/** Convert from R32_UINT to 10_10_10_2_sscaled */ 3425static void 3426emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 3427 const struct tgsi_full_dst_register *dst, 3428 const struct tgsi_full_src_register *src) 3429{ 3430 struct tgsi_full_src_register lshift = 3431 make_immediate_reg_int4(emit, 22, 12, 2, 0); 3432 struct tgsi_full_src_register rshift = 3433 make_immediate_reg_int4(emit, 22, 22, 22, 30); 3434 3435 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 3436 3437 unsigned tmp = get_temp_index(emit); 3438 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3439 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3440 3441 /* 3442 * r = (pixel << 22) >> 22; # signed int in [511, -512] 3443 * g = (pixel << 12) >> 22; # signed int in [511, -512] 3444 * b = (pixel << 2) >> 22; # signed int in [511, -512] 3445 * a = (pixel << 0) >> 30; # signed int in [1, -2] 3446 * dst = i_to_f(r,g,b,a); # convert to float 3447 */ 3448 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 3449 &src_xxxx, &lshift, FALSE); 3450 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 3451 &tmp_src, &rshift, FALSE); 3452 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); 3453 3454 free_temp_indexes(emit); 3455} 3456 3457 3458/** 3459 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 
3460 */ 3461static boolean 3462emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 3463 const struct tgsi_full_instruction *inst) 3464{ 3465 unsigned index = inst->Dst[0].Register.Index; 3466 struct tgsi_full_dst_register dst; 3467 unsigned opcode; 3468 3469 assert(index < MAX_VGPU10_ADDR_REGS); 3470 dst = make_dst_temp_reg(emit->address_reg_index[index]); 3471 3472 /* ARL dst, s0 3473 * Translates into: 3474 * FTOI address_tmp, s0 3475 * 3476 * UARL dst, s0 3477 * Translates into: 3478 * MOV address_tmp, s0 3479 */ 3480 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 3481 opcode = VGPU10_OPCODE_FTOI; 3482 else 3483 opcode = VGPU10_OPCODE_MOV; 3484 3485 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); 3486 3487 return TRUE; 3488} 3489 3490 3491/** 3492 * Emit code for TGSI_OPCODE_CAL instruction. 3493 */ 3494static boolean 3495emit_cal(struct svga_shader_emitter_v10 *emit, 3496 const struct tgsi_full_instruction *inst) 3497{ 3498 unsigned label = inst->Label.Label; 3499 VGPU10OperandToken0 operand; 3500 operand.value = 0; 3501 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 3502 3503 begin_emit_instruction(emit); 3504 emit_dword(emit, operand.value); 3505 emit_dword(emit, label); 3506 end_emit_instruction(emit); 3507 3508 return TRUE; 3509} 3510 3511 3512/** 3513 * Emit code for TGSI_OPCODE_IABS instruction. 3514 */ 3515static boolean 3516emit_iabs(struct svga_shader_emitter_v10 *emit, 3517 const struct tgsi_full_instruction *inst) 3518{ 3519 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 3520 * dst.y = (src0.y < 0) ? -src0.y : src0.y 3521 * dst.z = (src0.z < 0) ? -src0.z : src0.z 3522 * dst.w = (src0.w < 0) ? 
-src0.w : src0.w 3523 * 3524 * Translates into 3525 * IMAX dst, src, neg(src) 3526 */ 3527 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 3528 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 3529 &inst->Src[0], &neg_src, FALSE); 3530 3531 return TRUE; 3532} 3533 3534 3535/** 3536 * Emit code for TGSI_OPCODE_CMP instruction. 3537 */ 3538static boolean 3539emit_cmp(struct svga_shader_emitter_v10 *emit, 3540 const struct tgsi_full_instruction *inst) 3541{ 3542 /* dst.x = (src0.x < 0) ? src1.x : src2.x 3543 * dst.y = (src0.y < 0) ? src1.y : src2.y 3544 * dst.z = (src0.z < 0) ? src1.z : src2.z 3545 * dst.w = (src0.w < 0) ? src1.w : src2.w 3546 * 3547 * Translates into 3548 * LT tmp, src0, 0.0 3549 * MOVC dst, tmp, src1, src2 3550 */ 3551 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3552 unsigned tmp = get_temp_index(emit); 3553 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3554 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3555 3556 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, 3557 &inst->Src[0], &zero, FALSE); 3558 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 3559 &tmp_src, &inst->Src[1], &inst->Src[2], 3560 inst->Instruction.Saturate); 3561 3562 free_temp_indexes(emit); 3563 3564 return TRUE; 3565} 3566 3567 3568/** 3569 * Emit code for TGSI_OPCODE_DP2A instruction. 
3570 */ 3571static boolean 3572emit_dp2a(struct svga_shader_emitter_v10 *emit, 3573 const struct tgsi_full_instruction *inst) 3574{ 3575 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x 3576 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x 3577 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x 3578 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x 3579 * Translate into 3580 * MAD tmp.x, s0.y, s1.y, s2.x 3581 * MAD tmp.x, s0.x, s1.x, tmp.x 3582 * MOV dst.xyzw, tmp.xxxx 3583 */ 3584 unsigned tmp = get_temp_index(emit); 3585 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3586 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3587 3588 struct tgsi_full_src_register tmp_src_xxxx = 3589 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3590 struct tgsi_full_dst_register tmp_dst_x = 3591 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3592 3593 struct tgsi_full_src_register src0_xxxx = 3594 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3595 struct tgsi_full_src_register src0_yyyy = 3596 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3597 struct tgsi_full_src_register src1_xxxx = 3598 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 3599 struct tgsi_full_src_register src1_yyyy = 3600 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3601 struct tgsi_full_src_register src2_xxxx = 3602 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); 3603 3604 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, 3605 &src1_yyyy, &src2_xxxx, FALSE); 3606 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, 3607 &src1_xxxx, &tmp_src_xxxx, FALSE); 3608 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3609 &tmp_src_xxxx, inst->Instruction.Saturate); 3610 3611 free_temp_indexes(emit); 3612 3613 return TRUE; 3614} 3615 3616 3617/** 3618 * Emit code for TGSI_OPCODE_DPH instruction. 
3619 */ 3620static boolean 3621emit_dph(struct svga_shader_emitter_v10 *emit, 3622 const struct tgsi_full_instruction *inst) 3623{ 3624 /* 3625 * DP3 tmp, s0, s1 3626 * ADD dst, tmp, s1.wwww 3627 */ 3628 3629 struct tgsi_full_src_register s1_wwww = 3630 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, 3631 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 3632 3633 unsigned tmp = get_temp_index(emit); 3634 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3635 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3636 3637 /* DP3 tmp, s0, s1 */ 3638 emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], 3639 &inst->Src[1], FALSE); 3640 3641 /* ADD dst, tmp, s1.wwww */ 3642 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, 3643 &s1_wwww, inst->Instruction.Saturate); 3644 3645 free_temp_indexes(emit); 3646 3647 return TRUE; 3648} 3649 3650 3651/** 3652 * Emit code for TGSI_OPCODE_DST instruction. 3653 */ 3654static boolean 3655emit_dst(struct svga_shader_emitter_v10 *emit, 3656 const struct tgsi_full_instruction *inst) 3657{ 3658 /* 3659 * dst.x = 1 3660 * dst.y = src0.y * src1.y 3661 * dst.z = src0.z 3662 * dst.w = src1.w 3663 */ 3664 3665 struct tgsi_full_src_register s0_yyyy = 3666 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3667 struct tgsi_full_src_register s0_zzzz = 3668 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 3669 struct tgsi_full_src_register s1_yyyy = 3670 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3671 struct tgsi_full_src_register s1_wwww = 3672 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); 3673 3674 /* 3675 * If dst and either src0 and src1 are the same we need 3676 * to create a temporary for it and insert a extra move. 
3677 */ 3678 unsigned tmp_move = get_temp_index(emit); 3679 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3680 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3681 3682 /* MOV dst.x, 1.0 */ 3683 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3684 struct tgsi_full_dst_register dst_x = 3685 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3686 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3687 3688 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 3689 } 3690 3691 /* MUL dst.y, s0.y, s1.y */ 3692 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3693 struct tgsi_full_dst_register dst_y = 3694 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3695 3696 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, 3697 &s1_yyyy, inst->Instruction.Saturate); 3698 } 3699 3700 /* MOV dst.z, s0.z */ 3701 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3702 struct tgsi_full_dst_register dst_z = 3703 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3704 3705 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, 3706 inst->Instruction.Saturate); 3707 } 3708 3709 /* MOV dst.w, s1.w */ 3710 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3711 struct tgsi_full_dst_register dst_w = 3712 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3713 3714 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, 3715 inst->Instruction.Saturate); 3716 } 3717 3718 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3719 FALSE); 3720 free_temp_indexes(emit); 3721 3722 return TRUE; 3723} 3724 3725 3726 3727/** 3728 * Emit code for TGSI_OPCODE_ENDPRIM (GS only) 3729 */ 3730static boolean 3731emit_endprim(struct svga_shader_emitter_v10 *emit, 3732 const struct tgsi_full_instruction *inst) 3733{ 3734 assert(emit->unit == PIPE_SHADER_GEOMETRY); 3735 3736 /* We can't use emit_simple() because the TGSI instruction has one 3737 * operand (vertex stream 
number) which we must ignore for VGPU10. 3738 */ 3739 begin_emit_instruction(emit); 3740 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); 3741 end_emit_instruction(emit); 3742 return TRUE; 3743} 3744 3745 3746/** 3747 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. 3748 */ 3749static boolean 3750emit_ex2(struct svga_shader_emitter_v10 *emit, 3751 const struct tgsi_full_instruction *inst) 3752{ 3753 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x 3754 * while VGPU10 computes four values. 3755 * 3756 * dst = EX2(src): 3757 * dst.xyzw = 2.0 ^ src.x 3758 */ 3759 3760 struct tgsi_full_src_register src_xxxx = 3761 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3762 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3763 3764 /* EXP tmp, s0.xxxx */ 3765 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, 3766 inst->Instruction.Saturate); 3767 3768 return TRUE; 3769} 3770 3771 3772/** 3773 * Emit code for TGSI_OPCODE_EXP instruction. 3774 */ 3775static boolean 3776emit_exp(struct svga_shader_emitter_v10 *emit, 3777 const struct tgsi_full_instruction *inst) 3778{ 3779 /* 3780 * dst.x = 2 ^ floor(s0.x) 3781 * dst.y = s0.x - floor(s0.x) 3782 * dst.z = 2 ^ s0.x 3783 * dst.w = 1.0 3784 */ 3785 3786 struct tgsi_full_src_register src_xxxx = 3787 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3788 unsigned tmp = get_temp_index(emit); 3789 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3790 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3791 3792 /* 3793 * If dst and src are the same we need to create 3794 * a temporary for it and insert a extra move. 
3795 */ 3796 unsigned tmp_move = get_temp_index(emit); 3797 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3798 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3799 3800 /* only use X component of temp reg */ 3801 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3802 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3803 3804 /* ROUND_NI tmp.x, s0.x */ 3805 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 3806 &src_xxxx, FALSE); /* round to -infinity */ 3807 3808 /* EXP dst.x, tmp.x */ 3809 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3810 struct tgsi_full_dst_register dst_x = 3811 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3812 3813 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, 3814 inst->Instruction.Saturate); 3815 } 3816 3817 /* ADD dst.y, s0.x, -tmp */ 3818 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3819 struct tgsi_full_dst_register dst_y = 3820 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3821 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); 3822 3823 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, 3824 &neg_tmp_src, inst->Instruction.Saturate); 3825 } 3826 3827 /* EXP dst.z, s0.x */ 3828 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3829 struct tgsi_full_dst_register dst_z = 3830 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3831 3832 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, 3833 inst->Instruction.Saturate); 3834 } 3835 3836 /* MOV dst.w, 1.0 */ 3837 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3838 struct tgsi_full_dst_register dst_w = 3839 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3840 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3841 3842 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, 3843 FALSE); 3844 } 3845 3846 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3847 FALSE); 3848 3849 
free_temp_indexes(emit); 3850 3851 return TRUE; 3852} 3853 3854 3855/** 3856 * Emit code for TGSI_OPCODE_IF instruction. 3857 */ 3858static boolean 3859emit_if(struct svga_shader_emitter_v10 *emit, 3860 const struct tgsi_full_instruction *inst) 3861{ 3862 VGPU10OpcodeToken0 opcode0; 3863 3864 /* The src register should be a scalar */ 3865 assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && 3866 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && 3867 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); 3868 3869 /* The only special thing here is that we need to set the 3870 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if 3871 * src.x is non-zero. 3872 */ 3873 opcode0.value = 0; 3874 opcode0.opcodeType = VGPU10_OPCODE_IF; 3875 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 3876 3877 begin_emit_instruction(emit); 3878 emit_dword(emit, opcode0.value); 3879 emit_src_register(emit, &inst->Src[0]); 3880 end_emit_instruction(emit); 3881 3882 return TRUE; 3883} 3884 3885 3886/** 3887 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of 3888 * the register components are negative). 
3889 */ 3890static boolean 3891emit_kill_if(struct svga_shader_emitter_v10 *emit, 3892 const struct tgsi_full_instruction *inst) 3893{ 3894 unsigned tmp = get_temp_index(emit); 3895 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3896 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3897 3898 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3899 3900 struct tgsi_full_dst_register tmp_dst_x = 3901 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3902 struct tgsi_full_src_register tmp_src_xxxx = 3903 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3904 3905 /* tmp = src[0] < 0.0 */ 3906 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 3907 &zero, FALSE); 3908 3909 if (!same_swizzle_terms(&inst->Src[0])) { 3910 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 3911 * logically OR the swizzle terms. Most uses of KILL_IF only 3912 * test one channel so it's good to avoid these extra steps. 3913 */ 3914 struct tgsi_full_src_register tmp_src_yyyy = 3915 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 3916 struct tgsi_full_src_register tmp_src_zzzz = 3917 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 3918 struct tgsi_full_src_register tmp_src_wwww = 3919 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 3920 3921 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3922 &tmp_src_yyyy, FALSE); 3923 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3924 &tmp_src_zzzz, FALSE); 3925 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3926 &tmp_src_wwww, FALSE); 3927 } 3928 3929 begin_emit_instruction(emit); 3930 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 3931 emit_src_register(emit, &tmp_src_xxxx); 3932 end_emit_instruction(emit); 3933 3934 free_temp_indexes(emit); 3935 3936 return TRUE; 3937} 3938 3939 3940/** 3941 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 
3942 */ 3943static boolean 3944emit_kill(struct svga_shader_emitter_v10 *emit, 3945 const struct tgsi_full_instruction *inst) 3946{ 3947 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3948 3949 /* DISCARD if 0.0 is zero */ 3950 begin_emit_instruction(emit); 3951 emit_discard_opcode(emit, FALSE); 3952 emit_src_register(emit, &zero); 3953 end_emit_instruction(emit); 3954 3955 return TRUE; 3956} 3957 3958 3959/** 3960 * Emit code for TGSI_OPCODE_LG2 instruction. 3961 */ 3962static boolean 3963emit_lg2(struct svga_shader_emitter_v10 *emit, 3964 const struct tgsi_full_instruction *inst) 3965{ 3966 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x 3967 * while VGPU10 computes four values. 3968 * 3969 * dst = LG2(src): 3970 * dst.xyzw = log2(src.x) 3971 */ 3972 3973 struct tgsi_full_src_register src_xxxx = 3974 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3975 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3976 3977 /* LOG tmp, s0.xxxx */ 3978 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, 3979 inst->Instruction.Saturate); 3980 3981 return TRUE; 3982} 3983 3984 3985/** 3986 * Emit code for TGSI_OPCODE_LIT instruction. 3987 */ 3988static boolean 3989emit_lit(struct svga_shader_emitter_v10 *emit, 3990 const struct tgsi_full_instruction *inst) 3991{ 3992 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3993 3994 /* 3995 * If dst and src are the same we need to create 3996 * a temporary for it and insert a extra move. 3997 */ 3998 unsigned tmp_move = get_temp_index(emit); 3999 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 4000 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 4001 4002 /* 4003 * dst.x = 1 4004 * dst.y = max(src.x, 0) 4005 * dst.z = (src.x > 0) ? 
max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 4006 * dst.w = 1 4007 */ 4008 4009 /* MOV dst.x, 1.0 */ 4010 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4011 struct tgsi_full_dst_register dst_x = 4012 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 4013 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 4014 } 4015 4016 /* MOV dst.w, 1.0 */ 4017 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4018 struct tgsi_full_dst_register dst_w = 4019 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 4020 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4021 } 4022 4023 /* MAX dst.y, src.x, 0.0 */ 4024 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4025 struct tgsi_full_dst_register dst_y = 4026 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 4027 struct tgsi_full_src_register zero = 4028 make_immediate_reg_float(emit, 0.0f); 4029 struct tgsi_full_src_register src_xxxx = 4030 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4031 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4032 4033 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 4034 &zero, inst->Instruction.Saturate); 4035 } 4036 4037 /* 4038 * tmp1 = clamp(src.w, -128, 128); 4039 * MAX tmp1, src.w, -128 4040 * MIN tmp1, tmp1, 128 4041 * 4042 * tmp2 = max(tmp2, 0); 4043 * MAX tmp2, src.y, 0 4044 * 4045 * tmp1 = pow(tmp2, tmp1); 4046 * LOG tmp2, tmp2 4047 * MUL tmp1, tmp2, tmp1 4048 * EXP tmp1, tmp1 4049 * 4050 * tmp1 = (src.w == 0) ? 1 : tmp1; 4051 * EQ tmp2, 0, src.w 4052 * MOVC tmp1, tmp2, 1.0, tmp1 4053 * 4054 * dst.z = (0 < src.x) ? 
tmp1 : 0; 4055 * LT tmp2, 0, src.x 4056 * MOVC dst.z, tmp2, tmp1, 0.0 4057 */ 4058 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4059 struct tgsi_full_dst_register dst_z = 4060 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 4061 4062 unsigned tmp1 = get_temp_index(emit); 4063 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4064 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4065 unsigned tmp2 = get_temp_index(emit); 4066 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4067 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4068 4069 struct tgsi_full_src_register src_xxxx = 4070 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4071 struct tgsi_full_src_register src_yyyy = 4072 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 4073 struct tgsi_full_src_register src_wwww = 4074 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 4075 4076 struct tgsi_full_src_register zero = 4077 make_immediate_reg_float(emit, 0.0f); 4078 struct tgsi_full_src_register lowerbound = 4079 make_immediate_reg_float(emit, -128.0f); 4080 struct tgsi_full_src_register upperbound = 4081 make_immediate_reg_float(emit, 128.0f); 4082 4083 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 4084 &lowerbound, FALSE); 4085 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 4086 &upperbound, FALSE); 4087 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 4088 &zero, FALSE); 4089 4090 /* POW tmp1, tmp2, tmp1 */ 4091 /* LOG tmp2, tmp2 */ 4092 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, 4093 FALSE); 4094 4095 /* MUL tmp1, tmp2, tmp1 */ 4096 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 4097 &tmp1_src, FALSE); 4098 4099 /* EXP tmp1, tmp1 */ 4100 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, 4101 FALSE); 4102 4103 /* EQ tmp2, 0, src.w */ 4104 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, 4105 &src_wwww, FALSE); 4106 
/* MOVC tmp1.z, tmp2, tmp1, 1.0 */ 4107 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 4108 &tmp2_src, &one, &tmp1_src, FALSE); 4109 4110 /* LT tmp2, 0, src.x */ 4111 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, 4112 &src_xxxx, FALSE); 4113 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 4114 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 4115 &tmp2_src, &tmp1_src, &zero, FALSE); 4116 } 4117 4118 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 4119 FALSE); 4120 free_temp_indexes(emit); 4121 4122 return TRUE; 4123} 4124 4125 4126/** 4127 * Emit code for TGSI_OPCODE_LOG instruction. 4128 */ 4129static boolean 4130emit_log(struct svga_shader_emitter_v10 *emit, 4131 const struct tgsi_full_instruction *inst) 4132{ 4133 /* 4134 * dst.x = floor(lg2(abs(s0.x))) 4135 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 4136 * dst.z = lg2(abs(s0.x)) 4137 * dst.w = 1.0 4138 */ 4139 4140 struct tgsi_full_src_register src_xxxx = 4141 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4142 unsigned tmp = get_temp_index(emit); 4143 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4144 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4145 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 4146 4147 /* only use X component of temp reg */ 4148 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4149 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4150 4151 /* LOG tmp.x, abs(s0.x) */ 4152 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 4153 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, 4154 &abs_src_xxxx, FALSE); 4155 } 4156 4157 /* MOV dst.z, tmp.x */ 4158 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4159 struct tgsi_full_dst_register dst_z = 4160 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 4161 4162 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, 4163 &tmp_src, inst->Instruction.Saturate); 4164 } 4165 4166 /* FLR tmp.x, tmp.x */ 4167 if 
(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 4168 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 4169 &tmp_src, FALSE); 4170 } 4171 4172 /* MOV dst.x, tmp.x */ 4173 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4174 struct tgsi_full_dst_register dst_x = 4175 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4176 4177 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, 4178 inst->Instruction.Saturate); 4179 } 4180 4181 /* EXP tmp.x, tmp.x */ 4182 /* DIV dst.y, abs(s0.x), tmp.x */ 4183 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4184 struct tgsi_full_dst_register dst_y = 4185 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4186 4187 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, 4188 FALSE); 4189 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 4190 &tmp_src, inst->Instruction.Saturate); 4191 } 4192 4193 /* MOV dst.w, 1.0 */ 4194 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4195 struct tgsi_full_dst_register dst_w = 4196 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 4197 struct tgsi_full_src_register one = 4198 make_immediate_reg_float(emit, 1.0f); 4199 4200 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4201 } 4202 4203 free_temp_indexes(emit); 4204 4205 return TRUE; 4206} 4207 4208 4209/** 4210 * Emit code for TGSI_OPCODE_LRP instruction. 
4211 */ 4212static boolean 4213emit_lrp(struct svga_shader_emitter_v10 *emit, 4214 const struct tgsi_full_instruction *inst) 4215{ 4216 /* dst = LRP(s0, s1, s2): 4217 * dst = s0 * (s1 - s2) + s2 4218 * Translates into: 4219 * SUB tmp, s1, s2; tmp = s1 - s2 4220 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 4221 */ 4222 unsigned tmp = get_temp_index(emit); 4223 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 4224 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 4225 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 4226 4227 /* ADD tmp, s1, -s2 */ 4228 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, 4229 &inst->Src[1], &neg_src2, FALSE); 4230 4231 /* MAD dst, s1, tmp, s3 */ 4232 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 4233 &inst->Src[0], &src_tmp, &inst->Src[2], 4234 inst->Instruction.Saturate); 4235 4236 free_temp_indexes(emit); 4237 4238 return TRUE; 4239} 4240 4241 4242/** 4243 * Emit code for TGSI_OPCODE_POW instruction. 4244 */ 4245static boolean 4246emit_pow(struct svga_shader_emitter_v10 *emit, 4247 const struct tgsi_full_instruction *inst) 4248{ 4249 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 4250 * src1.x while VGPU10 computes four values. 
4251 * 4252 * dst = POW(src0, src1): 4253 * dst.xyzw = src0.x ^ src1.x 4254 */ 4255 unsigned tmp = get_temp_index(emit); 4256 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4257 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4258 struct tgsi_full_src_register src0_xxxx = 4259 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4260 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4261 struct tgsi_full_src_register src1_xxxx = 4262 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4263 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4264 4265 /* LOG tmp, s0.xxxx */ 4266 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, 4267 FALSE); 4268 4269 /* MUL tmp, tmp, s1.xxxx */ 4270 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, 4271 &src1_xxxx, FALSE); 4272 4273 /* EXP tmp, s0.xxxx */ 4274 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], 4275 &tmp_src, inst->Instruction.Saturate); 4276 4277 /* free tmp */ 4278 free_temp_indexes(emit); 4279 4280 return TRUE; 4281} 4282 4283 4284/** 4285 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 
4286 */ 4287static boolean 4288emit_rcp(struct svga_shader_emitter_v10 *emit, 4289 const struct tgsi_full_instruction *inst) 4290{ 4291 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4292 4293 unsigned tmp = get_temp_index(emit); 4294 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4295 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4296 4297 struct tgsi_full_dst_register tmp_dst_x = 4298 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4299 struct tgsi_full_src_register tmp_src_xxxx = 4300 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4301 4302 /* DIV tmp.x, 1.0, s0 */ 4303 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, 4304 &inst->Src[0], FALSE); 4305 4306 /* MOV dst, tmp.xxxx */ 4307 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4308 &tmp_src_xxxx, inst->Instruction.Saturate); 4309 4310 free_temp_indexes(emit); 4311 4312 return TRUE; 4313} 4314 4315 4316/** 4317 * Emit code for TGSI_OPCODE_RSQ instruction. 4318 */ 4319static boolean 4320emit_rsq(struct svga_shader_emitter_v10 *emit, 4321 const struct tgsi_full_instruction *inst) 4322{ 4323 /* dst = RSQ(src): 4324 * dst.xyzw = 1 / sqrt(src.x) 4325 * Translates into: 4326 * RSQ tmp, src.x 4327 * MOV dst, tmp.xxxx 4328 */ 4329 4330 unsigned tmp = get_temp_index(emit); 4331 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4332 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4333 4334 struct tgsi_full_dst_register tmp_dst_x = 4335 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4336 struct tgsi_full_src_register tmp_src_xxxx = 4337 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4338 4339 /* RSQ tmp, src.x */ 4340 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, 4341 &inst->Src[0], FALSE); 4342 4343 /* MOV dst, tmp.xxxx */ 4344 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4345 &tmp_src_xxxx, inst->Instruction.Saturate); 4346 4347 /* free tmp */ 4348 free_temp_indexes(emit); 4349 4350 return TRUE; 
4351} 4352 4353 4354/** 4355 * Emit code for TGSI_OPCODE_SCS instruction. 4356 */ 4357static boolean 4358emit_scs(struct svga_shader_emitter_v10 *emit, 4359 const struct tgsi_full_instruction *inst) 4360{ 4361 /* dst.x = cos(src.x) 4362 * dst.y = sin(src.x) 4363 * dst.z = 0.0 4364 * dst.w = 1.0 4365 */ 4366 struct tgsi_full_dst_register dst_x = 4367 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4368 struct tgsi_full_dst_register dst_y = 4369 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4370 struct tgsi_full_dst_register dst_zw = 4371 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); 4372 4373 struct tgsi_full_src_register zero_one = 4374 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); 4375 4376 begin_emit_instruction(emit); 4377 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); 4378 emit_dst_register(emit, &dst_y); 4379 emit_dst_register(emit, &dst_x); 4380 emit_src_register(emit, &inst->Src[0]); 4381 end_emit_instruction(emit); 4382 4383 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 4384 &dst_zw, &zero_one, inst->Instruction.Saturate); 4385 4386 return TRUE; 4387} 4388 4389 4390/** 4391 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 4392 */ 4393static boolean 4394emit_seq(struct svga_shader_emitter_v10 *emit, 4395 const struct tgsi_full_instruction *inst) 4396{ 4397 /* dst = SEQ(s0, s1): 4398 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 4399 * Translates into: 4400 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4401 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4402 */ 4403 unsigned tmp = get_temp_index(emit); 4404 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4405 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4406 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4407 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4408 4409 /* EQ tmp, s0, s1 */ 4410 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 4411 &inst->Src[1], FALSE); 4412 4413 /* MOVC dst, tmp, one, zero */ 4414 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4415 &one, &zero, FALSE); 4416 4417 free_temp_indexes(emit); 4418 4419 return TRUE; 4420} 4421 4422 4423/** 4424 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 4425 */ 4426static boolean 4427emit_sge(struct svga_shader_emitter_v10 *emit, 4428 const struct tgsi_full_instruction *inst) 4429{ 4430 /* dst = SGE(s0, s1): 4431 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 4432 * Translates into: 4433 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 4434 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4435 */ 4436 unsigned tmp = get_temp_index(emit); 4437 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4438 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4439 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4440 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4441 4442 /* GE tmp, s0, s1 */ 4443 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 4444 &inst->Src[1], FALSE); 4445 4446 /* MOVC dst, tmp, one, zero */ 4447 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4448 &one, &zero, FALSE); 4449 4450 free_temp_indexes(emit); 4451 4452 return TRUE; 4453} 4454 4455 4456/** 4457 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 
4458 */ 4459static boolean 4460emit_sgt(struct svga_shader_emitter_v10 *emit, 4461 const struct tgsi_full_instruction *inst) 4462{ 4463 /* dst = SGT(s0, s1): 4464 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4465 * Translates into: 4466 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4467 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4468 */ 4469 unsigned tmp = get_temp_index(emit); 4470 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4471 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4472 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4473 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4474 4475 /* LT tmp, s1, s0 */ 4476 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4477 &inst->Src[0], FALSE); 4478 4479 /* MOVC dst, tmp, one, zero */ 4480 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4481 &one, &zero, FALSE); 4482 4483 free_temp_indexes(emit); 4484 4485 return TRUE; 4486} 4487 4488 4489/** 4490 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
4491 */ 4492static boolean 4493emit_sincos(struct svga_shader_emitter_v10 *emit, 4494 const struct tgsi_full_instruction *inst) 4495{ 4496 unsigned tmp = get_temp_index(emit); 4497 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4498 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4499 4500 struct tgsi_full_src_register tmp_src_xxxx = 4501 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4502 struct tgsi_full_dst_register tmp_dst_x = 4503 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4504 4505 begin_emit_instruction(emit); 4506 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 4507 4508 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 4509 { 4510 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 4511 emit_null_dst_register(emit); /* second destination register */ 4512 } 4513 else { 4514 emit_null_dst_register(emit); 4515 emit_dst_register(emit, &tmp_dst_x); 4516 } 4517 4518 emit_src_register(emit, &inst->Src[0]); 4519 end_emit_instruction(emit); 4520 4521 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4522 &tmp_src_xxxx, inst->Instruction.Saturate); 4523 4524 free_temp_indexes(emit); 4525 4526 return TRUE; 4527} 4528 4529 4530/** 4531 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 4532 */ 4533static boolean 4534emit_sle(struct svga_shader_emitter_v10 *emit, 4535 const struct tgsi_full_instruction *inst) 4536{ 4537 /* dst = SLE(s0, s1): 4538 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 4539 * Translates into: 4540 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 4541 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4542 */ 4543 unsigned tmp = get_temp_index(emit); 4544 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4545 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4546 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4547 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4548 4549 /* GE tmp, s1, s0 */ 4550 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 4551 &inst->Src[0], FALSE); 4552 4553 /* MOVC dst, tmp, one, zero */ 4554 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4555 &one, &zero, FALSE); 4556 4557 free_temp_indexes(emit); 4558 4559 return TRUE; 4560} 4561 4562 4563/** 4564 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 4565 */ 4566static boolean 4567emit_slt(struct svga_shader_emitter_v10 *emit, 4568 const struct tgsi_full_instruction *inst) 4569{ 4570 /* dst = SLT(s0, s1): 4571 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 4572 * Translates into: 4573 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 4574 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4575 */ 4576 unsigned tmp = get_temp_index(emit); 4577 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4578 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4579 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4580 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4581 4582 /* LT tmp, s0, s1 */ 4583 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 4584 &inst->Src[1], FALSE); 4585 4586 /* MOVC dst, tmp, one, zero */ 4587 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4588 &one, &zero, FALSE); 4589 4590 free_temp_indexes(emit); 4591 4592 return TRUE; 4593} 4594 4595 4596/** 4597 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 
4598 */ 4599static boolean 4600emit_sne(struct svga_shader_emitter_v10 *emit, 4601 const struct tgsi_full_instruction *inst) 4602{ 4603 /* dst = SNE(s0, s1): 4604 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4605 * Translates into: 4606 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4607 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4608 */ 4609 unsigned tmp = get_temp_index(emit); 4610 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4611 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4612 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4613 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4614 4615 /* NE tmp, s0, s1 */ 4616 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4617 &inst->Src[1], FALSE); 4618 4619 /* MOVC dst, tmp, one, zero */ 4620 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4621 &one, &zero, FALSE); 4622 4623 free_temp_indexes(emit); 4624 4625 return TRUE; 4626} 4627 4628 4629/** 4630 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4631 */ 4632static boolean 4633emit_ssg(struct svga_shader_emitter_v10 *emit, 4634 const struct tgsi_full_instruction *inst) 4635{ 4636 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4637 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4638 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4639 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4640 * Translates into: 4641 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4642 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4643 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4644 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) 4645 */ 4646 struct tgsi_full_src_register zero = 4647 make_immediate_reg_float(emit, 0.0f); 4648 struct tgsi_full_src_register one = 4649 make_immediate_reg_float(emit, 1.0f); 4650 struct tgsi_full_src_register neg_one = 4651 make_immediate_reg_float(emit, -1.0f); 4652 4653 unsigned tmp1 = get_temp_index(emit); 4654 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4655 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4656 4657 unsigned tmp2 = get_temp_index(emit); 4658 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4659 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4660 4661 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4662 &zero, FALSE); 4663 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4664 &neg_one, &zero, FALSE); 4665 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4666 &inst->Src[0], FALSE); 4667 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4668 &one, &tmp2_src, FALSE); 4669 4670 free_temp_indexes(emit); 4671 4672 return TRUE; 4673} 4674 4675 4676/** 4677 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4678 */ 4679static boolean 4680emit_issg(struct svga_shader_emitter_v10 *emit, 4681 const struct tgsi_full_instruction *inst) 4682{ 4683 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4684 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4685 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4686 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4687 * Translates into: 4688 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4689 * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) 4690 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4691 */ 4692 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4693 4694 unsigned tmp1 = get_temp_index(emit); 4695 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4696 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4697 4698 unsigned tmp2 = get_temp_index(emit); 4699 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4700 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4701 4702 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4703 4704 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4705 &inst->Src[0], &zero, FALSE); 4706 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4707 &zero, &inst->Src[0], FALSE); 4708 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4709 &tmp1_src, &neg_tmp2, FALSE); 4710 4711 free_temp_indexes(emit); 4712 4713 return TRUE; 4714} 4715 4716 4717/** 4718 * Emit code for TGSI_OPCODE_SUB instruction. 4719 */ 4720static boolean 4721emit_sub(struct svga_shader_emitter_v10 *emit, 4722 const struct tgsi_full_instruction *inst) 4723{ 4724 /* dst = SUB(s0, s1): 4725 * dst = s0 - s1 4726 * Translates into: 4727 * ADD dst, s0, neg(s1) 4728 */ 4729 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); 4730 4731 /* ADD dst, s0, neg(s1) */ 4732 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], 4733 &inst->Src[0], &neg_src1, 4734 inst->Instruction.Saturate); 4735 4736 return TRUE; 4737} 4738 4739 4740/** 4741 * Emit a comparison instruction. The dest register will get 4742 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 
4743 */ 4744static void 4745emit_comparison(struct svga_shader_emitter_v10 *emit, 4746 SVGA3dCmpFunc func, 4747 const struct tgsi_full_dst_register *dst, 4748 const struct tgsi_full_src_register *src0, 4749 const struct tgsi_full_src_register *src1) 4750{ 4751 struct tgsi_full_src_register immediate; 4752 VGPU10OpcodeToken0 opcode0; 4753 boolean swapSrc = FALSE; 4754 4755 /* Sanity checks for svga vs. gallium enums */ 4756 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 4757 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 4758 4759 opcode0.value = 0; 4760 4761 switch (func) { 4762 case SVGA3D_CMP_NEVER: 4763 immediate = make_immediate_reg_int(emit, 0); 4764 /* MOV dst, {0} */ 4765 begin_emit_instruction(emit); 4766 emit_dword(emit, VGPU10_OPCODE_MOV); 4767 emit_dst_register(emit, dst); 4768 emit_src_register(emit, &immediate); 4769 end_emit_instruction(emit); 4770 return; 4771 case SVGA3D_CMP_ALWAYS: 4772 immediate = make_immediate_reg_int(emit, -1); 4773 /* MOV dst, {-1} */ 4774 begin_emit_instruction(emit); 4775 emit_dword(emit, VGPU10_OPCODE_MOV); 4776 emit_dst_register(emit, dst); 4777 emit_src_register(emit, &immediate); 4778 end_emit_instruction(emit); 4779 return; 4780 case SVGA3D_CMP_LESS: 4781 opcode0.opcodeType = VGPU10_OPCODE_LT; 4782 break; 4783 case SVGA3D_CMP_EQUAL: 4784 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4785 break; 4786 case SVGA3D_CMP_LESSEQUAL: 4787 opcode0.opcodeType = VGPU10_OPCODE_GE; 4788 swapSrc = TRUE; 4789 break; 4790 case SVGA3D_CMP_GREATER: 4791 opcode0.opcodeType = VGPU10_OPCODE_LT; 4792 swapSrc = TRUE; 4793 break; 4794 case SVGA3D_CMP_NOTEQUAL: 4795 opcode0.opcodeType = VGPU10_OPCODE_NE; 4796 break; 4797 case SVGA3D_CMP_GREATEREQUAL: 4798 opcode0.opcodeType = VGPU10_OPCODE_GE; 4799 break; 4800 default: 4801 assert(!"Unexpected comparison mode"); 4802 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4803 } 4804 4805 begin_emit_instruction(emit); 4806 emit_dword(emit, opcode0.value); 4807 emit_dst_register(emit, dst); 
4808 if (swapSrc) { 4809 emit_src_register(emit, src1); 4810 emit_src_register(emit, src0); 4811 } 4812 else { 4813 emit_src_register(emit, src0); 4814 emit_src_register(emit, src1); 4815 } 4816 end_emit_instruction(emit); 4817} 4818 4819 4820/** 4821 * Get texel/address offsets for a texture instruction. 4822 */ 4823static void 4824get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 4825 const struct tgsi_full_instruction *inst, int offsets[3]) 4826{ 4827 if (inst->Texture.NumOffsets == 1) { 4828 /* According to OpenGL Shader Language spec the offsets are only 4829 * fetched from a previously-declared immediate/literal. 4830 */ 4831 const struct tgsi_texture_offset *off = inst->TexOffsets; 4832 const unsigned index = off[0].Index; 4833 const unsigned swizzleX = off[0].SwizzleX; 4834 const unsigned swizzleY = off[0].SwizzleY; 4835 const unsigned swizzleZ = off[0].SwizzleZ; 4836 const union tgsi_immediate_data *imm = emit->immediates[index]; 4837 4838 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 4839 4840 offsets[0] = imm[swizzleX].Int; 4841 offsets[1] = imm[swizzleY].Int; 4842 offsets[2] = imm[swizzleZ].Int; 4843 } 4844 else { 4845 offsets[0] = offsets[1] = offsets[2] = 0; 4846 } 4847} 4848 4849 4850/** 4851 * Set up the coordinate register for texture sampling. 4852 * When we're sampling from a RECT texture we have to scale the 4853 * unnormalized coordinate to a normalized coordinate. 4854 * We do that by multiplying the coordinate by an "extra" constant. 4855 * An alternative would be to use the RESINFO instruction to query the 4856 * texture's size. 
4857 */ 4858static struct tgsi_full_src_register 4859setup_texcoord(struct svga_shader_emitter_v10 *emit, 4860 unsigned unit, 4861 const struct tgsi_full_src_register *coord) 4862{ 4863 if (emit->key.tex[unit].unnormalized) { 4864 unsigned scale_index = emit->texcoord_scale_index[unit]; 4865 unsigned tmp = get_temp_index(emit); 4866 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4867 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4868 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 4869 4870 /* MUL tmp, coord, const[] */ 4871 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 4872 coord, &scale_src, FALSE); 4873 return tmp_src; 4874 } 4875 else { 4876 /* use texcoord as-is */ 4877 return *coord; 4878 } 4879} 4880 4881 4882/** 4883 * For SAMPLE_C instructions, emit the extra src register which indicates 4884 * the reference/comparision value. 4885 */ 4886static void 4887emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 4888 unsigned target, 4889 const struct tgsi_full_src_register *coord) 4890{ 4891 struct tgsi_full_src_register coord_src_ref; 4892 unsigned component; 4893 4894 assert(tgsi_is_shadow_target(target)); 4895 4896 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ 4897 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || 4898 target == TGSI_TEXTURE_SHADOWCUBE) 4899 component = TGSI_SWIZZLE_W; 4900 else 4901 component = TGSI_SWIZZLE_Z; 4902 4903 coord_src_ref = scalar_src(coord, component); 4904 4905 emit_src_register(emit, &coord_src_ref); 4906} 4907 4908 4909/** 4910 * Info for implementing texture swizzles. 4911 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 4912 * functions use this to encapsulate the extra steps needed to perform 4913 * a texture swizzle, or shadow/depth comparisons. 
4914 * The shadow/depth comparison is only done here if for the cases where 4915 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). 4916 */ 4917struct tex_swizzle_info 4918{ 4919 boolean swizzled; 4920 boolean shadow_compare; 4921 unsigned unit; 4922 unsigned texture_target; /**< TGSI_TEXTURE_x */ 4923 struct tgsi_full_src_register tmp_src; 4924 struct tgsi_full_dst_register tmp_dst; 4925 const struct tgsi_full_dst_register *inst_dst; 4926 const struct tgsi_full_src_register *coord_src; 4927}; 4928 4929 4930/** 4931 * Do setup for handling texture swizzles or shadow compares. 4932 * \param unit the texture unit 4933 * \param inst the TGSI texture instruction 4934 * \param shadow_compare do shadow/depth comparison? 4935 * \param swz returns the swizzle info 4936 */ 4937static void 4938begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4939 unsigned unit, 4940 const struct tgsi_full_instruction *inst, 4941 boolean shadow_compare, 4942 struct tex_swizzle_info *swz) 4943{ 4944 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 4945 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 4946 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 4947 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 4948 4949 swz->shadow_compare = shadow_compare; 4950 swz->texture_target = inst->Texture.Texture; 4951 4952 if (swz->swizzled || shadow_compare) { 4953 /* Allocate temp register for the result of the SAMPLE instruction 4954 * and the source of the MOV/compare/swizzle instructions. 4955 */ 4956 unsigned tmp = get_temp_index(emit); 4957 swz->tmp_src = make_src_temp_reg(tmp); 4958 swz->tmp_dst = make_dst_temp_reg(tmp); 4959 4960 swz->unit = unit; 4961 } 4962 swz->inst_dst = &inst->Dst[0]; 4963 swz->coord_src = &inst->Src[0]; 4964} 4965 4966 4967/** 4968 * Returns the register to put the SAMPLE instruction results into. 
4969 * This will either be the original instruction dst reg (if no swizzle 4970 * and no shadow comparison) or a temporary reg if there is a swizzle. 4971 */ 4972static const struct tgsi_full_dst_register * 4973get_tex_swizzle_dst(const struct tex_swizzle_info *swz) 4974{ 4975 return (swz->swizzled || swz->shadow_compare) 4976 ? &swz->tmp_dst : swz->inst_dst; 4977} 4978 4979 4980/** 4981 * This emits the MOV instruction that actually implements a texture swizzle 4982 * and/or shadow comparison. 4983 */ 4984static void 4985end_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4986 const struct tex_swizzle_info *swz) 4987{ 4988 if (swz->shadow_compare) { 4989 /* Emit extra instructions to compare the fetched texel value against 4990 * a texture coordinate component. The result of the comparison 4991 * is 0.0 or 1.0. 4992 */ 4993 struct tgsi_full_src_register coord_src; 4994 struct tgsi_full_src_register texel_src = 4995 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); 4996 struct tgsi_full_src_register one = 4997 make_immediate_reg_float(emit, 1.0f); 4998 /* convert gallium comparison func to SVGA comparison func */ 4999 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; 5000 5001 assert(emit->unit == PIPE_SHADER_FRAGMENT); 5002 5003 switch (swz->texture_target) { 5004 case TGSI_TEXTURE_SHADOW2D: 5005 case TGSI_TEXTURE_SHADOWRECT: 5006 case TGSI_TEXTURE_SHADOW1D_ARRAY: 5007 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 5008 break; 5009 case TGSI_TEXTURE_SHADOW1D: 5010 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); 5011 break; 5012 case TGSI_TEXTURE_SHADOWCUBE: 5013 case TGSI_TEXTURE_SHADOW2D_ARRAY: 5014 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); 5015 break; 5016 default: 5017 assert(!"Unexpected texture target in end_tex_swizzle()"); 5018 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 5019 } 5020 5021 /* COMPARE tmp, coord, texel */ 5022 /* XXX it would seem that the texel and coord arguments should 5023 * be 
transposed here, but piglit tests indicate otherwise. 5024 */ 5025 emit_comparison(emit, compare_func, 5026 &swz->tmp_dst, &texel_src, &coord_src); 5027 5028 /* AND dest, tmp, {1.0} */ 5029 begin_emit_instruction(emit); 5030 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); 5031 if (swz->swizzled) { 5032 emit_dst_register(emit, &swz->tmp_dst); 5033 } 5034 else { 5035 emit_dst_register(emit, swz->inst_dst); 5036 } 5037 emit_src_register(emit, &swz->tmp_src); 5038 emit_src_register(emit, &one); 5039 end_emit_instruction(emit); 5040 } 5041 5042 if (swz->swizzled) { 5043 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; 5044 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; 5045 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; 5046 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; 5047 unsigned writemask_0 = 0, writemask_1 = 0; 5048 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); 5049 5050 /* Swizzle w/out zero/one terms */ 5051 struct tgsi_full_src_register src_swizzled = 5052 swizzle_src(&swz->tmp_src, 5053 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, 5054 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, 5055 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, 5056 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); 5057 5058 /* MOV dst, color(tmp).<swizzle> */ 5059 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5060 swz->inst_dst, &src_swizzled, FALSE); 5061 5062 /* handle swizzle zero terms */ 5063 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | 5064 ((swz_g == PIPE_SWIZZLE_0) << 1) | 5065 ((swz_b == PIPE_SWIZZLE_0) << 2) | 5066 ((swz_a == PIPE_SWIZZLE_0) << 3)); 5067 5068 if (writemask_0) { 5069 struct tgsi_full_src_register zero = int_tex ? 
5070 make_immediate_reg_int(emit, 0) : 5071 make_immediate_reg_float(emit, 0.0f); 5072 struct tgsi_full_dst_register dst = 5073 writemask_dst(swz->inst_dst, writemask_0); 5074 5075 /* MOV dst.writemask_0, {0,0,0,0} */ 5076 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5077 &dst, &zero, FALSE); 5078 } 5079 5080 /* handle swizzle one terms */ 5081 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | 5082 ((swz_g == PIPE_SWIZZLE_1) << 1) | 5083 ((swz_b == PIPE_SWIZZLE_1) << 2) | 5084 ((swz_a == PIPE_SWIZZLE_1) << 3)); 5085 5086 if (writemask_1) { 5087 struct tgsi_full_src_register one = int_tex ? 5088 make_immediate_reg_int(emit, 1) : 5089 make_immediate_reg_float(emit, 1.0f); 5090 struct tgsi_full_dst_register dst = 5091 writemask_dst(swz->inst_dst, writemask_1); 5092 5093 /* MOV dst.writemask_1, {1,1,1,1} */ 5094 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); 5095 } 5096 } 5097} 5098 5099 5100/** 5101 * Emit code for TGSI_OPCODE_SAMPLE instruction. 5102 */ 5103static boolean 5104emit_sample(struct svga_shader_emitter_v10 *emit, 5105 const struct tgsi_full_instruction *inst) 5106{ 5107 const unsigned resource_unit = inst->Src[1].Register.Index; 5108 const unsigned sampler_unit = inst->Src[2].Register.Index; 5109 struct tgsi_full_src_register coord; 5110 int offsets[3]; 5111 struct tex_swizzle_info swz_info; 5112 5113 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); 5114 5115 get_texel_offsets(emit, inst, offsets); 5116 5117 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); 5118 5119 /* SAMPLE dst, coord(s0), resource, sampler */ 5120 begin_emit_instruction(emit); 5121 5122 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, 5123 inst->Instruction.Saturate, offsets); 5124 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5125 emit_src_register(emit, &coord); 5126 emit_resource_register(emit, resource_unit); 5127 emit_sampler_register(emit, sampler_unit); 5128 end_emit_instruction(emit); 5129 5130 end_tex_swizzle(emit, 
&swz_info); 5131 5132 free_temp_indexes(emit); 5133 5134 return TRUE; 5135} 5136 5137 5138/** 5139 * Check if a texture instruction is valid. 5140 * An example of an invalid texture instruction is doing shadow comparison 5141 * with an integer-valued texture. 5142 * If we detect an invalid texture instruction, we replace it with: 5143 * MOV dst, {1,1,1,1}; 5144 * \return TRUE if valid, FALSE if invalid. 5145 */ 5146static boolean 5147is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 5148 const struct tgsi_full_instruction *inst) 5149{ 5150 const unsigned unit = inst->Src[1].Register.Index; 5151 const unsigned target = inst->Texture.Texture; 5152 boolean valid = TRUE; 5153 5154 if (tgsi_is_shadow_target(target) && 5155 is_integer_type(emit->sampler_return_type[unit])) { 5156 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 5157 valid = FALSE; 5158 } 5159 /* XXX might check for other conditions in the future here */ 5160 5161 if (!valid) { 5162 /* emit a MOV dst, {1,1,1,1} instruction. 
*/ 5163 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 5164 begin_emit_instruction(emit); 5165 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5166 emit_dst_register(emit, &inst->Dst[0]); 5167 emit_src_register(emit, &one); 5168 end_emit_instruction(emit); 5169 } 5170 5171 return valid; 5172} 5173 5174 5175/** 5176 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 5177 */ 5178static boolean 5179emit_tex(struct svga_shader_emitter_v10 *emit, 5180 const struct tgsi_full_instruction *inst) 5181{ 5182 const uint unit = inst->Src[1].Register.Index; 5183 unsigned target = inst->Texture.Texture; 5184 unsigned opcode; 5185 struct tgsi_full_src_register coord; 5186 int offsets[3]; 5187 struct tex_swizzle_info swz_info; 5188 5189 /* check that the sampler returns a float */ 5190 if (!is_valid_tex_instruction(emit, inst)) 5191 return TRUE; 5192 5193 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5194 5195 get_texel_offsets(emit, inst, offsets); 5196 5197 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5198 5199 /* SAMPLE dst, coord(s0), resource, sampler */ 5200 begin_emit_instruction(emit); 5201 5202 if (tgsi_is_shadow_target(target)) 5203 opcode = VGPU10_OPCODE_SAMPLE_C; 5204 else 5205 opcode = VGPU10_OPCODE_SAMPLE; 5206 5207 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5208 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5209 emit_src_register(emit, &coord); 5210 emit_resource_register(emit, unit); 5211 emit_sampler_register(emit, unit); 5212 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5213 emit_tex_compare_refcoord(emit, target, &coord); 5214 } 5215 end_emit_instruction(emit); 5216 5217 end_tex_swizzle(emit, &swz_info); 5218 5219 free_temp_indexes(emit); 5220 5221 return TRUE; 5222} 5223 5224 5225/** 5226 * Emit code for TGSI_OPCODE_TXP (projective texture) 5227 */ 5228static boolean 5229emit_txp(struct svga_shader_emitter_v10 *emit, 5230 const struct tgsi_full_instruction *inst) 5231{ 5232 const uint 
unit = inst->Src[1].Register.Index; 5233 unsigned target = inst->Texture.Texture; 5234 unsigned opcode; 5235 int offsets[3]; 5236 unsigned tmp = get_temp_index(emit); 5237 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 5238 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 5239 struct tgsi_full_src_register src0_wwww = 5240 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5241 struct tgsi_full_src_register coord; 5242 struct tex_swizzle_info swz_info; 5243 5244 /* check that the sampler returns a float */ 5245 if (!is_valid_tex_instruction(emit, inst)) 5246 return TRUE; 5247 5248 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5249 5250 get_texel_offsets(emit, inst, offsets); 5251 5252 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5253 5254 /* DIV tmp, coord, coord.wwww */ 5255 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 5256 &coord, &src0_wwww, FALSE); 5257 5258 /* SAMPLE dst, coord(tmp), resource, sampler */ 5259 begin_emit_instruction(emit); 5260 5261 if (tgsi_is_shadow_target(target)) 5262 opcode = VGPU10_OPCODE_SAMPLE_C; 5263 else 5264 opcode = VGPU10_OPCODE_SAMPLE; 5265 5266 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5267 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5268 emit_src_register(emit, &tmp_src); /* projected coord */ 5269 emit_resource_register(emit, unit); 5270 emit_sampler_register(emit, unit); 5271 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5272 emit_tex_compare_refcoord(emit, target, &tmp_src); 5273 } 5274 end_emit_instruction(emit); 5275 5276 end_tex_swizzle(emit, &swz_info); 5277 5278 free_temp_indexes(emit); 5279 5280 return TRUE; 5281} 5282 5283 5284/* 5285 * Emit code for TGSI_OPCODE_XPD instruction. 
5286 */ 5287static boolean 5288emit_xpd(struct svga_shader_emitter_v10 *emit, 5289 const struct tgsi_full_instruction *inst) 5290{ 5291 /* dst.x = src0.y * src1.z - src1.y * src0.z 5292 * dst.y = src0.z * src1.x - src1.z * src0.x 5293 * dst.z = src0.x * src1.y - src1.x * src0.y 5294 * dst.w = 1 5295 */ 5296 struct tgsi_full_src_register s0_xxxx = 5297 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 5298 struct tgsi_full_src_register s0_yyyy = 5299 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 5300 struct tgsi_full_src_register s0_zzzz = 5301 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 5302 5303 struct tgsi_full_src_register s1_xxxx = 5304 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5305 struct tgsi_full_src_register s1_yyyy = 5306 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 5307 struct tgsi_full_src_register s1_zzzz = 5308 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z); 5309 5310 unsigned tmp1 = get_temp_index(emit); 5311 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 5312 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 5313 5314 unsigned tmp2 = get_temp_index(emit); 5315 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 5316 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 5317 struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src); 5318 5319 unsigned tmp3 = get_temp_index(emit); 5320 struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3); 5321 struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3); 5322 struct tgsi_full_dst_register tmp3_dst_x = 5323 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X); 5324 struct tgsi_full_dst_register tmp3_dst_y = 5325 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y); 5326 struct tgsi_full_dst_register tmp3_dst_z = 5327 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z); 5328 struct tgsi_full_dst_register tmp3_dst_w = 5329 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W); 5330 5331 /* Note: we put all the intermediate computations into tmp3 in case 5332 * the XPD dest 
register is that same as one of the src regs (in which 5333 * case we could clobber a src reg before we're done with it) . 5334 * 5335 * Note: we could get by with just one temp register instead of three 5336 * since we're doing scalar operations and there's enough room in one 5337 * temp for everything. 5338 */ 5339 5340 /* MUL tmp1, src0.y, src1.z */ 5341 /* MUL tmp2, src1.y, src0.z */ 5342 /* ADD tmp3.x, tmp1, -tmp2 */ 5343 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 5344 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, 5345 &s0_yyyy, &s1_zzzz, FALSE); 5346 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, 5347 &s1_yyyy, &s0_zzzz, FALSE); 5348 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, 5349 &tmp1_src, &neg_tmp2_src, FALSE); 5350 } 5351 5352 /* MUL tmp1, src0.z, src1.x */ 5353 /* MUL tmp2, src1.z, src0.x */ 5354 /* ADD tmp3.y, tmp1, -tmp2 */ 5355 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 5356 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, 5357 &s1_xxxx, FALSE); 5358 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, 5359 &s0_xxxx, FALSE); 5360 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, 5361 &tmp1_src, &neg_tmp2_src, FALSE); 5362 } 5363 5364 /* MUL tmp1, src0.x, src1.y */ 5365 /* MUL tmp2, src1.x, src0.y */ 5366 /* ADD tmp3.z, tmp1, -tmp2 */ 5367 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 5368 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, 5369 &s1_yyyy, FALSE); 5370 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, 5371 &s0_yyyy, FALSE); 5372 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, 5373 &tmp1_src, &neg_tmp2_src, FALSE); 5374 } 5375 5376 /* MOV tmp3.w, 1.0 */ 5377 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 5378 struct tgsi_full_src_register one = 5379 make_immediate_reg_float(emit, 1.0f); 5380 5381 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, 
FALSE); 5382 } 5383 5384 /* MOV dst, tmp3 */ 5385 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src, 5386 inst->Instruction.Saturate); 5387 5388 5389 free_temp_indexes(emit); 5390 5391 return TRUE; 5392} 5393 5394 5395/** 5396 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 5397 */ 5398static boolean 5399emit_txd(struct svga_shader_emitter_v10 *emit, 5400 const struct tgsi_full_instruction *inst) 5401{ 5402 const uint unit = inst->Src[3].Register.Index; 5403 unsigned target = inst->Texture.Texture; 5404 int offsets[3]; 5405 struct tgsi_full_src_register coord; 5406 struct tex_swizzle_info swz_info; 5407 5408 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5409 &swz_info); 5410 5411 get_texel_offsets(emit, inst, offsets); 5412 5413 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5414 5415 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 5416 begin_emit_instruction(emit); 5417 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 5418 inst->Instruction.Saturate, offsets); 5419 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5420 emit_src_register(emit, &coord); 5421 emit_resource_register(emit, unit); 5422 emit_sampler_register(emit, unit); 5423 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 5424 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 5425 end_emit_instruction(emit); 5426 5427 end_tex_swizzle(emit, &swz_info); 5428 5429 free_temp_indexes(emit); 5430 5431 return TRUE; 5432} 5433 5434 5435/** 5436 * Emit code for TGSI_OPCODE_TXF (texel fetch) 5437 */ 5438static boolean 5439emit_txf(struct svga_shader_emitter_v10 *emit, 5440 const struct tgsi_full_instruction *inst) 5441{ 5442 const uint unit = inst->Src[1].Register.Index; 5443 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture); 5444 int offsets[3]; 5445 struct tex_swizzle_info swz_info; 5446 5447 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5448 5449 get_texel_offsets(emit, inst, offsets); 5450 5451 
if (msaa) { 5452 /* Fetch one sample from an MSAA texture */ 5453 struct tgsi_full_src_register sampleIndex = 5454 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5455 /* LD_MS dst, coord(s0), resource, sampleIndex */ 5456 begin_emit_instruction(emit); 5457 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 5458 inst->Instruction.Saturate, offsets); 5459 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5460 emit_src_register(emit, &inst->Src[0]); 5461 emit_resource_register(emit, unit); 5462 emit_src_register(emit, &sampleIndex); 5463 end_emit_instruction(emit); 5464 } 5465 else { 5466 /* Fetch one texel specified by integer coordinate */ 5467 /* LD dst, coord(s0), resource */ 5468 begin_emit_instruction(emit); 5469 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 5470 inst->Instruction.Saturate, offsets); 5471 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5472 emit_src_register(emit, &inst->Src[0]); 5473 emit_resource_register(emit, unit); 5474 end_emit_instruction(emit); 5475 } 5476 5477 end_tex_swizzle(emit, &swz_info); 5478 5479 free_temp_indexes(emit); 5480 5481 return TRUE; 5482} 5483 5484 5485/** 5486 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 5487 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 
5488 */ 5489static boolean 5490emit_txl_txb(struct svga_shader_emitter_v10 *emit, 5491 const struct tgsi_full_instruction *inst) 5492{ 5493 unsigned target = inst->Texture.Texture; 5494 unsigned opcode, unit; 5495 int offsets[3]; 5496 struct tgsi_full_src_register coord, lod_bias; 5497 struct tex_swizzle_info swz_info; 5498 5499 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5500 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5501 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 5502 5503 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 5504 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5505 unit = inst->Src[2].Register.Index; 5506 } 5507 else { 5508 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5509 unit = inst->Src[1].Register.Index; 5510 } 5511 5512 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5513 &swz_info); 5514 5515 get_texel_offsets(emit, inst, offsets); 5516 5517 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5518 5519 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 5520 begin_emit_instruction(emit); 5521 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 5522 opcode = VGPU10_OPCODE_SAMPLE_L; 5523 } 5524 else { 5525 opcode = VGPU10_OPCODE_SAMPLE_B; 5526 } 5527 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5528 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5529 emit_src_register(emit, &coord); 5530 emit_resource_register(emit, unit); 5531 emit_sampler_register(emit, unit); 5532 emit_src_register(emit, &lod_bias); 5533 end_emit_instruction(emit); 5534 5535 end_tex_swizzle(emit, &swz_info); 5536 5537 free_temp_indexes(emit); 5538 5539 return TRUE; 5540} 5541 5542 5543/** 5544 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 
5545 */ 5546static boolean 5547emit_txq(struct svga_shader_emitter_v10 *emit, 5548 const struct tgsi_full_instruction *inst) 5549{ 5550 const uint unit = inst->Src[1].Register.Index; 5551 5552 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { 5553 /* RESINFO does not support querying texture buffers, so we instead 5554 * store texture buffer sizes in shader constants, then copy them to 5555 * implement TXQ instead of emitting RESINFO. 5556 * MOV dst, const[texture_buffer_size_index[unit]] 5557 */ 5558 struct tgsi_full_src_register size_src = 5559 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5560 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5561 FALSE); 5562 } else { 5563 /* RESINFO dst, srcMipLevel, resource */ 5564 begin_emit_instruction(emit); 5565 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5566 emit_dst_register(emit, &inst->Dst[0]); 5567 emit_src_register(emit, &inst->Src[0]); 5568 emit_resource_register(emit, unit); 5569 end_emit_instruction(emit); 5570 } 5571 5572 free_temp_indexes(emit); 5573 5574 return TRUE; 5575} 5576 5577 5578/** 5579 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5580 */ 5581static boolean 5582emit_simple(struct svga_shader_emitter_v10 *emit, 5583 const struct tgsi_full_instruction *inst) 5584{ 5585 const unsigned opcode = inst->Instruction.Opcode; 5586 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5587 unsigned i; 5588 5589 begin_emit_instruction(emit); 5590 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5591 inst->Instruction.Saturate); 5592 for (i = 0; i < op->num_dst; i++) { 5593 emit_dst_register(emit, &inst->Dst[i]); 5594 } 5595 for (i = 0; i < op->num_src; i++) { 5596 emit_src_register(emit, &inst->Src[i]); 5597 } 5598 end_emit_instruction(emit); 5599 5600 return TRUE; 5601} 5602 5603 5604/** 5605 * We only special case the MOV instruction to try to detect constant 5606 * color writes in the fragment shader. 
5607 */ 5608static boolean 5609emit_mov(struct svga_shader_emitter_v10 *emit, 5610 const struct tgsi_full_instruction *inst) 5611{ 5612 const struct tgsi_full_src_register *src = &inst->Src[0]; 5613 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5614 5615 if (emit->unit == PIPE_SHADER_FRAGMENT && 5616 dst->Register.File == TGSI_FILE_OUTPUT && 5617 dst->Register.Index == 0 && 5618 src->Register.File == TGSI_FILE_CONSTANT && 5619 !src->Register.Indirect) { 5620 emit->constant_color_output = TRUE; 5621 } 5622 5623 return emit_simple(emit, inst); 5624} 5625 5626 5627/** 5628 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5629 * where TGSI only uses one dest register. 5630 */ 5631static boolean 5632emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5633 const struct tgsi_full_instruction *inst, 5634 unsigned dst_count, 5635 unsigned dst_index) 5636{ 5637 const unsigned opcode = inst->Instruction.Opcode; 5638 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5639 unsigned i; 5640 5641 begin_emit_instruction(emit); 5642 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5643 inst->Instruction.Saturate); 5644 5645 for (i = 0; i < dst_count; i++) { 5646 if (i == dst_index) { 5647 emit_dst_register(emit, &inst->Dst[0]); 5648 } else { 5649 emit_null_dst_register(emit); 5650 } 5651 } 5652 5653 for (i = 0; i < op->num_src; i++) { 5654 emit_src_register(emit, &inst->Src[i]); 5655 } 5656 end_emit_instruction(emit); 5657 5658 return TRUE; 5659} 5660 5661 5662/** 5663 * Translate a single TGSI instruction to VGPU10. 
5664 */ 5665static boolean 5666emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 5667 unsigned inst_number, 5668 const struct tgsi_full_instruction *inst) 5669{ 5670 const unsigned opcode = inst->Instruction.Opcode; 5671 5672 switch (opcode) { 5673 case TGSI_OPCODE_ADD: 5674 case TGSI_OPCODE_AND: 5675 case TGSI_OPCODE_BGNLOOP: 5676 case TGSI_OPCODE_BRK: 5677 case TGSI_OPCODE_CEIL: 5678 case TGSI_OPCODE_CONT: 5679 case TGSI_OPCODE_DDX: 5680 case TGSI_OPCODE_DDY: 5681 case TGSI_OPCODE_DIV: 5682 case TGSI_OPCODE_DP2: 5683 case TGSI_OPCODE_DP3: 5684 case TGSI_OPCODE_DP4: 5685 case TGSI_OPCODE_ELSE: 5686 case TGSI_OPCODE_ENDIF: 5687 case TGSI_OPCODE_ENDLOOP: 5688 case TGSI_OPCODE_ENDSUB: 5689 case TGSI_OPCODE_F2I: 5690 case TGSI_OPCODE_F2U: 5691 case TGSI_OPCODE_FLR: 5692 case TGSI_OPCODE_FRC: 5693 case TGSI_OPCODE_FSEQ: 5694 case TGSI_OPCODE_FSGE: 5695 case TGSI_OPCODE_FSLT: 5696 case TGSI_OPCODE_FSNE: 5697 case TGSI_OPCODE_I2F: 5698 case TGSI_OPCODE_IMAX: 5699 case TGSI_OPCODE_IMIN: 5700 case TGSI_OPCODE_INEG: 5701 case TGSI_OPCODE_ISGE: 5702 case TGSI_OPCODE_ISHR: 5703 case TGSI_OPCODE_ISLT: 5704 case TGSI_OPCODE_MAD: 5705 case TGSI_OPCODE_MAX: 5706 case TGSI_OPCODE_MIN: 5707 case TGSI_OPCODE_MUL: 5708 case TGSI_OPCODE_NOP: 5709 case TGSI_OPCODE_NOT: 5710 case TGSI_OPCODE_OR: 5711 case TGSI_OPCODE_RET: 5712 case TGSI_OPCODE_UADD: 5713 case TGSI_OPCODE_USEQ: 5714 case TGSI_OPCODE_USGE: 5715 case TGSI_OPCODE_USLT: 5716 case TGSI_OPCODE_UMIN: 5717 case TGSI_OPCODE_UMAD: 5718 case TGSI_OPCODE_UMAX: 5719 case TGSI_OPCODE_ROUND: 5720 case TGSI_OPCODE_SQRT: 5721 case TGSI_OPCODE_SHL: 5722 case TGSI_OPCODE_TRUNC: 5723 case TGSI_OPCODE_U2F: 5724 case TGSI_OPCODE_UCMP: 5725 case TGSI_OPCODE_USHR: 5726 case TGSI_OPCODE_USNE: 5727 case TGSI_OPCODE_XOR: 5728 /* simple instructions */ 5729 return emit_simple(emit, inst); 5730 5731 case TGSI_OPCODE_MOV: 5732 return emit_mov(emit, inst); 5733 case TGSI_OPCODE_EMIT: 5734 return emit_vertex(emit, inst); 5735 case 
TGSI_OPCODE_ENDPRIM: 5736 return emit_endprim(emit, inst); 5737 case TGSI_OPCODE_IABS: 5738 return emit_iabs(emit, inst); 5739 case TGSI_OPCODE_ARL: 5740 /* fall-through */ 5741 case TGSI_OPCODE_UARL: 5742 return emit_arl_uarl(emit, inst); 5743 case TGSI_OPCODE_BGNSUB: 5744 /* no-op */ 5745 return TRUE; 5746 case TGSI_OPCODE_CAL: 5747 return emit_cal(emit, inst); 5748 case TGSI_OPCODE_CMP: 5749 return emit_cmp(emit, inst); 5750 case TGSI_OPCODE_COS: 5751 return emit_sincos(emit, inst); 5752 case TGSI_OPCODE_DP2A: 5753 return emit_dp2a(emit, inst); 5754 case TGSI_OPCODE_DPH: 5755 return emit_dph(emit, inst); 5756 case TGSI_OPCODE_DST: 5757 return emit_dst(emit, inst); 5758 case TGSI_OPCODE_EX2: 5759 return emit_ex2(emit, inst); 5760 case TGSI_OPCODE_EXP: 5761 return emit_exp(emit, inst); 5762 case TGSI_OPCODE_IF: 5763 return emit_if(emit, inst); 5764 case TGSI_OPCODE_KILL: 5765 return emit_kill(emit, inst); 5766 case TGSI_OPCODE_KILL_IF: 5767 return emit_kill_if(emit, inst); 5768 case TGSI_OPCODE_LG2: 5769 return emit_lg2(emit, inst); 5770 case TGSI_OPCODE_LIT: 5771 return emit_lit(emit, inst); 5772 case TGSI_OPCODE_LOG: 5773 return emit_log(emit, inst); 5774 case TGSI_OPCODE_LRP: 5775 return emit_lrp(emit, inst); 5776 case TGSI_OPCODE_POW: 5777 return emit_pow(emit, inst); 5778 case TGSI_OPCODE_RCP: 5779 return emit_rcp(emit, inst); 5780 case TGSI_OPCODE_RSQ: 5781 return emit_rsq(emit, inst); 5782 case TGSI_OPCODE_SAMPLE: 5783 return emit_sample(emit, inst); 5784 case TGSI_OPCODE_SCS: 5785 return emit_scs(emit, inst); 5786 case TGSI_OPCODE_SEQ: 5787 return emit_seq(emit, inst); 5788 case TGSI_OPCODE_SGE: 5789 return emit_sge(emit, inst); 5790 case TGSI_OPCODE_SGT: 5791 return emit_sgt(emit, inst); 5792 case TGSI_OPCODE_SIN: 5793 return emit_sincos(emit, inst); 5794 case TGSI_OPCODE_SLE: 5795 return emit_sle(emit, inst); 5796 case TGSI_OPCODE_SLT: 5797 return emit_slt(emit, inst); 5798 case TGSI_OPCODE_SNE: 5799 return emit_sne(emit, inst); 5800 case 
TGSI_OPCODE_SSG: 5801 return emit_ssg(emit, inst); 5802 case TGSI_OPCODE_ISSG: 5803 return emit_issg(emit, inst); 5804 case TGSI_OPCODE_SUB: 5805 return emit_sub(emit, inst); 5806 case TGSI_OPCODE_TEX: 5807 return emit_tex(emit, inst); 5808 case TGSI_OPCODE_TXP: 5809 return emit_txp(emit, inst); 5810 case TGSI_OPCODE_TXB: 5811 case TGSI_OPCODE_TXB2: 5812 case TGSI_OPCODE_TXL: 5813 return emit_txl_txb(emit, inst); 5814 case TGSI_OPCODE_TXD: 5815 return emit_txd(emit, inst); 5816 case TGSI_OPCODE_TXF: 5817 return emit_txf(emit, inst); 5818 case TGSI_OPCODE_TXQ: 5819 return emit_txq(emit, inst); 5820 case TGSI_OPCODE_UIF: 5821 return emit_if(emit, inst); 5822 case TGSI_OPCODE_XPD: 5823 return emit_xpd(emit, inst); 5824 case TGSI_OPCODE_UMUL_HI: 5825 case TGSI_OPCODE_IMUL_HI: 5826 case TGSI_OPCODE_UDIV: 5827 case TGSI_OPCODE_IDIV: 5828 /* These cases use only the FIRST of two destination registers */ 5829 return emit_simple_1dst(emit, inst, 2, 0); 5830 case TGSI_OPCODE_UMUL: 5831 case TGSI_OPCODE_UMOD: 5832 case TGSI_OPCODE_MOD: 5833 /* These cases use only the SECOND of two destination registers */ 5834 return emit_simple_1dst(emit, inst, 2, 1); 5835 case TGSI_OPCODE_END: 5836 if (!emit_post_helpers(emit)) 5837 return FALSE; 5838 return emit_simple(emit, inst); 5839 5840 default: 5841 debug_printf("Unimplemented tgsi instruction %s\n", 5842 tgsi_get_opcode_name(opcode)); 5843 return FALSE; 5844 } 5845 5846 return TRUE; 5847} 5848 5849 5850/** 5851 * Emit the extra instructions to adjust the vertex position. 5852 * There are two possible adjustments: 5853 * 1. Converting from Gallium to VGPU10 coordinate space by applying the 5854 * "prescale" and "pretranslate" values. 5855 * 2. Undoing the viewport transformation when we use the swtnl/draw path. 5856 * \param vs_pos_tmp_index which temporary register contains the vertex pos. 
5857 */ 5858static void 5859emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, 5860 unsigned vs_pos_tmp_index) 5861{ 5862 struct tgsi_full_src_register tmp_pos_src; 5863 struct tgsi_full_dst_register pos_dst; 5864 5865 /* Don't bother to emit any extra vertex instructions if vertex position is 5866 * not written out 5867 */ 5868 if (emit->vposition.out_index == INVALID_INDEX) 5869 return; 5870 5871 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 5872 pos_dst = make_dst_output_reg(emit->vposition.out_index); 5873 5874 /* If non-adjusted vertex position register index 5875 * is valid, copy the vertex position from the temporary 5876 * vertex position register before it is modified by the 5877 * prescale computation. 5878 */ 5879 if (emit->vposition.so_index != INVALID_INDEX) { 5880 struct tgsi_full_dst_register pos_so_dst = 5881 make_dst_output_reg(emit->vposition.so_index); 5882 5883 /* MOV pos_so, tmp_pos */ 5884 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, 5885 &tmp_pos_src, FALSE); 5886 } 5887 5888 if (emit->vposition.need_prescale) { 5889 /* This code adjusts the vertex position to match the VGPU10 convention. 
5890 * If p is the position computed by the shader (usually by applying the 5891 * modelview and projection matrices), the new position q is computed by: 5892 * 5893 * q.x = p.w * trans.x + p.x * scale.x 5894 * q.y = p.w * trans.y + p.y * scale.y 5895 * q.z = p.w * trans.z + p.z * scale.z; 5896 * q.w = p.w * trans.w + p.w; 5897 */ 5898 struct tgsi_full_src_register tmp_pos_src_w = 5899 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5900 struct tgsi_full_dst_register tmp_pos_dst = 5901 make_dst_temp_reg(vs_pos_tmp_index); 5902 struct tgsi_full_dst_register tmp_pos_dst_xyz = 5903 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 5904 5905 struct tgsi_full_src_register prescale_scale = 5906 make_src_const_reg(emit->vposition.prescale_scale_index); 5907 struct tgsi_full_src_register prescale_trans = 5908 make_src_const_reg(emit->vposition.prescale_trans_index); 5909 5910 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 5911 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 5912 &tmp_pos_src, &prescale_scale, FALSE); 5913 5914 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 5915 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 5916 &prescale_trans, &tmp_pos_src, FALSE); 5917 } 5918 else if (emit->key.vs.undo_viewport) { 5919 /* This code computes the final vertex position from the temporary 5920 * vertex position by undoing the viewport transformation and the 5921 * divide-by-W operation (we convert window coords back to clip coords). 5922 * This is needed when we use the 'draw' module for fallbacks. 
5923 * If p is the temp pos in window coords, then the NDC coord q is: 5924 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 5925 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 5926 * q.z = p.z * p.w 5927 * q.w = p.w 5928 * CONST[vs_viewport_index] contains: 5929 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 5930 */ 5931 struct tgsi_full_dst_register tmp_pos_dst = 5932 make_dst_temp_reg(vs_pos_tmp_index); 5933 struct tgsi_full_dst_register tmp_pos_dst_xy = 5934 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 5935 struct tgsi_full_src_register tmp_pos_src_wwww = 5936 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5937 5938 struct tgsi_full_dst_register pos_dst_xyz = 5939 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 5940 struct tgsi_full_dst_register pos_dst_w = 5941 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 5942 5943 struct tgsi_full_src_register vp_xyzw = 5944 make_src_const_reg(emit->vs.viewport_index); 5945 struct tgsi_full_src_register vp_zwww = 5946 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 5947 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 5948 5949 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 5950 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 5951 &tmp_pos_src, &vp_zwww, FALSE); 5952 5953 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 5954 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 5955 &tmp_pos_src, &vp_xyzw, FALSE); 5956 5957 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 5958 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 5959 &tmp_pos_src, &tmp_pos_src_wwww, FALSE); 5960 5961 /* MOV pos.w, tmp_pos.w */ 5962 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, 5963 &tmp_pos_src, FALSE); 5964 } 5965 else if (vs_pos_tmp_index != INVALID_INDEX) { 5966 /* This code is to handle the case where the temporary vertex 5967 * position register is created when the vertex shader has stream 5968 * output and prescale is disabled because rasterization is to be 5969 * discarded. 
5970 */ 5971 struct tgsi_full_dst_register pos_dst = 5972 make_dst_output_reg(emit->vposition.out_index); 5973 5974 /* MOV pos, tmp_pos */ 5975 begin_emit_instruction(emit); 5976 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5977 emit_dst_register(emit, &pos_dst); 5978 emit_src_register(emit, &tmp_pos_src); 5979 end_emit_instruction(emit); 5980 } 5981} 5982 5983static void 5984emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 5985{ 5986 if (emit->clip_mode == CLIP_DISTANCE) { 5987 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 5988 emit_clip_distance_instructions(emit); 5989 5990 } else if (emit->clip_mode == CLIP_VERTEX) { 5991 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 5992 emit_clip_vertex_instructions(emit); 5993 } 5994 5995 /** 5996 * Emit vertex position and take care of legacy user planes only if 5997 * there is a valid vertex position register index. 5998 * This is to take care of the case 5999 * where the shader doesn't output vertex position. Then in 6000 * this case, don't bother to emit more vertex instructions. 6001 */ 6002 if (emit->vposition.out_index == INVALID_INDEX) 6003 return; 6004 6005 /** 6006 * Emit per-vertex clipping instructions for legacy user defined clip planes. 6007 * NOTE: we must emit the clip distance instructions before the 6008 * emit_vpos_instructions() call since the later function will change 6009 * the TEMP[vs_pos_tmp_index] value. 6010 */ 6011 if (emit->clip_mode == CLIP_LEGACY) { 6012 /* Emit CLIPDIST for legacy user defined clip planes */ 6013 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 6014 } 6015} 6016 6017 6018/** 6019 * Emit extra per-vertex instructions. This includes clip-coordinate 6020 * space conversion and computing clip distances. This is called for 6021 * each GS emit-vertex instruction and at the end of VS translation. 
6022 */ 6023static void 6024emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6025{ 6026 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 6027 6028 /* Emit clipping instructions based on clipping mode */ 6029 emit_clipping_instructions(emit); 6030 6031 /** 6032 * Reset the temporary vertex position register index 6033 * so that emit_dst_register() will use the real vertex position output 6034 */ 6035 emit->vposition.tmp_index = INVALID_INDEX; 6036 6037 /* Emit vertex position instructions */ 6038 emit_vpos_instructions(emit, vs_pos_tmp_index); 6039 6040 /* Restore original vposition.tmp_index value for the next GS vertex. 6041 * It doesn't matter for VS. 6042 */ 6043 emit->vposition.tmp_index = vs_pos_tmp_index; 6044} 6045 6046/** 6047 * Translate the TGSI_OPCODE_EMIT GS instruction. 6048 */ 6049static boolean 6050emit_vertex(struct svga_shader_emitter_v10 *emit, 6051 const struct tgsi_full_instruction *inst) 6052{ 6053 unsigned ret = TRUE; 6054 6055 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6056 6057 emit_vertex_instructions(emit); 6058 6059 /* We can't use emit_simple() because the TGSI instruction has one 6060 * operand (vertex stream number) which we must ignore for VGPU10. 6061 */ 6062 begin_emit_instruction(emit); 6063 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 6064 end_emit_instruction(emit); 6065 6066 return ret; 6067} 6068 6069 6070/** 6071 * Emit the extra code to convert from VGPU10's boolean front-face 6072 * register to TGSI's signed front-face register. 6073 * 6074 * TODO: Make temporary front-face register a scalar. 
6075 */ 6076static void 6077emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 6078{ 6079 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6080 6081 if (emit->fs.face_input_index != INVALID_INDEX) { 6082 /* convert vgpu10 boolean face register to gallium +/-1 value */ 6083 struct tgsi_full_dst_register tmp_dst = 6084 make_dst_temp_reg(emit->fs.face_tmp_index); 6085 struct tgsi_full_src_register one = 6086 make_immediate_reg_float(emit, 1.0f); 6087 struct tgsi_full_src_register neg_one = 6088 make_immediate_reg_float(emit, -1.0f); 6089 6090 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 6091 begin_emit_instruction(emit); 6092 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 6093 emit_dst_register(emit, &tmp_dst); 6094 emit_face_register(emit); 6095 emit_src_register(emit, &one); 6096 emit_src_register(emit, &neg_one); 6097 end_emit_instruction(emit); 6098 } 6099} 6100 6101 6102/** 6103 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 6104 */ 6105static void 6106emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 6107{ 6108 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6109 6110 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 6111 struct tgsi_full_dst_register tmp_dst = 6112 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 6113 struct tgsi_full_dst_register tmp_dst_xyz = 6114 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 6115 struct tgsi_full_dst_register tmp_dst_w = 6116 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6117 struct tgsi_full_src_register one = 6118 make_immediate_reg_float(emit, 1.0f); 6119 struct tgsi_full_src_register fragcoord = 6120 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 6121 6122 /* save the input index */ 6123 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 6124 /* set to invalid to prevent substitution in emit_src_register() */ 6125 emit->fs.fragcoord_input_index = INVALID_INDEX; 6126 6127 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 6128 
begin_emit_instruction(emit); 6129 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 6130 emit_dst_register(emit, &tmp_dst_xyz); 6131 emit_src_register(emit, &fragcoord); 6132 end_emit_instruction(emit); 6133 6134 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 6135 begin_emit_instruction(emit); 6136 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 6137 emit_dst_register(emit, &tmp_dst_w); 6138 emit_src_register(emit, &one); 6139 emit_src_register(emit, &fragcoord); 6140 end_emit_instruction(emit); 6141 6142 /* restore saved value */ 6143 emit->fs.fragcoord_input_index = fragcoord_input_index; 6144 } 6145} 6146 6147 6148/** 6149 * Emit extra instructions to adjust VS inputs/attributes. This can 6150 * mean casting a vertex attribute from int to float or setting the 6151 * W component to 1, or both. 6152 */ 6153static void 6154emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 6155{ 6156 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 6157 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 6158 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 6159 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 6160 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 6161 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 6162 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 6163 6164 unsigned adjust_mask = (save_w_1_mask | 6165 save_itof_mask | 6166 save_utof_mask | 6167 save_is_bgra_mask | 6168 save_puint_to_snorm_mask | 6169 save_puint_to_uscaled_mask | 6170 save_puint_to_sscaled_mask); 6171 6172 assert(emit->unit == PIPE_SHADER_VERTEX); 6173 6174 if (adjust_mask) { 6175 struct tgsi_full_src_register one = 6176 make_immediate_reg_float(emit, 1.0f); 6177 6178 struct tgsi_full_src_register one_int = 6179 make_immediate_reg_int(emit, 1); 6180 6181 /* We need to turn off these bitmasks while emitting the 6182 * instructions 
below, then restore them afterward. 6183 */ 6184 emit->key.vs.adjust_attrib_w_1 = 0; 6185 emit->key.vs.adjust_attrib_itof = 0; 6186 emit->key.vs.adjust_attrib_utof = 0; 6187 emit->key.vs.attrib_is_bgra = 0; 6188 emit->key.vs.attrib_puint_to_snorm = 0; 6189 emit->key.vs.attrib_puint_to_uscaled = 0; 6190 emit->key.vs.attrib_puint_to_sscaled = 0; 6191 6192 while (adjust_mask) { 6193 unsigned index = u_bit_scan(&adjust_mask); 6194 6195 /* skip the instruction if this vertex attribute is not being used */ 6196 if (emit->info.input_usage_mask[index] == 0) 6197 continue; 6198 6199 unsigned tmp = emit->vs.adjusted_input[index]; 6200 struct tgsi_full_src_register input_src = 6201 make_src_reg(TGSI_FILE_INPUT, index); 6202 6203 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6204 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6205 struct tgsi_full_dst_register tmp_dst_w = 6206 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6207 6208 /* ITOF/UTOF/MOV tmp, input[index] */ 6209 if (save_itof_mask & (1 << index)) { 6210 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, 6211 &tmp_dst, &input_src, FALSE); 6212 } 6213 else if (save_utof_mask & (1 << index)) { 6214 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, 6215 &tmp_dst, &input_src, FALSE); 6216 } 6217 else if (save_puint_to_snorm_mask & (1 << index)) { 6218 emit_puint_to_snorm(emit, &tmp_dst, &input_src); 6219 } 6220 else if (save_puint_to_uscaled_mask & (1 << index)) { 6221 emit_puint_to_uscaled(emit, &tmp_dst, &input_src); 6222 } 6223 else if (save_puint_to_sscaled_mask & (1 << index)) { 6224 emit_puint_to_sscaled(emit, &tmp_dst, &input_src); 6225 } 6226 else { 6227 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); 6228 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6229 &tmp_dst, &input_src, FALSE); 6230 } 6231 6232 if (save_is_bgra_mask & (1 << index)) { 6233 emit_swap_r_b(emit, &tmp_dst, &tmp_src); 6234 } 6235 6236 if (save_w_1_mask & (1 << index)) { 6237 /* MOV tmp.w, 1.0 */ 6238 if 
(emit->key.vs.attrib_is_pure_int & (1 << index)) { 6239 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6240 &tmp_dst_w, &one_int, FALSE); 6241 } 6242 else { 6243 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6244 &tmp_dst_w, &one, FALSE); 6245 } 6246 } 6247 } 6248 6249 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; 6250 emit->key.vs.adjust_attrib_itof = save_itof_mask; 6251 emit->key.vs.adjust_attrib_utof = save_utof_mask; 6252 emit->key.vs.attrib_is_bgra = save_is_bgra_mask; 6253 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; 6254 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; 6255 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; 6256 } 6257} 6258 6259 6260/** 6261 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed 6262 * to implement some instructions. We pre-allocate those values here 6263 * in the immediate constant buffer. 6264 */ 6265static void 6266alloc_common_immediates(struct svga_shader_emitter_v10 *emit) 6267{ 6268 unsigned n = 0; 6269 6270 emit->common_immediate_pos[n++] = 6271 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); 6272 6273 emit->common_immediate_pos[n++] = 6274 alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f); 6275 6276 emit->common_immediate_pos[n++] = 6277 alloc_immediate_int4(emit, 0, 1, 0, -1); 6278 6279 if (emit->key.vs.attrib_puint_to_snorm) { 6280 emit->common_immediate_pos[n++] = 6281 alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 6282 } 6283 6284 if (emit->key.vs.attrib_puint_to_uscaled) { 6285 emit->common_immediate_pos[n++] = 6286 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); 6287 } 6288 6289 if (emit->key.vs.attrib_puint_to_sscaled) { 6290 emit->common_immediate_pos[n++] = 6291 alloc_immediate_int4(emit, 22, 12, 2, 0); 6292 6293 emit->common_immediate_pos[n++] = 6294 alloc_immediate_int4(emit, 22, 30, 0, 0); 6295 } 6296 6297 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 6298 emit->num_common_immediates = n; 
6299} 6300 6301 6302/** 6303 * Emit any extra/helper declarations/code that we might need between 6304 * the declaration section and code section. 6305 */ 6306static boolean 6307emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 6308{ 6309 /* Properties */ 6310 if (emit->unit == PIPE_SHADER_GEOMETRY) 6311 emit_property_instructions(emit); 6312 6313 /* Declare inputs */ 6314 if (!emit_input_declarations(emit)) 6315 return FALSE; 6316 6317 /* Declare outputs */ 6318 if (!emit_output_declarations(emit)) 6319 return FALSE; 6320 6321 /* Declare temporary registers */ 6322 emit_temporaries_declaration(emit); 6323 6324 /* Declare constant registers */ 6325 emit_constant_declaration(emit); 6326 6327 /* Declare samplers and resources */ 6328 emit_sampler_declarations(emit); 6329 emit_resource_declarations(emit); 6330 6331 /* Declare clip distance output registers */ 6332 if (emit->unit == PIPE_SHADER_VERTEX || 6333 emit->unit == PIPE_SHADER_GEOMETRY) { 6334 emit_clip_distance_declarations(emit); 6335 } 6336 6337 alloc_common_immediates(emit); 6338 6339 if (emit->unit == PIPE_SHADER_FRAGMENT && 6340 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6341 float alpha = emit->key.fs.alpha_ref; 6342 emit->fs.alpha_ref_index = 6343 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 6344 } 6345 6346 /* Now, emit the constant block containing all the immediates 6347 * declared by shader, as well as the extra ones seen above. 6348 */ 6349 emit_vgpu10_immediates_block(emit); 6350 6351 if (emit->unit == PIPE_SHADER_FRAGMENT) { 6352 emit_frontface_instructions(emit); 6353 emit_fragcoord_instructions(emit); 6354 } 6355 else if (emit->unit == PIPE_SHADER_VERTEX) { 6356 emit_vertex_attrib_instructions(emit); 6357 } 6358 6359 return TRUE; 6360} 6361 6362 6363/** 6364 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 6365 * against the alpha reference value and discards the fragment if the 6366 * comparison fails. 
6367 */ 6368static void 6369emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, 6370 unsigned fs_color_tmp_index) 6371{ 6372 /* compare output color's alpha to alpha ref and kill */ 6373 unsigned tmp = get_temp_index(emit); 6374 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6375 struct tgsi_full_src_register tmp_src_x = 6376 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6377 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6378 struct tgsi_full_src_register color_src = 6379 make_src_temp_reg(fs_color_tmp_index); 6380 struct tgsi_full_src_register color_src_w = 6381 scalar_src(&color_src, TGSI_SWIZZLE_W); 6382 struct tgsi_full_src_register ref_src = 6383 make_src_immediate_reg(emit->fs.alpha_ref_index); 6384 struct tgsi_full_dst_register color_dst = 6385 make_dst_output_reg(emit->fs.color_out_index[0]); 6386 6387 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6388 6389 /* dst = src0 'alpha_func' src1 */ 6390 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, 6391 &color_src_w, &ref_src); 6392 6393 /* DISCARD if dst.x == 0 */ 6394 begin_emit_instruction(emit); 6395 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ 6396 emit_src_register(emit, &tmp_src_x); 6397 end_emit_instruction(emit); 6398 6399 /* If we don't need to broadcast the color below or set fragments to 6400 * white, emit final color here. 6401 */ 6402 if (emit->key.fs.write_color0_to_n_cbufs <= 1 && 6403 !emit->key.fs.white_fragments) { 6404 /* MOV output.color, tempcolor */ 6405 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6406 &color_src, FALSE); /* XXX saturate? */ 6407 } 6408 6409 free_temp_indexes(emit); 6410} 6411 6412 6413/** 6414 * When we need to emit white for all fragments (for emulating XOR logicop 6415 * mode), this function copies white into the temporary color output register. 
6416 */ 6417static void 6418emit_set_color_white(struct svga_shader_emitter_v10 *emit, 6419 unsigned fs_color_tmp_index) 6420{ 6421 struct tgsi_full_dst_register color_dst = 6422 make_dst_temp_reg(fs_color_tmp_index); 6423 struct tgsi_full_src_register white = 6424 make_immediate_reg_float(emit, 1.0f); 6425 6426 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); 6427} 6428 6429 6430/** 6431 * Emit instructions for writing a single color output to multiple 6432 * color buffers. 6433 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or 6434 * when key.fs.white_fragments is true). 6435 * property is set and the number of render targets is greater than one. 6436 * \param fs_color_tmp_index index of the temp register that holds the 6437 * color to broadcast. 6438 */ 6439static void 6440emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, 6441 unsigned fs_color_tmp_index) 6442{ 6443 const unsigned n = emit->key.fs.write_color0_to_n_cbufs; 6444 unsigned i; 6445 struct tgsi_full_src_register color_src = 6446 make_src_temp_reg(fs_color_tmp_index); 6447 6448 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6449 6450 for (i = 0; i < n; i++) { 6451 unsigned output_reg = emit->fs.color_out_index[i]; 6452 struct tgsi_full_dst_register color_dst = 6453 make_dst_output_reg(output_reg); 6454 6455 /* Fill in this semantic here since we'll use it later in 6456 * emit_dst_register(). 6457 */ 6458 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; 6459 6460 /* MOV output.color[i], tempcolor */ 6461 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6462 &color_src, FALSE); /* XXX saturate? */ 6463 } 6464} 6465 6466 6467/** 6468 * Emit extra helper code after the original shader code, but before the 6469 * last END/RET instruction. 6470 * For vertex shaders this means emitting the extra code to apply the 6471 * prescale scale/translation. 
6472 */ 6473static boolean 6474emit_post_helpers(struct svga_shader_emitter_v10 *emit) 6475{ 6476 if (emit->unit == PIPE_SHADER_VERTEX) { 6477 emit_vertex_instructions(emit); 6478 } 6479 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 6480 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; 6481 6482 /* We no longer want emit_dst_register() to substitute the 6483 * temporary fragment color register for the real color output. 6484 */ 6485 emit->fs.color_tmp_index = INVALID_INDEX; 6486 6487 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6488 emit_alpha_test_instructions(emit, fs_color_tmp_index); 6489 } 6490 if (emit->key.fs.white_fragments) { 6491 emit_set_color_white(emit, fs_color_tmp_index); 6492 } 6493 if (emit->key.fs.write_color0_to_n_cbufs > 1 || 6494 emit->key.fs.white_fragments) { 6495 emit_broadcast_color_instructions(emit, fs_color_tmp_index); 6496 } 6497 } 6498 6499 return TRUE; 6500} 6501 6502 6503/** 6504 * Translate the TGSI tokens into VGPU10 tokens. 6505 */ 6506static boolean 6507emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, 6508 const struct tgsi_token *tokens) 6509{ 6510 struct tgsi_parse_context parse; 6511 boolean ret = TRUE; 6512 boolean pre_helpers_emitted = FALSE; 6513 unsigned inst_number = 0; 6514 6515 tgsi_parse_init(&parse, tokens); 6516 6517 while (!tgsi_parse_end_of_tokens(&parse)) { 6518 tgsi_parse_token(&parse); 6519 6520 switch (parse.FullToken.Token.Type) { 6521 case TGSI_TOKEN_TYPE_IMMEDIATE: 6522 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); 6523 if (!ret) 6524 goto done; 6525 break; 6526 6527 case TGSI_TOKEN_TYPE_DECLARATION: 6528 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); 6529 if (!ret) 6530 goto done; 6531 break; 6532 6533 case TGSI_TOKEN_TYPE_INSTRUCTION: 6534 if (!pre_helpers_emitted) { 6535 ret = emit_pre_helpers(emit); 6536 if (!ret) 6537 goto done; 6538 pre_helpers_emitted = TRUE; 6539 } 6540 ret = emit_vgpu10_instruction(emit, inst_number++, 
6541 &parse.FullToken.FullInstruction); 6542 if (!ret) 6543 goto done; 6544 break; 6545 6546 case TGSI_TOKEN_TYPE_PROPERTY: 6547 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); 6548 if (!ret) 6549 goto done; 6550 break; 6551 6552 default: 6553 break; 6554 } 6555 } 6556 6557done: 6558 tgsi_parse_free(&parse); 6559 return ret; 6560} 6561 6562 6563/** 6564 * Emit the first VGPU10 shader tokens. 6565 */ 6566static boolean 6567emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) 6568{ 6569 VGPU10ProgramToken ptoken; 6570 6571 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ 6572 ptoken.majorVersion = 4; 6573 ptoken.minorVersion = 0; 6574 ptoken.programType = translate_shader_type(emit->unit); 6575 if (!emit_dword(emit, ptoken.value)) 6576 return FALSE; 6577 6578 /* Second token: total length of shader, in tokens. We can't fill this 6579 * in until we're all done. Emit zero for now. 6580 */ 6581 return emit_dword(emit, 0); 6582} 6583 6584 6585static boolean 6586emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) 6587{ 6588 VGPU10ProgramToken *tokens; 6589 6590 /* Replace the second token with total shader length */ 6591 tokens = (VGPU10ProgramToken *) emit->buf; 6592 tokens[1].value = emit_get_num_tokens(emit); 6593 6594 return TRUE; 6595} 6596 6597 6598/** 6599 * Modify the FS to read the BCOLORs and use the FACE register 6600 * to choose between the front/back colors. 6601 */ 6602static const struct tgsi_token * 6603transform_fs_twoside(const struct tgsi_token *tokens) 6604{ 6605 if (0) { 6606 debug_printf("Before tgsi_add_two_side ------------------\n"); 6607 tgsi_dump(tokens,0); 6608 } 6609 tokens = tgsi_add_two_side(tokens); 6610 if (0) { 6611 debug_printf("After tgsi_add_two_side ------------------\n"); 6612 tgsi_dump(tokens, 0); 6613 } 6614 return tokens; 6615} 6616 6617 6618/** 6619 * Modify the FS to do polygon stipple. 
6620 */ 6621static const struct tgsi_token * 6622transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, 6623 const struct tgsi_token *tokens) 6624{ 6625 const struct tgsi_token *new_tokens; 6626 unsigned unit; 6627 6628 if (0) { 6629 debug_printf("Before pstipple ------------------\n"); 6630 tgsi_dump(tokens,0); 6631 } 6632 6633 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, 6634 TGSI_FILE_INPUT); 6635 6636 emit->fs.pstipple_sampler_unit = unit; 6637 6638 /* Setup texture state for stipple */ 6639 emit->sampler_target[unit] = TGSI_TEXTURE_2D; 6640 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 6641 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 6642 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 6643 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 6644 6645 if (0) { 6646 debug_printf("After pstipple ------------------\n"); 6647 tgsi_dump(new_tokens, 0); 6648 } 6649 6650 return new_tokens; 6651} 6652 6653/** 6654 * Modify the FS to support anti-aliasing point. 6655 */ 6656static const struct tgsi_token * 6657transform_fs_aapoint(const struct tgsi_token *tokens, 6658 int aa_coord_index) 6659{ 6660 if (0) { 6661 debug_printf("Before tgsi_add_aa_point ------------------\n"); 6662 tgsi_dump(tokens,0); 6663 } 6664 tokens = tgsi_add_aa_point(tokens, aa_coord_index); 6665 if (0) { 6666 debug_printf("After tgsi_add_aa_point ------------------\n"); 6667 tgsi_dump(tokens, 0); 6668 } 6669 return tokens; 6670} 6671 6672/** 6673 * This is the main entrypoint for the TGSI -> VPGU10 translator. 
6674 */ 6675struct svga_shader_variant * 6676svga_tgsi_vgpu10_translate(struct svga_context *svga, 6677 const struct svga_shader *shader, 6678 const struct svga_compile_key *key, 6679 unsigned unit) 6680{ 6681 struct svga_shader_variant *variant = NULL; 6682 struct svga_shader_emitter_v10 *emit; 6683 const struct tgsi_token *tokens = shader->tokens; 6684 struct svga_vertex_shader *vs = svga->curr.vs; 6685 struct svga_geometry_shader *gs = svga->curr.gs; 6686 6687 assert(unit == PIPE_SHADER_VERTEX || 6688 unit == PIPE_SHADER_GEOMETRY || 6689 unit == PIPE_SHADER_FRAGMENT); 6690 6691 /* These two flags cannot be used together */ 6692 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6693 6694 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE); 6695 /* 6696 * Setup the code emitter 6697 */ 6698 emit = alloc_emitter(); 6699 if (!emit) 6700 goto done; 6701 6702 emit->unit = unit; 6703 emit->key = *key; 6704 6705 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6706 emit->key.gs.need_prescale); 6707 emit->vposition.tmp_index = INVALID_INDEX; 6708 emit->vposition.so_index = INVALID_INDEX; 6709 emit->vposition.out_index = INVALID_INDEX; 6710 6711 emit->fs.color_tmp_index = INVALID_INDEX; 6712 emit->fs.face_input_index = INVALID_INDEX; 6713 emit->fs.fragcoord_input_index = INVALID_INDEX; 6714 6715 emit->gs.prim_id_index = INVALID_INDEX; 6716 6717 emit->clip_dist_out_index = INVALID_INDEX; 6718 emit->clip_dist_tmp_index = INVALID_INDEX; 6719 emit->clip_dist_so_index = INVALID_INDEX; 6720 emit->clip_vertex_out_index = INVALID_INDEX; 6721 6722 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6723 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6724 } 6725 6726 if (unit == PIPE_SHADER_FRAGMENT) { 6727 if (key->fs.light_twoside) { 6728 tokens = transform_fs_twoside(tokens); 6729 } 6730 if (key->fs.pstipple) { 6731 const struct tgsi_token *new_tokens = 6732 transform_fs_pstipple(emit, tokens); 6733 if (tokens != shader->tokens) { 
6734 /* free the two-sided shader tokens */ 6735 tgsi_free_tokens(tokens); 6736 } 6737 tokens = new_tokens; 6738 } 6739 if (key->fs.aa_point) { 6740 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6741 } 6742 } 6743 6744 if (SVGA_DEBUG & DEBUG_TGSI) { 6745 debug_printf("#####################################\n"); 6746 debug_printf("### TGSI Shader %u\n", shader->id); 6747 tgsi_dump(tokens, 0); 6748 } 6749 6750 /** 6751 * Rescan the header if the token string is different from the one 6752 * included in the shader; otherwise, the header info is already up-to-date 6753 */ 6754 if (tokens != shader->tokens) { 6755 tgsi_scan_shader(tokens, &emit->info); 6756 } else { 6757 emit->info = shader->info; 6758 } 6759 6760 emit->num_outputs = emit->info.num_outputs; 6761 6762 if (unit == PIPE_SHADER_FRAGMENT) { 6763 /* Compute FS input remapping to match the output from VS/GS */ 6764 if (gs) { 6765 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6766 } else { 6767 assert(vs); 6768 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6769 } 6770 } else if (unit == PIPE_SHADER_GEOMETRY) { 6771 assert(vs); 6772 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6773 } 6774 6775 determine_clipping_mode(emit); 6776 6777 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6778 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6779 /* if there is stream output declarations associated 6780 * with this shader or the shader writes to ClipDistance 6781 * then reserve extra registers for the non-adjusted vertex position 6782 * and the ClipDistance shadow copy 6783 */ 6784 emit->vposition.so_index = emit->num_outputs++; 6785 6786 if (emit->clip_mode == CLIP_DISTANCE) { 6787 emit->clip_dist_so_index = emit->num_outputs++; 6788 if (emit->info.num_written_clipdistance > 4) 6789 emit->num_outputs++; 6790 } 6791 } 6792 } 6793 6794 /* 6795 * Do actual shader translation. 
6796 */ 6797 if (!emit_vgpu10_header(emit)) { 6798 debug_printf("svga: emit VGPU10 header failed\n"); 6799 goto cleanup; 6800 } 6801 6802 if (!emit_vgpu10_instructions(emit, tokens)) { 6803 debug_printf("svga: emit VGPU10 instructions failed\n"); 6804 goto cleanup; 6805 } 6806 6807 if (!emit_vgpu10_tail(emit)) { 6808 debug_printf("svga: emit VGPU10 tail failed\n"); 6809 goto cleanup; 6810 } 6811 6812 if (emit->register_overflow) { 6813 goto cleanup; 6814 } 6815 6816 /* 6817 * Create, initialize the 'variant' object. 6818 */ 6819 variant = svga_new_shader_variant(svga); 6820 if (!variant) 6821 goto cleanup; 6822 6823 variant->shader = shader; 6824 variant->nr_tokens = emit_get_num_tokens(emit); 6825 variant->tokens = (const unsigned *)emit->buf; 6826 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6827 memcpy(&variant->key, key, sizeof(*key)); 6828 variant->id = UTIL_BITMASK_INVALID_INDEX; 6829 6830 /* The extra constant starting offset starts with the number of 6831 * shader constants declared in the shader. 6832 */ 6833 variant->extra_const_start = emit->num_shader_consts[0]; 6834 if (key->gs.wide_point) { 6835 /** 6836 * The extra constant added in the transformed shader 6837 * for inverse viewport scale is to be supplied by the driver. 6838 * So the extra constant starting offset needs to be reduced by 1. 6839 */ 6840 assert(variant->extra_const_start > 0); 6841 variant->extra_const_start--; 6842 } 6843 6844 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6845 6846 /* If there was exactly one write to a fragment shader output register 6847 * and it came from a constant buffer, we know all fragments will have 6848 * the same color (except for blending). 6849 */ 6850 variant->constant_color_output = 6851 emit->constant_color_output && emit->num_output_writes == 1; 6852 6853 /** keep track in the variant if flat interpolation is used 6854 * for any of the varyings. 
6855 */ 6856 variant->uses_flat_interp = emit->uses_flat_interp; 6857 6858 if (tokens != shader->tokens) { 6859 tgsi_free_tokens(tokens); 6860 } 6861 6862cleanup: 6863 free_emitter(emit); 6864 6865done: 6866 SVGA_STATS_TIME_POP(svga_sws(svga)); 6867 return variant; 6868} 6869