svga_tgsi_vgpu10.c revision 2e1cfcc431471c68ba79c9323716bed7da79c909
1/********************************************************** 2 * Copyright 1998-2013 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26/** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
#define MAX_IMMEDIATE_COUNT \
   (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};


/**
 * Context/state for the TGSI -> VGPU10 translation.  One of these is
 * created per shader translation and freed when translation finishes.
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;    /**< allocated size of buf, in bytes */
   char *buf;        /**< start of output buffer */
   char *ptr;        /**< current write position within buf */

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;    /**< PIPE_SHADER_VERTEX/GEOMETRY/FRAGMENT */

   unsigned inst_start_token;
   boolean discard_instruction; /**< throw away current instruction? */

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   unsigned num_immediates;      /**< Number of immediates emitted */
   unsigned common_immediate_pos[8];  /**< literals for common immediates */
   unsigned num_common_immediates;
   boolean immediates_emitted;

   unsigned num_outputs;      /**< include any extra outputs */
                              /** The first extra output is reserved for
                               *  non-adjusted vertex position for
                               *  stream output purpose
                               */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count;  /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Samplers */
   unsigned num_samplers;
   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   ubyte system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_scale_index, prescale_trans_index;
      boolean need_prescale;
   } vposition;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      /* apha test */
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
      unsigned input_size;   /**< size of input arrays */
      unsigned prim_id_index;  /**< primitive id register index */
      unsigned max_out_vertices;  /**< maximum number of output vertices */
   } gs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index; /**< clip distance output register index */
   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
   unsigned clip_dist_so_index;  /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index; /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   boolean constant_color_output;

   boolean uses_flat_interp;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /* VS/GS/FS Linkage info */
   struct shader_linkage linkage;

   bool register_overflow;   /**< Set if we exceed a VGPU10 register limit */
};


static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);

static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst);

/** Fallback buffer used when growing the real output buffer fails */
static char err_buf[128];

/**
 * Double the size of the emitter's output buffer.
 * On allocation failure the emitter is redirected at the static err_buf
 * (so further writes are harmless) and FALSE is returned.
 */
static boolean
expand(struct svga_shader_emitter_v10 *emit)
{
   char *new_buf;
   unsigned newsize = emit->size * 2;

   if (emit->buf != err_buf)
      new_buf = REALLOC(emit->buf, emit->size, newsize);
   else
      new_buf = NULL;

   if (!new_buf) {
      emit->ptr = err_buf;
      emit->buf = err_buf;
      emit->size = sizeof(err_buf);
      return FALSE;
   }

   emit->size = newsize;
   /* keep the write position at the same offset within the new buffer */
   emit->ptr = new_buf + (emit->ptr - emit->buf);
   emit->buf = new_buf;
   return TRUE;
}

/**
 * Create and initialize a new svga_shader_emitter_v10 object.
 */
static struct svga_shader_emitter_v10 *
alloc_emitter(void)
{
   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));

   if (!emit)
      return NULL;

   /* to initialize the output buffer */
   emit->size = 512;
   if (!expand(emit)) {
      FREE(emit);
      return NULL;
   }
   return emit;
}

/**
 * Free an svga_shader_emitter_v10 object.
 */
static void
free_emitter(struct svga_shader_emitter_v10 *emit)
{
   assert(emit);
   /* NOTE(review): if expand() ever failed, buf points at the static
    * err_buf and freeing it would be invalid -- presumably callers abort
    * translation before reaching here in that case; verify.
    */
   FREE(emit->buf);    /* will be NULL if translation succeeded */
   FREE(emit);
}

/**
 * Make sure there's room for nr_dwords more dwords in the output
 * buffer, growing it as needed.
 */
static inline boolean
reserve(struct svga_shader_emitter_v10 *emit,
        unsigned nr_dwords)
{
   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
      if (!expand(emit))
         return FALSE;
   }

   return TRUE;
}

/** Append one dword to the output buffer */
static boolean
emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
{
   if (!reserve(emit, 1))
      return FALSE;

   *(uint32 *)emit->ptr = dword;
   emit->ptr += sizeof dword;
   return TRUE;
}

/** Append nr dwords to the output buffer */
static boolean
emit_dwords(struct svga_shader_emitter_v10 *emit,
            const uint32 *dwords,
            unsigned nr)
{
   if (!reserve(emit, nr))
      return FALSE;

   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
   emit->ptr += nr * sizeof *dwords;
   return TRUE;
}

/** Return the number of tokens in the emitter's buffer */
static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
{
   return (emit->ptr - emit->buf) / sizeof(unsigned);
}


/**
 * Check for register overflow.  If we overflow we'll set an
 * error flag.
 * This function can be called for register declarations
 * or use as src/dst instruction operands.
 * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
 *                     or VGPU10_OPCODE_DCL_x
 * \param index  the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      /* the input limit depends on which shader stage we're translating */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_INPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_INPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      /* likewise, the output limit is per shader stage */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   /* report the first time we overflow (avoid log spam afterward) */
   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable) {
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}


/**
 * For clip distance register declarations and clip distance register
 * writes we need to mask the declaration usage or instruction writemask
 * (respectively) against the set of the really-enabled clipping planes.
 *
 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
 * has a VS that writes to all 8 clip distance registers, but the plane enable
 * flags are a subset of that.
 *
 * This function is used to apply the plane enable flags to the register
 * declaration or instruction writemask.
 *
 * \param writemask  the declaration usage mask or instruction writemask
 * \param clip_reg_index  which clip plane register is being declared/written.
455 * The legal values are 0 and 1 (two clip planes per 456 * register, for a total of 8 clip planes) 457 */ 458static unsigned 459apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 460 unsigned writemask, unsigned clip_reg_index) 461{ 462 unsigned shift; 463 464 assert(clip_reg_index < 2); 465 466 /* four clip planes per clip register: */ 467 shift = clip_reg_index * 4; 468 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 469 470 return writemask; 471} 472 473 474/** 475 * Translate gallium shader type into VGPU10 type. 476 */ 477static VGPU10_PROGRAM_TYPE 478translate_shader_type(unsigned type) 479{ 480 switch (type) { 481 case PIPE_SHADER_VERTEX: 482 return VGPU10_VERTEX_SHADER; 483 case PIPE_SHADER_GEOMETRY: 484 return VGPU10_GEOMETRY_SHADER; 485 case PIPE_SHADER_FRAGMENT: 486 return VGPU10_PIXEL_SHADER; 487 default: 488 assert(!"Unexpected shader type"); 489 return VGPU10_VERTEX_SHADER; 490 } 491} 492 493 494/** 495 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 496 * Note: we only need to translate the opcodes for "simple" instructions, 497 * as seen below. All other opcodes are handled/translated specially. 
 */
static VGPU10_OPCODE_TYPE
translate_opcode(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
      return VGPU10_OPCODE_MOV;
   case TGSI_OPCODE_MUL:
      return VGPU10_OPCODE_MUL;
   case TGSI_OPCODE_ADD:
      return VGPU10_OPCODE_ADD;
   case TGSI_OPCODE_DP3:
      return VGPU10_OPCODE_DP3;
   case TGSI_OPCODE_DP4:
      return VGPU10_OPCODE_DP4;
   case TGSI_OPCODE_MIN:
      return VGPU10_OPCODE_MIN;
   case TGSI_OPCODE_MAX:
      return VGPU10_OPCODE_MAX;
   case TGSI_OPCODE_MAD:
      return VGPU10_OPCODE_MAD;
   case TGSI_OPCODE_SQRT:
      return VGPU10_OPCODE_SQRT;
   case TGSI_OPCODE_FRC:
      return VGPU10_OPCODE_FRC;
   case TGSI_OPCODE_FLR:
      return VGPU10_OPCODE_ROUND_NI;   /* round toward -infinity */
   case TGSI_OPCODE_FSEQ:
      return VGPU10_OPCODE_EQ;
   case TGSI_OPCODE_FSGE:
      return VGPU10_OPCODE_GE;
   case TGSI_OPCODE_FSNE:
      return VGPU10_OPCODE_NE;
   case TGSI_OPCODE_DDX:
      return VGPU10_OPCODE_DERIV_RTX;
   case TGSI_OPCODE_DDY:
      return VGPU10_OPCODE_DERIV_RTY;
   case TGSI_OPCODE_RET:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_DIV:
      return VGPU10_OPCODE_DIV;
   case TGSI_OPCODE_IDIV:
      return VGPU10_OPCODE_IDIV;
   case TGSI_OPCODE_DP2:
      return VGPU10_OPCODE_DP2;
   case TGSI_OPCODE_BRK:
      return VGPU10_OPCODE_BREAK;
   case TGSI_OPCODE_IF:
      return VGPU10_OPCODE_IF;
   case TGSI_OPCODE_ELSE:
      return VGPU10_OPCODE_ELSE;
   case TGSI_OPCODE_ENDIF:
      return VGPU10_OPCODE_ENDIF;
   case TGSI_OPCODE_CEIL:
      return VGPU10_OPCODE_ROUND_PI;   /* round toward +infinity */
   case TGSI_OPCODE_I2F:
      return VGPU10_OPCODE_ITOF;
   case TGSI_OPCODE_NOT:
      return VGPU10_OPCODE_NOT;
   case TGSI_OPCODE_TRUNC:
      return VGPU10_OPCODE_ROUND_Z;    /* round toward zero */
   case TGSI_OPCODE_SHL:
      return VGPU10_OPCODE_ISHL;
   case TGSI_OPCODE_AND:
      return VGPU10_OPCODE_AND;
   case TGSI_OPCODE_OR:
      return VGPU10_OPCODE_OR;
   case TGSI_OPCODE_XOR:
      return VGPU10_OPCODE_XOR;
   case TGSI_OPCODE_CONT:
      return VGPU10_OPCODE_CONTINUE;
   case TGSI_OPCODE_EMIT:
      return VGPU10_OPCODE_EMIT;
   case TGSI_OPCODE_ENDPRIM:
      return VGPU10_OPCODE_CUT;
   case TGSI_OPCODE_BGNLOOP:
      return VGPU10_OPCODE_LOOP;
   case TGSI_OPCODE_ENDLOOP:
      return VGPU10_OPCODE_ENDLOOP;
   case TGSI_OPCODE_ENDSUB:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_NOP:
      return VGPU10_OPCODE_NOP;
   case TGSI_OPCODE_BREAKC:
      return VGPU10_OPCODE_BREAKC;
   case TGSI_OPCODE_END:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_F2I:
      return VGPU10_OPCODE_FTOI;
   case TGSI_OPCODE_IMAX:
      return VGPU10_OPCODE_IMAX;
   case TGSI_OPCODE_IMIN:
      return VGPU10_OPCODE_IMIN;
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* all three map to UDIV; NOTE(review): presumably the caller selects
       * the quotient vs. remainder destination and handles the signedness
       * of TGSI_OPCODE_MOD specially -- verify in the instruction emitter.
       */
      return VGPU10_OPCODE_UDIV;
   case TGSI_OPCODE_IMUL_HI:
      /* NOTE(review): IMUL presumably yields both hi and lo results with
       * the hi destination chosen by the caller -- verify.
       */
      return VGPU10_OPCODE_IMUL;
   case TGSI_OPCODE_INEG:
      return VGPU10_OPCODE_INEG;
   case TGSI_OPCODE_ISHR:
      return VGPU10_OPCODE_ISHR;
   case TGSI_OPCODE_ISGE:
      return VGPU10_OPCODE_IGE;
   case TGSI_OPCODE_ISLT:
      return VGPU10_OPCODE_ILT;
   case TGSI_OPCODE_F2U:
      return VGPU10_OPCODE_FTOU;
   case TGSI_OPCODE_UADD:
      return VGPU10_OPCODE_IADD;
   case TGSI_OPCODE_U2F:
      return VGPU10_OPCODE_UTOF;
   case TGSI_OPCODE_UCMP:
      return VGPU10_OPCODE_MOVC;
   case TGSI_OPCODE_UMAD:
      return VGPU10_OPCODE_UMAD;
   case TGSI_OPCODE_UMAX:
      return VGPU10_OPCODE_UMAX;
   case TGSI_OPCODE_UMIN:
      return VGPU10_OPCODE_UMIN;
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
      return VGPU10_OPCODE_UMUL;
   case TGSI_OPCODE_USEQ:
      return VGPU10_OPCODE_IEQ;
   case TGSI_OPCODE_USGE:
      return VGPU10_OPCODE_UGE;
   case TGSI_OPCODE_USHR:
      return VGPU10_OPCODE_USHR;
   case TGSI_OPCODE_USLT:
      return VGPU10_OPCODE_ULT;
   case TGSI_OPCODE_USNE:
      return VGPU10_OPCODE_INE;
   case TGSI_OPCODE_SWITCH:
      return VGPU10_OPCODE_SWITCH;
   case TGSI_OPCODE_CASE:
      return VGPU10_OPCODE_CASE;
   case TGSI_OPCODE_DEFAULT:
      return VGPU10_OPCODE_DEFAULT;
   case TGSI_OPCODE_ENDSWITCH:
      return VGPU10_OPCODE_ENDSWITCH;
   case TGSI_OPCODE_FSLT:
      return VGPU10_OPCODE_LT;
   case TGSI_OPCODE_ROUND:
      return VGPU10_OPCODE_ROUND_NE;   /* round to nearest even */
   default:
      assert(!"Unexpected TGSI opcode in translate_opcode()");
      return VGPU10_OPCODE_NOP;
   }
}


/**
 * Translate a TGSI register file type into a VGPU10 operand type.
 * \param array  is the TGSI_FILE_TEMPORARY register an array?
 */
static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file, boolean array)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   case TGSI_FILE_INPUT:
      return VGPU10_OPERAND_TYPE_INPUT;
   case TGSI_FILE_OUTPUT:
      return VGPU10_OPERAND_TYPE_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
                   : VGPU10_OPERAND_TYPE_TEMP;
   case TGSI_FILE_IMMEDIATE:
      /* all immediates are 32-bit values at this time so
       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
       */
      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
   case TGSI_FILE_SAMPLER:
      return VGPU10_OPERAND_TYPE_SAMPLER;
   case TGSI_FILE_SYSTEM_VALUE:
      return VGPU10_OPERAND_TYPE_INPUT;

   /* XXX TODO more cases to finish */

   default:
      assert(!"Bad tgsi register file!");
      return VGPU10_OPERAND_TYPE_NULL;
   }
}


/**
 * Emit a null dst register
 */
static void
emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand;

   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand.value);
}


/**
 * If the given register is a temporary, return the array ID.
 * Else return zero.
 */
static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
                  unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].arrayId;
   }
   else {
      return 0;
   }
}


/**
 * If the given register is a temporary, convert the index from a TGSI
 * TEMPORARY index to a VGPU10 temp index.
 */
static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 *emit,
                 unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].index;
   }
   else {
      return index;
   }
}


/**
 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
 * Note: the operandType field must already be initialized.
 * \param file  the TGSI register file of the operand
 * \param indirect  is the register indirectly addressed?
 * \param index2D  does the register have a second (2D) index?
 * \param tempArrayID  which temp array the register belongs to (0 = none)
 * \return the updated operand0 token
 */
static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
                        VGPU10OperandToken0 operand0,
                        unsigned file,
                        boolean indirect, boolean index2D,
                        unsigned tempArrayID)
{
   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;

   /*
    * Compute index dimensions
    */
   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      indexDim = VGPU10_OPERAND_INDEX_0D;
      assert(operand0.selectionMode == 0);
   }
   else {
      /* indexable temps and constant buffers need a 2D index */
      if (index2D ||
          tempArrayID > 0 ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         indexDim = VGPU10_OPERAND_INDEX_2D;
      }
      else {
         indexDim = VGPU10_OPERAND_INDEX_1D;
      }
   }

   /*
    * Compute index representations (immediate, relative, etc).
    */
   if (tempArrayID > 0) {
      assert(file == TGSI_FILE_TEMPORARY);
      /* First index is the array ID, second index is the array element */
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      if (indirect) {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      }
   }
   else if (indirect) {
      if (file == TGSI_FILE_CONSTANT) {
         /* index[0] indicates which constant buffer while index[1] indicates
          * the position in the constant buffer.
          */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         /* All other register files are 1-dimensional */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
   }
   else {
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   }

   operand0.indexDimension = indexDim;
   operand0.index0Representation = index0Rep;
   operand0.index1Representation = index1Rep;

   return operand0;
}


/**
 * Emit the operand for expressing an address register for indirect indexing.
 * Note that the address register is really just a temp register.
 * \param addr_reg_index  which address register to use
 */
static void
emit_indirect_register(struct svga_shader_emitter_v10 *emit,
                       unsigned addr_reg_index)
{
   unsigned tmp_reg_index;
   VGPU10OperandToken0 operand0;

   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);

   /* look up the temp register backing this address register */
   tmp_reg_index = emit->address_reg_index[addr_reg_index];

   /* operand0 is a simple temporary register, selecting one component */
   operand0.value = 0;
   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   emit_dword(emit, operand0.value);
   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
}


/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to.  Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}


/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   const unsigned swizzleX = reg->Register.SwizzleX;
   const unsigned swizzleY = reg->Register.SwizzleY;
   const unsigned swizzleZ = reg->Register.SwizzleZ;
   const unsigned swizzleW = reg->Register.SwizzleW;
   const unsigned absolute = reg->Register.Absolute;
   const unsigned negate = reg->Register.Negate;
   bool is_prim_id = FALSE;

   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       file == TGSI_FILE_INPUT) {
      if (index == emit->fs.face_input_index) {
         /* Replace INPUT[FACE] with TEMP[FACE] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.face_tmp_index;
      }
      else if (index == emit->fs.fragcoord_input_index) {
         /* Replace INPUT[POSITION] with TEMP[POSITION] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.fragcoord_tmp_index;
      }
      else {
         /* We remap fragment shader inputs so that FS input indexes
          * match up with VS/GS output indexes.
          */
         index = emit->linkage.input_map[index];
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
            file == TGSI_FILE_INPUT) {
      is_prim_id = (index == emit->gs.prim_id_index);
      index = emit->linkage.input_map[index];
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         assert(index < ARRAY_SIZE(emit->system_value_indexes));
         index = emit->system_value_indexes[index];
      }
   }

   operand0.value = operand1.value = 0;

   if (is_prim_id) {
      /* primitive ID is a special zero-component operand */
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   }
   else {
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         /* all four components identical: use select-one mode */
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         /* abs/neg modifiers need an extended operand token */
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values */
      unsigned *c;
      assert(index < ARRAY_SIZE(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         emit_dword(emit, reg->Dimension.Index);
      }

      if (tempArrayId > 0) {
         emit_dword(emit, tempArrayId);
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}


/**
 * Emit a resource operand (for use with a SAMPLE
 instruction).
 */
static void
emit_resource_register(struct svga_shader_emitter_v10 *emit,
                       unsigned resource_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   /* identity swizzle (.xyzw) */
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
   emit_dword(emit, resource_number);
}


/**
 * Emit a sampler operand (for use with a SAMPLE instruction).
 */
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
                      unsigned sampler_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);

   /* init (sampler operands have no swizzle/component fields to set) */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;

   emit_dword(emit, operand0.value);
   emit_dword(emit, sampler_number);
}


/**
 * Emit an operand which reads the IS_FRONT_FACING register.
 */
static void
emit_face_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;
   /* the face input was remapped through the VS/GS->FS linkage table */
   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* replicate the X component */
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_X;
   operand0.swizzleZ = VGPU10_COMPONENT_X;
   operand0.swizzleW = VGPU10_COMPONENT_X;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}


/**
 * Emit the token for a VGPU10 opcode.
 * \param vgpu10_opcode  one of VGPU10_OPCODE_x
 * \param saturate  clamp result to [0,1]?
 */
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
            unsigned vgpu10_opcode, boolean saturate)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   emit_dword(emit, token0.value);
}


/**
 * Emit the token for a VGPU10 resinfo instruction.
 * \param modifier  return type modifier, _uint or _rcpFloat.
 *        TODO: We may want to remove this parameter if it will
 *        only ever be used as _uint.
 */
static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
                    VGPU10_RESINFO_RETURN_TYPE modifier)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = VGPU10_OPCODE_RESINFO;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.resinfoReturnType = modifier;

   emit_dword(emit, token0.value);
}


/**
 * Emit opcode tokens for a texture sample instruction.  Texture instructions
 * can be rather complicated (texel offsets, etc) so we have this specialized
 * function.
 * \param vgpu10_opcode  the sample instruction opcode
 * \param saturate  clamp result to [0,1]?
 * \param offsets  immediate texel offsets; if any is non-zero an extended
 *                 opcode token is emitted to carry them
 */
static void
emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
                   unsigned vgpu10_opcode, boolean saturate,
                   const int offsets[3])
{
   VGPU10OpcodeToken0 token0;
   VGPU10OpcodeToken1 token1;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   if (offsets[0] || offsets[1] || offsets[2]) {
      /* offsets must be within the min/max texel fetch offset range */
      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);

      /* carry the offsets in an extended opcode token */
      token0.extended = 1;
      token1.value = 0;
      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
      token1.offsetU = offsets[0];
      token1.offsetV = offsets[1];
      token1.offsetW = offsets[2];
   }

   emit_dword(emit, token0.value);
   if (token0.extended) {
      emit_dword(emit, token1.value);
   }
}


/**
 * Emit a DISCARD opcode token.
 * If nonzero is set, we'll discard the fragment if the X component is not 0.
 * Otherwise, we'll discard the fragment if the X component is 0.
 */
static void
emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
{
   VGPU10OpcodeToken0 opcode0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   if (nonzero)
      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   emit_dword(emit, opcode0.value);
}


/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}


/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = FALSE;
}


/**
 * Return index for a free temporary register.
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   /* internal temps are allocated after the shader's own temps */
   return emit->num_shader_temps + emit->internal_temp_count++;
}


/**
 * Release the temporaries which were generated by get_temp_index().
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}


/**
 * Create a tgsi_full_src_register.
 */
static struct tgsi_full_src_register
make_src_reg(unsigned file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   /* identity swizzle */
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}


/**
 * Create a tgsi_full_src_register for a temporary.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_src_register for a constant.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}


/**
 * Create a tgsi_full_src_register for an immediate constant.
1401 */ 1402static struct tgsi_full_src_register 1403make_src_immediate_reg(unsigned index) 1404{ 1405 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1406} 1407 1408 1409/** 1410 * Create a tgsi_full_dst_register. 1411 */ 1412static struct tgsi_full_dst_register 1413make_dst_reg(unsigned file, unsigned index) 1414{ 1415 struct tgsi_full_dst_register reg; 1416 1417 memset(®, 0, sizeof(reg)); 1418 reg.Register.File = file; 1419 reg.Register.Index = index; 1420 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1421 return reg; 1422} 1423 1424 1425/** 1426 * Create a tgsi_full_dst_register for a temporary. 1427 */ 1428static struct tgsi_full_dst_register 1429make_dst_temp_reg(unsigned index) 1430{ 1431 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1432} 1433 1434 1435/** 1436 * Create a tgsi_full_dst_register for an output. 1437 */ 1438static struct tgsi_full_dst_register 1439make_dst_output_reg(unsigned index) 1440{ 1441 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1442} 1443 1444 1445/** 1446 * Create negated tgsi_full_src_register. 1447 */ 1448static struct tgsi_full_src_register 1449negate_src(const struct tgsi_full_src_register *reg) 1450{ 1451 struct tgsi_full_src_register neg = *reg; 1452 neg.Register.Negate = !reg->Register.Negate; 1453 return neg; 1454} 1455 1456/** 1457 * Create absolute value of a tgsi_full_src_register. 
1458 */ 1459static struct tgsi_full_src_register 1460absolute_src(const struct tgsi_full_src_register *reg) 1461{ 1462 struct tgsi_full_src_register absolute = *reg; 1463 absolute.Register.Absolute = 1; 1464 return absolute; 1465} 1466 1467 1468/** Return the named swizzle term from the src register */ 1469static inline unsigned 1470get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) 1471{ 1472 switch (term) { 1473 case TGSI_SWIZZLE_X: 1474 return reg->Register.SwizzleX; 1475 case TGSI_SWIZZLE_Y: 1476 return reg->Register.SwizzleY; 1477 case TGSI_SWIZZLE_Z: 1478 return reg->Register.SwizzleZ; 1479 case TGSI_SWIZZLE_W: 1480 return reg->Register.SwizzleW; 1481 default: 1482 assert(!"Bad swizzle"); 1483 return TGSI_SWIZZLE_X; 1484 } 1485} 1486 1487 1488/** 1489 * Create swizzled tgsi_full_src_register. 1490 */ 1491static struct tgsi_full_src_register 1492swizzle_src(const struct tgsi_full_src_register *reg, 1493 unsigned swizzleX, unsigned swizzleY, 1494 unsigned swizzleZ, unsigned swizzleW) 1495{ 1496 struct tgsi_full_src_register swizzled = *reg; 1497 /* Note: we swizzle the current swizzle */ 1498 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1499 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1500 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1501 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1502 return swizzled; 1503} 1504 1505 1506/** 1507 * Create swizzled tgsi_full_src_register where all the swizzle 1508 * terms are the same. 
1509 */ 1510static struct tgsi_full_src_register 1511scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) 1512{ 1513 struct tgsi_full_src_register swizzled = *reg; 1514 /* Note: we swizzle the current swizzle */ 1515 swizzled.Register.SwizzleX = 1516 swizzled.Register.SwizzleY = 1517 swizzled.Register.SwizzleZ = 1518 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1519 return swizzled; 1520} 1521 1522 1523/** 1524 * Create new tgsi_full_dst_register with writemask. 1525 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1526 */ 1527static struct tgsi_full_dst_register 1528writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1529{ 1530 struct tgsi_full_dst_register masked = *reg; 1531 masked.Register.WriteMask = mask; 1532 return masked; 1533} 1534 1535 1536/** 1537 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1538 */ 1539static boolean 1540same_swizzle_terms(const struct tgsi_full_src_register *reg) 1541{ 1542 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1543 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1544 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1545} 1546 1547 1548/** 1549 * Search the vector for the value 'x' and return its position. 1550 */ 1551static int 1552find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1553 union tgsi_immediate_data x) 1554{ 1555 unsigned i; 1556 for (i = 0; i < 4; i++) { 1557 if (vec[i].Int == x.Int) 1558 return i; 1559 } 1560 return -1; 1561} 1562 1563 1564/** 1565 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 
 */
static int
find_immediate(struct svga_shader_emitter_v10 *emit,
               union tgsi_immediate_data x, unsigned startIndex)
{
   const unsigned endIndex = emit->num_immediates;
   unsigned i;

   assert(emit->immediates_emitted);

   /* Search immediates for x, y, z, w */
   for (i = startIndex; i < endIndex; i++) {
      if (x.Int == emit->immediates[i][0].Int ||
          x.Int == emit->immediates[i][1].Int ||
          x.Int == emit->immediates[i][2].Int ||
          x.Int == emit->immediates[i][3].Int) {
         return i;
      }
   }
   /* Should never try to use an immediate value that wasn't pre-declared */
   assert(!"find_immediate() failed!");
   return -1;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data[4] value.
 * Note: the values must have been previously declared/allocated in
 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
 * vec4 immediate.
 */
static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
                     const union tgsi_immediate_data imm[4])
{
   struct tgsi_full_src_register reg;
   unsigned i;

   for (i = 0; i < emit->num_common_immediates; i++) {
      /* search for first component value */
      int immpos = find_immediate(emit, imm[0], i);
      int x, y, z, w;

      assert(immpos >= 0);

      /* find remaining components within the immediate vector */
      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);

      if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
         /* found them all */
         memset(&reg, 0, sizeof(reg));
         reg.Register.File = TGSI_FILE_IMMEDIATE;
         reg.Register.Index = immpos;
         /* the swizzle selects where each component lives in the vec4 */
         reg.Register.SwizzleX = x;
         reg.Register.SwizzleY = y;
         reg.Register.SwizzleZ = z;
         reg.Register.SwizzleW = w;
         return reg;
      }
      /* else, keep searching */
   }

   assert(!"Failed to find immediate register!");

   /* Just return IMM[0].xxxx */
   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data value of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 *emit,
                   union tgsi_immediate_data value)
{
   struct tgsi_full_src_register reg;
   int immpos = find_immediate(emit, value, 0);

   assert(immpos >= 0);

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   reg.Register.Index = immpos;
   /* replicate the component which holds the value */
   reg.Register.SwizzleX =
   reg.Register.SwizzleY =
   reg.Register.SwizzleZ =
   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);

   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
                          float x, float y, float z, float w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Float = x;
   imm[1].Float = y;
   imm[2].Float = z;
   imm[3].Float = w;
   return make_immediate_reg_4(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
1688 */ 1689static struct tgsi_full_src_register 1690make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1691{ 1692 union tgsi_immediate_data imm; 1693 imm.Float = value; 1694 return make_immediate_reg(emit, imm); 1695} 1696 1697 1698/** 1699 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 1700 */ 1701static struct tgsi_full_src_register 1702make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1703 int x, int y, int z, int w) 1704{ 1705 union tgsi_immediate_data imm[4]; 1706 imm[0].Int = x; 1707 imm[1].Int = y; 1708 imm[2].Int = z; 1709 imm[3].Int = w; 1710 return make_immediate_reg_4(emit, imm); 1711} 1712 1713 1714/** 1715 * Return a tgsi_full_src_register for an immediate/literal int value 1716 * of the form {value, value, value, value}. 1717 * \sa make_immediate_reg_4() regarding allowed values. 1718 */ 1719static struct tgsi_full_src_register 1720make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1721{ 1722 union tgsi_immediate_data imm; 1723 imm.Int = value; 1724 return make_immediate_reg(emit, imm); 1725} 1726 1727 1728/** 1729 * Allocate space for a union tgsi_immediate_data[4] immediate. 1730 * \return the index/position of the immediate. 1731 */ 1732static unsigned 1733alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1734 const union tgsi_immediate_data imm[4]) 1735{ 1736 unsigned n = emit->num_immediates++; 1737 assert(!emit->immediates_emitted); 1738 assert(n < ARRAY_SIZE(emit->immediates)); 1739 emit->immediates[n][0] = imm[0]; 1740 emit->immediates[n][1] = imm[1]; 1741 emit->immediates[n][2] = imm[2]; 1742 emit->immediates[n][3] = imm[3]; 1743 return n; 1744} 1745 1746 1747/** 1748 * Allocate space for a float[4] immediate. 1749 * \return the index/position of the immediate. 
1750 */ 1751static unsigned 1752alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1753 float x, float y, float z, float w) 1754{ 1755 union tgsi_immediate_data imm[4]; 1756 imm[0].Float = x; 1757 imm[1].Float = y; 1758 imm[2].Float = z; 1759 imm[3].Float = w; 1760 return alloc_immediate_4(emit, imm); 1761} 1762 1763 1764/** 1765 * Allocate space for an int[4] immediate. 1766 * \return the index/position of the immediate. 1767 */ 1768static unsigned 1769alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1770 int x, int y, int z, int w) 1771{ 1772 union tgsi_immediate_data imm[4]; 1773 imm[0].Int = x; 1774 imm[1].Int = y; 1775 imm[2].Int = z; 1776 imm[3].Int = w; 1777 return alloc_immediate_4(emit, imm); 1778} 1779 1780 1781/** 1782 * Allocate a shader input to store a system value. 1783 */ 1784static unsigned 1785alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1786{ 1787 const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; 1788 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1789 emit->system_value_indexes[index] = n; 1790 return n; 1791} 1792 1793 1794/** 1795 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1796 */ 1797static boolean 1798emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1799 const struct tgsi_full_immediate *imm) 1800{ 1801 /* We don't actually emit any code here. We just save the 1802 * immediate values and emit them later. 1803 */ 1804 alloc_immediate_4(emit, imm->u); 1805 return TRUE; 1806} 1807 1808 1809/** 1810 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1811 * containing all the immediate values previously allocated 1812 * with alloc_immediate_4(). 
 */
static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 token;

   assert(!emit->immediates_emitted);

   token.value = 0;
   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;

   /* Note: no begin/end_emit_instruction() calls */
   emit_dword(emit, token.value);
   /* customdata length in dwords: 2-dword header + 4 dwords per immediate */
   emit_dword(emit, 2 + 4 * emit->num_immediates);
   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);

   /* no more immediates may be allocated after this point */
   emit->immediates_emitted = TRUE;

   return TRUE;
}


/**
 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
 * interpolation mode.
 * \return a VGPU10_INTERPOLATION_x value
 */
static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 *emit,
                        unsigned interp, unsigned interpolate_loc)
{
   if (interp == TGSI_INTERPOLATE_COLOR) {
      /* color interpolation depends on the key's flatshade state */
      interp = emit->key.fs.flatshade ?
         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
   }

   switch (interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      return VGPU10_INTERPOLATION_CONSTANT;
   case TGSI_INTERPOLATE_LINEAR:
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
         VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
         VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
   case TGSI_INTERPOLATE_PERSPECTIVE:
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
         VGPU10_INTERPOLATION_LINEAR_CENTROID :
         VGPU10_INTERPOLATION_LINEAR;
   default:
      assert(!"Unexpected interpolation mode");
      return VGPU10_INTERPOLATION_CONSTANT;
   }
}


/**
 * Translate a TGSI property to VGPU10.
 * Don't emit any instructions yet, only need to gather the primitive
 * property information.
 * The output primitive topology might be changed later.
The final property instructions
 * will be emitted as part of the pre-helper code.
 */
static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
                     const struct tgsi_full_property *prop)
{
   /* Map PIPE_PRIM_x to the VGPU10 GS input primitive type */
   static const VGPU10_PRIMITIVE primType[] = {
      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* Map PIPE_PRIM_x to the VGPU10 GS output topology */
   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* Number of input vertices per primitive, indexed by VGPU10_PRIMITIVE_x */
   static const unsigned inputArraySize[] = {
      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
      1,       /* VGPU10_PRIMITIVE_POINT */
      2,       /* VGPU10_PRIMITIVE_LINE */
      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
      0,
      0,
      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   };

   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      assert(prop->u[0].Data < ARRAY_SIZE(primType));
      emit->gs.prim_type = primType[prop->u[0].Data];
      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
      break;

   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
      emit->gs.prim_topology = primTopology[prop->u[0].Data];
      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
      break;

   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      emit->gs.max_out_vertices = prop->u[0].Data;
      break;

   default:
      /* other properties are ignored here */
      break;
   }

   return TRUE;
}


/**
 * Emit a single property declaration instruction, optionally followed
 * by one data dword (when nData is non-zero).
 */
static void
emit_property_instruction(struct svga_shader_emitter_v10 *emit,
                          VGPU10OpcodeToken0 opcode0, unsigned nData,
                          unsigned data)
{
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   if (nData)
      emit_dword(emit, data);
   end_emit_instruction(emit);
}


/**
 * Emit property instructions
 */
static void
emit_property_instructions(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;

   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* emit input primitive type declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   opcode0.primitive = emit->gs.prim_type;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit output primitive topology declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   opcode0.primitiveTopology = emit->gs.prim_topology;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit max output vertices */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
}


/**
 * Emit a vgpu10 declaration "instruction".
 * \param index  the register index
 * \param size   array size of the operand. In most cases, it is 1,
 *               but for inputs to geometry shader, the array size varies
 *               depending on the primitive type.
 */
static void
emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                      VGPU10OpcodeToken0 opcode0,
                      VGPU10OperandToken0 operand0,
                      VGPU10NameToken name_token,
                      unsigned index, unsigned size)
{
   assert(opcode0.opcodeType);
   assert(operand0.mask);

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);

   emit_dword(emit, operand0.value);

   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
      /* Next token is the index of the register to declare */
      emit_dword(emit, index);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
      /* Next token is the size of the register */
      emit_dword(emit, size);

      /* Followed by the index of the register */
      emit_dword(emit, index);
   }

   if (name_token.value) {
      emit_dword(emit, name_token.value);
   }

   end_emit_instruction(emit);
}


/**
 * Emit the declaration for a shader input.
 * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim  index dimension
 * \param index  the input register index
 * \param size  array size of the operand. In most cases, it is 1,
 *              but for inputs to geometry shader, the array size varies
 *              depending on the primitive type.
 * \param name  one of VGPU10_NAME_x
 * \param numComp  number of components
 * \param selMode  component selection mode
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode  interpolation mode
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       unsigned opcodeType, unsigned operandType,
                       unsigned dim, unsigned index, unsigned size,
                       unsigned name, unsigned numComp,
                       unsigned selMode, unsigned usageMask,
                       unsigned interpMode)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* Sanity-check every field against the legal VGPU10 encodings */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE);
   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   /* A 2D index (GS inputs) needs a representation for the second index too */
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
}


/**
 * Emit the declaration for a shader output.
 * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index  the output register index
 * \param name  one of VGPU10_NAME_x
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 */
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
                        unsigned type, unsigned index,
                        unsigned name, unsigned usageMask)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_CLIP_DISTANCE);

   check_register_index(emit, type, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = type;
   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   operand0.mask = usageMask;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   /* Outputs are always declared with array size 1 */
   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
}


/**
 * Emit the declaration for the fragment depth output.
 */
static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   opcode0.value = operand0.value = name_token.value = 0;

   /* oDepth is a single-component register with no index (0D) */
   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
}


/**
 * Emit the declaration for a system value input/output.
 */
static void
emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
                              unsigned semantic_name, unsigned index)
{
   switch (semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
      /* Allocate a slot among the driver's system-value registers and
       * declare it as the INSTANCE_ID system-interpreted value (x only).
       */
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_INSTANCE_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED);
      break;
   case TGSI_SEMANTIC_VERTEXID:
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_VERTEX_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED);
      break;
   default:
      ; /* XXX: other system values silently ignored for now */
   }
}

/**
 * Translate a TGSI declaration to VGPU10.
 * Most register files are not declared immediately; instead we record
 * counts/ranges in the emitter state and emit the actual declarations
 * later (see emit_input_declarations, emit_temporaries_declaration, etc).
 * \return TRUE on success, FALSE for unhandled declaration types.
 */
static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
                        const struct tgsi_full_declaration *decl)
{
   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      /* do nothing - see emit_input_declarations() */
      return TRUE;

   case TGSI_FILE_OUTPUT:
      assert(decl->Range.First == decl->Range.Last);
      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
      return TRUE;

   case TGSI_FILE_TEMPORARY:
      /* Don't declare the temps here.  Just keep track of how many
       * and emit the declaration later.
       */
      if (decl->Declaration.Array) {
         /* Indexed temporary array.  Save the start index of the array
          * and the size of the array.
          */
         /* NOTE(review): MIN2 against MAX_TEMP_ARRAYS (not MAX_TEMP_ARRAYS-1)
          * can still yield arrayID == MAX_TEMP_ARRAYS; the assert below
          * catches it in debug builds but release builds would write one
          * element past temp_arrays[] — confirm ArrayID range upstream.
          */
         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
         unsigned i;

         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));

         /* Save this array so we can emit the declaration for it later */
         emit->temp_arrays[arrayID].start = decl->Range.First;
         emit->temp_arrays[arrayID].size =
            decl->Range.Last - decl->Range.First + 1;

         emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
         assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
         emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);

         /* Fill in the temp_map entries for this array */
         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
            emit->temp_map[i].arrayId = arrayID;
            emit->temp_map[i].index = i - decl->Range.First;
         }
      }

      /* for all temps, indexed or not, keep track of highest index */
      emit->num_shader_temps = MAX2(emit->num_shader_temps,
                                    decl->Range.Last + 1);
      return TRUE;

   case TGSI_FILE_CONSTANT:
      /* Don't declare constants here.  Just keep track and emit later. */
      {
         unsigned constbuf = 0, num_consts;
         if (decl->Declaration.Dimension) {
            constbuf = decl->Dim.Index2D;
         }
         /* We throw an assertion here when, in fact, the shader should never
          * have linked due to constbuf index out of bounds, so we shouldn't
          * have reached here.
          */
         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));

         num_consts = MAX2(emit->num_shader_consts[constbuf],
                           decl->Range.Last + 1);

         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
            debug_printf("Warning: constant buffer is declared to size [%u]"
                         " but [%u] is the limit.\n",
                         num_consts,
                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
         }
         /* The linker doesn't enforce the max UBO size so we clamp here */
         emit->num_shader_consts[constbuf] =
            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
      }
      return TRUE;

   case TGSI_FILE_IMMEDIATE:
      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
      return FALSE;

   case TGSI_FILE_SYSTEM_VALUE:
      emit_system_value_declaration(emit, decl->Semantic.Name,
                                    decl->Range.First);
      return TRUE;

   case TGSI_FILE_SAMPLER:
      /* Don't declare samplers here.  Just keep track and emit later. */
      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
      return TRUE;

#if 0
   case TGSI_FILE_RESOURCE:
      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
      assert(!"TGSI_FILE_RESOURCE not handled yet");
      return FALSE;
#endif

   case TGSI_FILE_ADDRESS:
      emit->num_address_regs = MAX2(emit->num_address_regs,
                                    decl->Range.Last + 1);
      return TRUE;

   case TGSI_FILE_SAMPLER_VIEW:
      {
         unsigned unit = decl->Range.First;
         assert(decl->Range.First == decl->Range.Last);
         emit->sampler_target[unit] = decl->SamplerView.Resource;
         /* Note: we can ignore YZW return types for now */
         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
      }
      return TRUE;

   default:
      assert(!"Unexpected type of declaration");
      return FALSE;
   }
}



/**
 * Emit all input declarations.
 */
static boolean
emit_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   if (emit->unit == PIPE_SHADER_FRAGMENT) {

      /* FS inputs come from the VS/GS->FS linkage map */
      for (i = 0; i < emit->linkage.num_inputs; i++) {
         unsigned semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned type, interpolationMode, name;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* fragment position input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
            name = VGPU10_NAME_POSITION;
            if (usage_mask & TGSI_WRITEMASK_W) {
               /* we need to replace use of 'w' with '1/w' */
               emit->fs.fragcoord_input_index = i;
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_FACE) {
            /* fragment front-facing input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_IS_FRONT_FACE;
            emit->fs.face_input_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* primitive ID */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_PRIMITIVE_ID;
         }
         else {
            /* general fragment input */
            type = VGPU10_OPCODE_DCL_INPUT_PS;
            interpolationMode =
               translate_interpolation(emit,
                                       emit->info.input_interpolate[i],
                                       emit->info.input_interpolate_loc[i]);

            /* keeps track if flat interpolation mode is being used */
            emit->uses_flat_interp = emit->uses_flat_interp ||
               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);

            name = VGPU10_NAME_UNDEFINED;
         }

         emit_input_declaration(emit, type,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                name,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                interpolationMode);
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY) {

      for (i = 0; i < emit->info.num_inputs; i++) {
         unsigned semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned opcodeType, operandType;
         unsigned numComp, selMode;
         unsigned name;
         unsigned dim;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         opcodeType = VGPU10_OPCODE_DCL_INPUT;
         operandType = VGPU10_OPERAND_TYPE_INPUT;
         numComp = VGPU10_OPERAND_4_COMPONENT;
         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
         name = VGPU10_NAME_UNDEFINED;

         /* all geometry shader inputs are two dimensional except
          * gl_PrimitiveID
          */
         dim = VGPU10_OPERAND_INDEX_2D;

         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* Primitive ID */
            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
            dim = VGPU10_OPERAND_INDEX_0D;
            numComp = VGPU10_OPERAND_0_COMPONENT;
            selMode = 0;

            /* also save the register index so we can check for
             * primitive id when emit src register. We need to modify the
             * operand type, index dimension when emit primitive id src reg.
             */
            emit->gs.prim_id_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* vertex position input */
            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
            name = VGPU10_NAME_POSITION;
         }

         /* size = gs.input_size: number of vertices per input primitive */
         emit_input_declaration(emit, opcodeType, operandType,
                                dim, index,
                                emit->gs.input_size,
                                name,
                                numComp, selMode,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);

      for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = i;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                VGPU10_NAME_UNDEFINED,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }

   return TRUE;
}


/**
 * Emit all output declarations.
 */
static boolean
emit_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->info.num_outputs; i++) {
      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
      const unsigned semantic_name = emit->info.output_semantic_name[i];
      const unsigned semantic_index = emit->info.output_semantic_index[i];
      unsigned index = i;

      if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (semantic_name == TGSI_SEMANTIC_COLOR) {
            assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));

            emit->fs.color_out_index[semantic_index] = index;

            /* The semantic index is the shader's color output/buffer index */
            emit_output_declaration(emit,
                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
                                    VGPU10_NAME_UNDEFINED,
                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);

            if (semantic_index == 0) {
               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
                  /* Emit declarations for the additional color outputs
                   * for broadcasting.
                   */
                  unsigned j;
                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
                     /* Allocate a new output index */
                     unsigned idx = emit->info.num_outputs + j - 1;
                     emit->fs.color_out_index[j] = idx;
                     emit_output_declaration(emit,
                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
                                        VGPU10_NAME_UNDEFINED,
                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
                     emit->info.output_semantic_index[idx] = j;
                  }
               }
            }
            else {
               assert(!emit->key.fs.write_color0_to_n_cbufs);
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output */
            emit_fragdepth_output_declaration(emit);
         }
         else {
            assert(!"Bad output semantic name");
         }
      }
      else {
         /* VS or GS */
         unsigned name, type;
         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

         switch (semantic_name) {
         case TGSI_SEMANTIC_POSITION:
            assert(emit->unit != PIPE_SHADER_FRAGMENT);
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_POSITION;
            /* Save the index of the vertex position output register */
            emit->vposition.out_index = index;
            break;
         case TGSI_SEMANTIC_CLIPDIST:
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_CLIP_DISTANCE;
            /* save the starting index of the clip distance output register */
            if (semantic_index == 0)
               emit->clip_dist_out_index = index;
            writemask = emit->output_usage_mask[index];
            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
            if (writemask == 0x0) {
               continue; /* discard this do-nothing declaration */
            }
            break;
         case TGSI_SEMANTIC_PRIMID:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_PRIMITIVE_ID;
            break;
         case TGSI_SEMANTIC_LAYER:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
            emit->clip_vertex_out_index = index;
            break;
         default:
            /* generic output */
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
         }

         emit_output_declaration(emit, type, index, name, writemask);
      }
   }

   if (emit->vposition.so_index != INVALID_INDEX &&
       emit->vposition.out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the non-adjusted vertex position
       * for stream output purpose
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->vposition.so_index,
                              VGPU10_NAME_UNDEFINED,
                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   }

   if (emit->clip_dist_so_index != INVALID_INDEX &&
       emit->clip_dist_out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the clip distance shadow copy which
       * will be used for stream output purpose and for clip distance
       * varying variable
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->clip_dist_so_index,
                              VGPU10_NAME_UNDEFINED,
                              emit->output_usage_mask[emit->clip_dist_out_index]);

      if (emit->info.num_written_clipdistance > 4) {
         /* for the second clip distance register, each handles 4 planes */
         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                                 emit->clip_dist_so_index + 1,
                                 VGPU10_NAME_UNDEFINED,
                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
      }
   }

   return TRUE;
}


/**
 * Emit the declaration for the temporary registers.
 */
static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      unsigned arrayID;

      /* Array 0 is reserved for non-indexed temps, so use array 1 */
      arrayID = 1;
      emit->num_temp_arrays = arrayID + 1;
      emit->temp_arrays[arrayID].start = 0;
      emit->temp_arrays[arrayID].size = total_temps;

      /* Fill in the temp_map entries for this temp array */
      for (i = 0; i < total_temps; i++) {
         emit->temp_map[i].arrayId = arrayID;
         emit->temp_map[i].index = i;
      }
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         /* Temp for the unmodified vertex position */
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* One temp per vertex attribute that needs format adjustment */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }

      if (emit->clip_mode == CLIP_DISTANCE) {
         /* We need to write the clip distance to a temporary register
          * first. Then it will be copied to the shadow copy for
          * the clip distance varying variable and stream output purpose.
          * It will also be copied to the actual CLIPDIST register
          * according to the enabled clip planes
          */
         emit->clip_dist_tmp_index = total_temps++;
         if (emit->info.num_written_clipdistance > 4)
            total_temps++; /* second clip register */
      }
      else if (emit->clip_mode == CLIP_VERTEX) {
         /* We need to convert the TGSI CLIPVERTEX output to one or more
          * clip distances.  Allocate a temp reg for the clipvertex here.
          */
         assert(emit->info.writes_clipvertex > 0);
         emit->clip_vertex_tmp_index = total_temps;
         total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.white_fragments ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }
   }

   /* TGSI ADDR registers are implemented with temps */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   if (0) {
      /* debug dump of the TGSI-temp -> VGPU10-temp mapping */
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u -> array %u index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return TRUE;
}


/**
 * Emit declarations for the constant buffers: constant buffer 0
 * (regular shader constants plus driver-internal "extra" constants)
 * and any additional UBO constant buffers tracked in num_shader_consts.
 */
static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   unsigned total_consts, i;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */

   /* identity swizzle (xyzw) on a 2D-indexed constant buffer operand */
   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   /**
    * Emit declaration for constant buffer [0].  We also allocate
    * room for the extra constants here.
    */
   total_consts = emit->num_shader_consts[0];

   /* Now, allocate constant slots for the "extra" constants */

   /* Vertex position scale/translation */
   if (emit->vposition.need_prescale) {
      emit->vposition.prescale_scale_index = total_consts++;
      emit->vposition.prescale_trans_index = total_consts++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->key.vs.undo_viewport) {
         emit->vs.viewport_index = total_consts++;
      }
   }

   /* user-defined clip planes */
   if (emit->key.clip_plane_enable) {
      unsigned n = util_bitcount(emit->key.clip_plane_enable);
      assert(emit->unit == PIPE_SHADER_VERTEX ||
             emit->unit == PIPE_SHADER_GEOMETRY);
      for (i = 0; i < n; i++) {
         emit->clip_plane_const[i] = total_consts++;
      }
   }

   /* Texcoord scale factors for RECT textures */
   {
      for (i = 0; i < emit->num_samplers; i++) {
         if (emit->key.tex[i].unnormalized) {
            emit->texcoord_scale_index[i] = total_consts++;
         }
      }
   }

   /* Texture buffer sizes */
   for (i = 0; i < emit->num_samplers; i++) {
      if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
         emit->texture_buffer_size_index[i] = total_consts++;
      }
   }

   if (total_consts > 0) {
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, 0);  /* which const buffer slot */
      emit_dword(emit, total_consts);
      end_emit_instruction(emit);
   }

   /* Declare remaining constant buffers (UBOs) */
   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
      if (emit->num_shader_consts[i] > 0) {
         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, operand0.value);
         emit_dword(emit, i);  /* which const buffer slot */
         emit_dword(emit, emit->num_shader_consts[i]);
         end_emit_instruction(emit);
      }
   }

   return TRUE;
}


/**
 * Emit declarations for samplers.
 */
static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->num_samplers; i++) {
      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;

      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, i);
      end_emit_instruction(emit);
   }

   return TRUE;
}


/**
 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
 * Shadow targets map to the same dimension as their non-shadow
 * counterparts; array targets fall back to the non-array dimension
 * when is_array is false.
 */
static unsigned
tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
{
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
      return VGPU10_RESOURCE_DIMENSION_BUFFER;
   case TGSI_TEXTURE_1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_3D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   case TGSI_TEXTURE_CUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_SHADOW1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_SHADOWCUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_2D_MSAA:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_CUBE_ARRAY:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
   default:
      assert(!"Unexpected resource type");
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   }
}


/**
 * Given a tgsi_return_type, return true iff it is an integer type.
 */
static boolean
is_integer_type(enum tgsi_return_type type)
{
   switch (type) {
   case TGSI_RETURN_TYPE_SINT:
   case TGSI_RETURN_TYPE_UINT:
      return TRUE;
   case TGSI_RETURN_TYPE_FLOAT:
   case TGSI_RETURN_TYPE_UNORM:
   case TGSI_RETURN_TYPE_SNORM:
      return FALSE;
   case TGSI_RETURN_TYPE_COUNT:
   default:
      assert(!"is_integer_type: Unknown tgsi_return_type");
      return FALSE;
   }
}


/**
 * Emit declarations for resources.
 * XXX When we're sure that all TGSI shaders will be generated with
 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
 * rework this code.
 */
static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   /* Emit resource decl for each sampler */
   for (i = 0; i < emit->num_samplers; i++) {
      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;
      VGPU10ResourceReturnTypeToken return_type;
      VGPU10_RESOURCE_RETURN_TYPE rt;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
      opcode0.resourceDimension =
         tgsi_texture_to_resource_dimension(emit->sampler_target[i],
                                            emit->key.tex[i].is_array);
      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

#if 1
      /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x
       * (relies on the two enums differing by exactly 1)
       */
      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
      assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
      rt = emit->sampler_return_type[i] + 1;
#else
      switch (emit->sampler_return_type[i]) {
      case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
      case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
      case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
      case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
      case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
      case TGSI_RETURN_TYPE_COUNT:
      default:
         rt = VGPU10_RETURN_TYPE_FLOAT;
         assert(!"emit_resource_declarations: Unknown tgsi_return_type");
      }
#endif

      /* all four components share the same return type */
      return_type.value = 0;
      return_type.component0 = rt;
      return_type.component1 = rt;
      return_type.component2 = rt;
      return_type.component3 = rt;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, i);
      emit_dword(emit, return_type.value);
      end_emit_instruction(emit);
   }

   return TRUE;
}

/**
 * Emit a one-operand instruction: opcode dst, src.
 */
static void
emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src);
   end_emit_instruction(emit);
}

/**
 * Emit a two-operand instruction: opcode dst, src1, src2.
 */
static void
emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src1,
                     const struct tgsi_full_src_register *src2,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src1);
   emit_src_register(emit, src2);
   end_emit_instruction(emit);
}

/**
 * Emit a three-operand instruction: opcode dst, src1, src2, src3.
 */
static void
emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src1,
                     const struct tgsi_full_src_register *src2,
                     const struct tgsi_full_src_register *src3,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src1);
   emit_src_register(emit, src2);
   emit_src_register(emit, src3);
   end_emit_instruction(emit);
}

/**
 * Emit the actual clip distance instructions
 * to be used for clipping
 * by copying the clip distance from the temporary registers to the
 * CLIPDIST registers written with the enabled planes mask.
 * Also copy the clip distance from the temporary to the clip distance
 * shadow copy register which will be referenced by the input shader
 */
static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register tmp_clip_dist_src;
   struct tgsi_full_dst_register clip_dist_dst;

   unsigned i;
   unsigned clip_plane_enable = emit->key.clip_plane_enable;
   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   int num_written_clipdist = emit->info.num_written_clipdistance;

   assert(emit->clip_dist_out_index != INVALID_INDEX);
   assert(emit->clip_dist_tmp_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip dist register index so
    * that the copy to the real clip dist register will not
    * attempt to copy to the temporary register again
    */
   emit->clip_dist_tmp_index = INVALID_INDEX;

   /* At most two clip-distance registers, four distances each. */
   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {

      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);

      /**
       * copy to the shadow copy for use by varying variable and
       * stream output. All clip distances
       * will be written regardless of the enabled clipping planes.
       */
      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                   emit->clip_dist_so_index + i);

      /* MOV clip_dist_so, tmp_clip_dist */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                           &tmp_clip_dist_src, FALSE);

      /**
       * copy those clip distances of the enabled clipping planes
       * to the CLIPDIST registers for clipping; the writemask is the
       * low nibble of the per-register enable mask
       */
      if (clip_plane_enable & 0xf) {
         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                      emit->clip_dist_out_index + i);
         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);

         /* MOV CLIPDIST, tmp_clip_dist */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                              &tmp_clip_dist_src, FALSE);
      }
      /* four clip planes per clip register */
      clip_plane_enable >>= 4;
   }
   /**
    * set the temporary clip dist register index back to the
    * temporary index for the next vertex
    */
   emit->clip_dist_tmp_index = clip_dist_tmp_index;
}

/* Declare clip distance output registers for user-defined clip planes
 * or the TGSI_CLIPVERTEX output.  Only applies in CLIP_LEGACY or
 * CLIP_VERTEX mode; records the starting register in
 * emit->clip_dist_out_index and bumps emit->num_outputs by one or two.
 */
static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   unsigned index = emit->num_outputs;
   unsigned plane_mask;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);
   assert(num_clip_planes <= 8);

   if (emit->clip_mode != CLIP_LEGACY &&
       emit->clip_mode != CLIP_VERTEX) {
      return;
   }

   if (num_clip_planes == 0)
      return;

   /* Declare one or two clip output registers.  The number of components
    * in the mask reflects the number of clip planes.  For example, if 5
    * clip planes are needed, we'll declare outputs similar to:
    * dcl_output_siv o2.xyzw, clip_distance
    * dcl_output_siv o3.x, clip_distance
    */
   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */

   plane_mask = (1 << num_clip_planes) - 1;
   if (plane_mask & 0xf) {
      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
   if (plane_mask & 0xf0) {
      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
}


/**
 * Emit the instructions for writing to the clip distance registers
 * to handle legacy/automatic clip planes.
 * For each clip plane, the distance is the dot product of the vertex
 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
 * output registers already declared.
 */
static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
                             unsigned vpos_tmp_index)
{
   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);

   assert(emit->clip_mode == CLIP_LEGACY);
   assert(num_clip_planes <= 8);

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   for (i = 0; i < num_clip_planes; i++) {
      struct tgsi_full_dst_register dst;
      struct tgsi_full_src_register plane_src, vpos_src;
      /* distances are packed four per output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
      vpos_src = make_src_temp_reg(vpos_tmp_index);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &vpos_src, FALSE);
   }
}


/**
 * Emit the instructions for computing the clip distance results from
 * the clip vertex temporary.
 * For each clip plane, the distance is the dot product of the clip vertex
 * position (found in a temp reg) and the clip plane coefficients.
 */
static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   unsigned i;
   struct tgsi_full_dst_register dst;
   struct tgsi_full_src_register clipvert_src;
   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   assert(emit->clip_mode == CLIP_VERTEX);

   clipvert_src = make_src_temp_reg(clip_vertex_tmp);

   for (i = 0; i < num_clip; i++) {
      struct tgsi_full_src_register plane_src;
      /* four distances are packed into each output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &clipvert_src, FALSE);
   }

   /* copy temporary clip vertex register to the clip vertex register */

   assert(emit->clip_vertex_out_index != INVALID_INDEX);

   /**
    * temporarily reset the temporary clip vertex register index so
    * that the copy to the clip vertex register will not attempt
    * to copy to the temporary register again
    */
   emit->clip_vertex_tmp_index = INVALID_INDEX;

   /* MOV clip_vertex, clip_vertex_tmp */
   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                        &dst, &clipvert_src, FALSE);

   /**
    * set the temporary clip vertex register index back to the
    * temporary index for the next vertex
    */
   emit->clip_vertex_tmp_index = clip_vertex_tmp;
}

/**
 * Emit code to convert RGBA to BGRA: a single MOV with a ZYXW swizzle
 * on the source.
 */
static void
emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_dst_register *dst,
              const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register bgra_src =
      swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);

   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   emit_dst_register(emit, dst);
   emit_src_register(emit, &bgra_src);
   end_emit_instruction(emit);
}


/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_dst_register *dst,
                    const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
   /* NOTE(review): the W lanes of these constants (3.0, -1.66666) differ
    * from the XYZ lanes (2.0, -2.0) — presumably special-casing the 2-bit
    * alpha channel; confirm against the 10_10_10_2 conversion rules.
    */
   struct tgsi_full_src_register two =
      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
   struct tgsi_full_src_register neg_two =
      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);

   unsigned val_tmp = get_temp_index(emit);
   struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
   struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);

   unsigned bias_tmp = get_temp_index(emit);
   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);

   /* val = src * 2.0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
                        src, &two, FALSE);

   /* bias = src > 0.5 */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
                        src, &half, FALSE);

   /* bias = bias & -2.0 (GE yields an all-ones mask, so the AND selects
    * the bias constant per component)
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
                        &bias_src, &neg_two, FALSE);

   /* dst = val + bias */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
                        &val_src, &bias_src, FALSE);

   free_temp_indexes(emit);
}


/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   /* per-channel scale: 1023 for the 10-bit RGB channels, 3 for 2-bit A */
   struct tgsi_full_src_register scale =
      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);

   /* dst = src * scale */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
}


/** Convert from R32_UINT to 10_10_10_2_sscaled */
static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register lshift =
      make_immediate_reg_int4(emit, 22, 12, 2, 0);
   struct tgsi_full_src_register rshift =
      make_immediate_reg_int4(emit, 22, 22, 22, 30);

   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);

   /* Shift each packed field to the top bits, then arithmetic-shift back
    * down to sign-extend it:
    * r = (pixel << 22) >> 22;   # signed int in [511, -512]
    * g = (pixel << 12) >> 22;   # signed int in [511, -512]
    * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
    * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
    * dst = i_to_f(r,g,b,a);     # convert to float
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
                        &src_xxxx, &lshift, FALSE);
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
                        &tmp_src, &rshift, FALSE);
   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);

   free_temp_indexes(emit);
}


/**
 * Emit code for TGSI_OPCODE_ABS instruction.
 */
static boolean
emit_abs(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = ABS(s0):
    *   dst = abs(s0)
    * Translates into:
    *   MOV dst, abs(s0)
    * using the source-operand absolute-value modifier.
    */
   struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);

   /* MOV dst, abs(s0) */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &abs_src0, inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
 * The write goes to the temp register shadowing the address register
 * (emit->address_reg_index), not to the TGSI destination directly.
 */
static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_instruction *inst)
{
   unsigned index = inst->Dst[0].Register.Index;
   struct tgsi_full_dst_register dst;
   unsigned opcode;

   assert(index < MAX_VGPU10_ADDR_REGS);
   dst = make_dst_temp_reg(emit->address_reg_index[index]);

   /* ARL dst, s0
    * Translates into:
    *   FTOI address_tmp, s0   (float source must be truncated to int)
    *
    * UARL dst, s0
    * Translates into:
    *   MOV address_tmp, s0    (source is already an integer)
    */
   if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
      opcode = VGPU10_OPCODE_FTOI;
   else
      opcode = VGPU10_OPCODE_MOV;

   emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_CAL instruction.
 * NOTE(review): unlike every other emitter in this file, no opcode token
 * is emitted before the LABEL operand — only the operand and the label id
 * are written between begin/end_emit_instruction.  Verify this produces a
 * well-formed VGPU10 CALL; it looks like a missing opcode0 emission.
 */
static boolean
emit_cal(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   unsigned label = inst->Label.Label;
   VGPU10OperandToken0 operand;
   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_LABEL;

   begin_emit_instruction(emit);
   emit_dword(emit, operand.value);
   emit_dword(emit, label);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IABS instruction.
 */
static boolean
emit_iabs(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   /* dst.x = (src0.x < 0) ? -src0.x : src0.x
    * dst.y = (src0.y < 0) ? -src0.y : src0.y
    * dst.z = (src0.z < 0) ? -src0.z : src0.z
    * dst.w = (src0.w < 0) ? -src0.w : src0.w
    *
    * Translates into
    *   IMAX dst, src, neg(src)
    * (integer abs(x) == max(x, -x))
    */
   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
                        &inst->Src[0], &neg_src, FALSE);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_CMP instruction.
 */
static boolean
emit_cmp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = (src0.x < 0) ? src1.x : src2.x
    * dst.y = (src0.y < 0) ? src1.y : src2.y
    * dst.z = (src0.z < 0) ? src1.z : src2.z
    * dst.w = (src0.w < 0) ? src1.w : src2.w
    *
    * Translates into
    *   LT tmp, src0, 0.0
    *   MOVC dst, tmp, src1, src2
    * (MOVC selects src1 where the tmp mask is non-zero, else src2)
    */
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
                        &inst->Src[0], &zero, FALSE);
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
                        &tmp_src, &inst->Src[1], &inst->Src[2],
                        inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DP2A instruction.
 */
static boolean
emit_dp2a(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
    * Translate into
    *   MAD tmp.x, s0.y, s1.y, s2.x
    *   MAD tmp.x, s0.x, s1.x, tmp.x
    *   MOV dst.xyzw, tmp.xxxx   (broadcast the scalar result)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);

   struct tgsi_full_src_register src0_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register src1_xxxx =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register src2_xxxx =
      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);

   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
                        &src1_yyyy, &src2_xxxx, FALSE);
   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
                        &src1_xxxx, &tmp_src_xxxx, FALSE);
   /* Saturate is applied only on the final move into the real dst. */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DPH instruction.
 */
static boolean
emit_dph(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Homogeneous dot product: dp3(s0, s1) + s1.w
    *   DP3 tmp, s0, s1
    *   ADD dst, tmp, s1.wwww
    */

   struct tgsi_full_src_register s1_wwww =
      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* DP3 tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* ADD dst, tmp, s1.wwww */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
                        &s1_wwww, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DST instruction.
 */
static boolean
emit_dst(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 1
    * dst.y = src0.y * src1.y
    * dst.z = src0.z
    * dst.w = src1.w
    */

   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_wwww =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);

   /*
    * In case dst aliases src0 or src1, build the result in a
    * temporary and copy it to dst with one final move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MUL dst.y, s0.y, s1.y */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);

      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
                           &s1_yyyy, inst->Instruction.Saturate);
   }

   /* MOV dst.z, s0.z */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, s1.w */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
                           inst->Instruction.Saturate);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}



/**
 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
 */
static boolean
emit_endprim(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* We can't use emit_simple() because the TGSI instruction has one
    * operand (vertex stream number) which we must ignore for VGPU10.
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   end_emit_instruction(emit);
   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
 */
static boolean
emit_ex2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = EX2(src):
    *   dst.xyzw = 2.0 ^ src.x
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* EXP tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EXP instruction.
 */
static boolean
emit_exp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 2 ^ floor(s0.x)
    * dst.y = s0.x - floor(s0.x)
    * dst.z = 2 ^ s0.x
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /*
    * In case dst aliases src, build the result in a second temporary
    * and copy it to dst with one final move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* ROUND_NI tmp.x, s0.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                        &src_xxxx, FALSE); /* round to -infinity */

   /* EXP dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* ADD dst.y, s0.x, -tmp  (fractional part of s0.x) */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);

      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
                           &neg_tmp_src, inst->Instruction.Saturate);
   }

   /* EXP dst.z, s0.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
                           FALSE);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IF instruction.
 */
static boolean
emit_if(struct svga_shader_emitter_v10 *emit,
        const struct tgsi_full_instruction *inst)
{
   VGPU10OpcodeToken0 opcode0;

   /* The src register should be a scalar */
   assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);

   /* The only special thing here is that we need to set the
    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
    * src.x is non-zero.
    */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_IF;
   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
 * the register components are negative).
 */
static boolean
emit_kill_if(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* tmp = src[0] < 0.0  (per-component comparison mask) */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
                        &zero, FALSE);

   if (!same_swizzle_terms(&inst->Src[0])) {
      /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
       * logically OR the swizzle terms.  Most uses of KILL_IF only
       * test one channel so it's good to avoid these extra steps.
       * Each OR accumulates another channel into tmp.x.
       */
      struct tgsi_full_src_register tmp_src_yyyy =
         scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
      struct tgsi_full_src_register tmp_src_zzzz =
         scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
      struct tgsi_full_src_register tmp_src_wwww =
         scalar_src(&tmp_src, TGSI_SWIZZLE_W);

      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_wwww, FALSE);
   }

   begin_emit_instruction(emit);
   emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
   emit_src_register(emit, &tmp_src_xxxx);
   end_emit_instruction(emit);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
 */
static boolean
emit_kill(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   /* DISCARD if 0.0 is zero: the FALSE condition flag means "discard when
    * the operand equals zero", and the operand is the constant 0.0, so
    * this always discards the fragment.
    */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);
   emit_src_register(emit, &zero);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LG2 instruction.
 */
static boolean
emit_lg2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = LG2(src):
    *   dst.xyzw = log2(src.x)
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LIT instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /*
    * In case dst aliases src, build the result in a temporary
    * and copy it to dst with one final move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /*
    * dst.x = 1
    * dst.y = max(src.x, 0)
    * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
    * dst.w = 1
    */

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   /* MAX dst.y, src.x, 0.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register src_xxxx =
         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
                           &zero, inst->Instruction.Saturate);
   }

   /*
    * tmp1 = clamp(src.w, -128, 128);
    *   MAX tmp1, src.w, -128
    *   MIN tmp1, tmp1, 128
    *
    * tmp2 = max(tmp2, 0);
    *   MAX tmp2, src.y, 0
    *
    * tmp1 = pow(tmp2, tmp1);   (via log/mul/exp since there is no POW)
    *   LOG tmp2, tmp2
    *   MUL tmp1, tmp2, tmp1
    *   EXP tmp1, tmp1
    *
    * tmp1 = (src.w == 0) ? 1 : tmp1;
    *   EQ tmp2, 0, src.w
    *   MOVC tmp1, tmp2, 1.0, tmp1
    *
    * dst.z = (0 < src.x) ? tmp1 : 0;
    *   LT tmp2, 0, src.x
    *   MOVC dst.z, tmp2, tmp1, 0.0
    */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      unsigned tmp1 = get_temp_index(emit);
      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
      unsigned tmp2 = get_temp_index(emit);
      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);

      struct tgsi_full_src_register src_xxxx =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
      struct tgsi_full_src_register src_yyyy =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
      struct tgsi_full_src_register src_wwww =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);

      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register lowerbound =
         make_immediate_reg_float(emit, -128.0f);
      struct tgsi_full_src_register upperbound =
         make_immediate_reg_float(emit, 128.0f);

      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
                           &lowerbound, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
                           &upperbound, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
                           &zero, FALSE);

      /* POW tmp1, tmp2, tmp1 */
      /* LOG tmp2, tmp2 */
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
                           FALSE);

      /* MUL tmp1, tmp2, tmp1 */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
                           &tmp1_src, FALSE);

      /* EXP tmp1, tmp1 */
      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
                           FALSE);

      /* EQ tmp2, 0, src.w */
      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
                           &src_wwww, FALSE);
/* MOVC tmp1.z, tmp2, tmp1, 1.0 */ 4129 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 4130 &tmp2_src, &one, &tmp1_src, FALSE); 4131 4132 /* LT tmp2, 0, src.x */ 4133 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, 4134 &src_xxxx, FALSE); 4135 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 4136 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 4137 &tmp2_src, &tmp1_src, &zero, FALSE); 4138 } 4139 4140 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 4141 FALSE); 4142 free_temp_indexes(emit); 4143 4144 return TRUE; 4145} 4146 4147 4148/** 4149 * Emit code for TGSI_OPCODE_LOG instruction. 4150 */ 4151static boolean 4152emit_log(struct svga_shader_emitter_v10 *emit, 4153 const struct tgsi_full_instruction *inst) 4154{ 4155 /* 4156 * dst.x = floor(lg2(abs(s0.x))) 4157 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 4158 * dst.z = lg2(abs(s0.x)) 4159 * dst.w = 1.0 4160 */ 4161 4162 struct tgsi_full_src_register src_xxxx = 4163 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4164 unsigned tmp = get_temp_index(emit); 4165 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4166 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4167 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 4168 4169 /* only use X component of temp reg */ 4170 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4171 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4172 4173 /* LOG tmp.x, abs(s0.x) */ 4174 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 4175 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, 4176 &abs_src_xxxx, FALSE); 4177 } 4178 4179 /* MOV dst.z, tmp.x */ 4180 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4181 struct tgsi_full_dst_register dst_z = 4182 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 4183 4184 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, 4185 &tmp_src, inst->Instruction.Saturate); 4186 } 4187 4188 /* FLR tmp.x, tmp.x */ 4189 if 
(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 4190 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 4191 &tmp_src, FALSE); 4192 } 4193 4194 /* MOV dst.x, tmp.x */ 4195 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4196 struct tgsi_full_dst_register dst_x = 4197 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4198 4199 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, 4200 inst->Instruction.Saturate); 4201 } 4202 4203 /* EXP tmp.x, tmp.x */ 4204 /* DIV dst.y, abs(s0.x), tmp.x */ 4205 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4206 struct tgsi_full_dst_register dst_y = 4207 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4208 4209 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, 4210 FALSE); 4211 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 4212 &tmp_src, inst->Instruction.Saturate); 4213 } 4214 4215 /* MOV dst.w, 1.0 */ 4216 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4217 struct tgsi_full_dst_register dst_w = 4218 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 4219 struct tgsi_full_src_register one = 4220 make_immediate_reg_float(emit, 1.0f); 4221 4222 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4223 } 4224 4225 free_temp_indexes(emit); 4226 4227 return TRUE; 4228} 4229 4230 4231/** 4232 * Emit code for TGSI_OPCODE_LRP instruction. 
4233 */ 4234static boolean 4235emit_lrp(struct svga_shader_emitter_v10 *emit, 4236 const struct tgsi_full_instruction *inst) 4237{ 4238 /* dst = LRP(s0, s1, s2): 4239 * dst = s0 * (s1 - s2) + s2 4240 * Translates into: 4241 * SUB tmp, s1, s2; tmp = s1 - s2 4242 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2 4243 */ 4244 unsigned tmp = get_temp_index(emit); 4245 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 4246 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 4247 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 4248 4249 /* ADD tmp, s1, -s2 */ 4250 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, 4251 &inst->Src[1], &neg_src2, FALSE); 4252 4253 /* MAD dst, s1, tmp, s3 */ 4254 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 4255 &inst->Src[0], &src_tmp, &inst->Src[2], 4256 inst->Instruction.Saturate); 4257 4258 free_temp_indexes(emit); 4259 4260 return TRUE; 4261} 4262 4263 4264/** 4265 * Emit code for TGSI_OPCODE_POW instruction. 4266 */ 4267static boolean 4268emit_pow(struct svga_shader_emitter_v10 *emit, 4269 const struct tgsi_full_instruction *inst) 4270{ 4271 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 4272 * src1.x while VGPU10 computes four values. 
4273 * 4274 * dst = POW(src0, src1): 4275 * dst.xyzw = src0.x ^ src1.x 4276 */ 4277 unsigned tmp = get_temp_index(emit); 4278 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4279 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4280 struct tgsi_full_src_register src0_xxxx = 4281 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4282 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4283 struct tgsi_full_src_register src1_xxxx = 4284 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4285 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4286 4287 /* LOG tmp, s0.xxxx */ 4288 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, 4289 FALSE); 4290 4291 /* MUL tmp, tmp, s1.xxxx */ 4292 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, 4293 &src1_xxxx, FALSE); 4294 4295 /* EXP tmp, s0.xxxx */ 4296 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], 4297 &tmp_src, inst->Instruction.Saturate); 4298 4299 /* free tmp */ 4300 free_temp_indexes(emit); 4301 4302 return TRUE; 4303} 4304 4305 4306/** 4307 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 
4308 */ 4309static boolean 4310emit_rcp(struct svga_shader_emitter_v10 *emit, 4311 const struct tgsi_full_instruction *inst) 4312{ 4313 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4314 4315 unsigned tmp = get_temp_index(emit); 4316 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4317 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4318 4319 struct tgsi_full_dst_register tmp_dst_x = 4320 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4321 struct tgsi_full_src_register tmp_src_xxxx = 4322 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4323 4324 /* DIV tmp.x, 1.0, s0 */ 4325 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, 4326 &inst->Src[0], FALSE); 4327 4328 /* MOV dst, tmp.xxxx */ 4329 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4330 &tmp_src_xxxx, inst->Instruction.Saturate); 4331 4332 free_temp_indexes(emit); 4333 4334 return TRUE; 4335} 4336 4337 4338/** 4339 * Emit code for TGSI_OPCODE_RSQ instruction. 4340 */ 4341static boolean 4342emit_rsq(struct svga_shader_emitter_v10 *emit, 4343 const struct tgsi_full_instruction *inst) 4344{ 4345 /* dst = RSQ(src): 4346 * dst.xyzw = 1 / sqrt(src.x) 4347 * Translates into: 4348 * RSQ tmp, src.x 4349 * MOV dst, tmp.xxxx 4350 */ 4351 4352 unsigned tmp = get_temp_index(emit); 4353 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4354 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4355 4356 struct tgsi_full_dst_register tmp_dst_x = 4357 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4358 struct tgsi_full_src_register tmp_src_xxxx = 4359 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4360 4361 /* RSQ tmp, src.x */ 4362 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, 4363 &inst->Src[0], FALSE); 4364 4365 /* MOV dst, tmp.xxxx */ 4366 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4367 &tmp_src_xxxx, inst->Instruction.Saturate); 4368 4369 /* free tmp */ 4370 free_temp_indexes(emit); 4371 4372 return TRUE; 
4373} 4374 4375 4376/** 4377 * Emit code for TGSI_OPCODE_SCS instruction. 4378 */ 4379static boolean 4380emit_scs(struct svga_shader_emitter_v10 *emit, 4381 const struct tgsi_full_instruction *inst) 4382{ 4383 /* dst.x = cos(src.x) 4384 * dst.y = sin(src.x) 4385 * dst.z = 0.0 4386 * dst.w = 1.0 4387 */ 4388 struct tgsi_full_dst_register dst_x = 4389 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4390 struct tgsi_full_dst_register dst_y = 4391 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4392 struct tgsi_full_dst_register dst_zw = 4393 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); 4394 4395 struct tgsi_full_src_register zero_one = 4396 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); 4397 4398 begin_emit_instruction(emit); 4399 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); 4400 emit_dst_register(emit, &dst_y); 4401 emit_dst_register(emit, &dst_x); 4402 emit_src_register(emit, &inst->Src[0]); 4403 end_emit_instruction(emit); 4404 4405 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 4406 &dst_zw, &zero_one, inst->Instruction.Saturate); 4407 4408 return TRUE; 4409} 4410 4411 4412/** 4413 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 4414 */ 4415static boolean 4416emit_seq(struct svga_shader_emitter_v10 *emit, 4417 const struct tgsi_full_instruction *inst) 4418{ 4419 /* dst = SEQ(s0, s1): 4420 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 4421 * Translates into: 4422 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4423 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4424 */ 4425 unsigned tmp = get_temp_index(emit); 4426 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4427 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4428 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4429 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4430 4431 /* EQ tmp, s0, s1 */ 4432 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 4433 &inst->Src[1], FALSE); 4434 4435 /* MOVC dst, tmp, one, zero */ 4436 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4437 &one, &zero, FALSE); 4438 4439 free_temp_indexes(emit); 4440 4441 return TRUE; 4442} 4443 4444 4445/** 4446 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 4447 */ 4448static boolean 4449emit_sge(struct svga_shader_emitter_v10 *emit, 4450 const struct tgsi_full_instruction *inst) 4451{ 4452 /* dst = SGE(s0, s1): 4453 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 4454 * Translates into: 4455 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 4456 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4457 */ 4458 unsigned tmp = get_temp_index(emit); 4459 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4460 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4461 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4462 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4463 4464 /* GE tmp, s0, s1 */ 4465 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 4466 &inst->Src[1], FALSE); 4467 4468 /* MOVC dst, tmp, one, zero */ 4469 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4470 &one, &zero, FALSE); 4471 4472 free_temp_indexes(emit); 4473 4474 return TRUE; 4475} 4476 4477 4478/** 4479 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 
4480 */ 4481static boolean 4482emit_sgt(struct svga_shader_emitter_v10 *emit, 4483 const struct tgsi_full_instruction *inst) 4484{ 4485 /* dst = SGT(s0, s1): 4486 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4487 * Translates into: 4488 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4489 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4490 */ 4491 unsigned tmp = get_temp_index(emit); 4492 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4493 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4494 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4495 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4496 4497 /* LT tmp, s1, s0 */ 4498 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4499 &inst->Src[0], FALSE); 4500 4501 /* MOVC dst, tmp, one, zero */ 4502 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4503 &one, &zero, FALSE); 4504 4505 free_temp_indexes(emit); 4506 4507 return TRUE; 4508} 4509 4510 4511/** 4512 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
4513 */ 4514static boolean 4515emit_sincos(struct svga_shader_emitter_v10 *emit, 4516 const struct tgsi_full_instruction *inst) 4517{ 4518 unsigned tmp = get_temp_index(emit); 4519 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4520 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4521 4522 struct tgsi_full_src_register tmp_src_xxxx = 4523 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4524 struct tgsi_full_dst_register tmp_dst_x = 4525 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4526 4527 begin_emit_instruction(emit); 4528 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 4529 4530 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 4531 { 4532 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 4533 emit_null_dst_register(emit); /* second destination register */ 4534 } 4535 else { 4536 emit_null_dst_register(emit); 4537 emit_dst_register(emit, &tmp_dst_x); 4538 } 4539 4540 emit_src_register(emit, &inst->Src[0]); 4541 end_emit_instruction(emit); 4542 4543 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4544 &tmp_src_xxxx, inst->Instruction.Saturate); 4545 4546 free_temp_indexes(emit); 4547 4548 return TRUE; 4549} 4550 4551 4552/** 4553 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 4554 */ 4555static boolean 4556emit_sle(struct svga_shader_emitter_v10 *emit, 4557 const struct tgsi_full_instruction *inst) 4558{ 4559 /* dst = SLE(s0, s1): 4560 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 4561 * Translates into: 4562 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 4563 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 4564 */ 4565 unsigned tmp = get_temp_index(emit); 4566 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4567 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4568 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4569 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4570 4571 /* GE tmp, s1, s0 */ 4572 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 4573 &inst->Src[0], FALSE); 4574 4575 /* MOVC dst, tmp, one, zero */ 4576 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4577 &one, &zero, FALSE); 4578 4579 free_temp_indexes(emit); 4580 4581 return TRUE; 4582} 4583 4584 4585/** 4586 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 4587 */ 4588static boolean 4589emit_slt(struct svga_shader_emitter_v10 *emit, 4590 const struct tgsi_full_instruction *inst) 4591{ 4592 /* dst = SLT(s0, s1): 4593 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 4594 * Translates into: 4595 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 4596 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4597 */ 4598 unsigned tmp = get_temp_index(emit); 4599 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4600 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4601 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4602 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4603 4604 /* LT tmp, s0, s1 */ 4605 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 4606 &inst->Src[1], FALSE); 4607 4608 /* MOVC dst, tmp, one, zero */ 4609 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4610 &one, &zero, FALSE); 4611 4612 free_temp_indexes(emit); 4613 4614 return TRUE; 4615} 4616 4617 4618/** 4619 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 
4620 */ 4621static boolean 4622emit_sne(struct svga_shader_emitter_v10 *emit, 4623 const struct tgsi_full_instruction *inst) 4624{ 4625 /* dst = SNE(s0, s1): 4626 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4627 * Translates into: 4628 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4629 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4630 */ 4631 unsigned tmp = get_temp_index(emit); 4632 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4633 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4634 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4635 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4636 4637 /* NE tmp, s0, s1 */ 4638 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4639 &inst->Src[1], FALSE); 4640 4641 /* MOVC dst, tmp, one, zero */ 4642 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4643 &one, &zero, FALSE); 4644 4645 free_temp_indexes(emit); 4646 4647 return TRUE; 4648} 4649 4650 4651/** 4652 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4653 */ 4654static boolean 4655emit_ssg(struct svga_shader_emitter_v10 *emit, 4656 const struct tgsi_full_instruction *inst) 4657{ 4658 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4659 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4660 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4661 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4662 * Translates into: 4663 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4664 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4665 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4666 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) 4667 */ 4668 struct tgsi_full_src_register zero = 4669 make_immediate_reg_float(emit, 0.0f); 4670 struct tgsi_full_src_register one = 4671 make_immediate_reg_float(emit, 1.0f); 4672 struct tgsi_full_src_register neg_one = 4673 make_immediate_reg_float(emit, -1.0f); 4674 4675 unsigned tmp1 = get_temp_index(emit); 4676 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4677 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4678 4679 unsigned tmp2 = get_temp_index(emit); 4680 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4681 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4682 4683 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4684 &zero, FALSE); 4685 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4686 &neg_one, &zero, FALSE); 4687 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4688 &inst->Src[0], FALSE); 4689 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4690 &one, &tmp2_src, FALSE); 4691 4692 free_temp_indexes(emit); 4693 4694 return TRUE; 4695} 4696 4697 4698/** 4699 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4700 */ 4701static boolean 4702emit_issg(struct svga_shader_emitter_v10 *emit, 4703 const struct tgsi_full_instruction *inst) 4704{ 4705 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4706 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4707 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4708 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4709 * Translates into: 4710 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4711 * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) 4712 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4713 */ 4714 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4715 4716 unsigned tmp1 = get_temp_index(emit); 4717 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4718 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4719 4720 unsigned tmp2 = get_temp_index(emit); 4721 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4722 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4723 4724 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4725 4726 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4727 &inst->Src[0], &zero, FALSE); 4728 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4729 &zero, &inst->Src[0], FALSE); 4730 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4731 &tmp1_src, &neg_tmp2, FALSE); 4732 4733 free_temp_indexes(emit); 4734 4735 return TRUE; 4736} 4737 4738 4739/** 4740 * Emit code for TGSI_OPCODE_SUB instruction. 4741 */ 4742static boolean 4743emit_sub(struct svga_shader_emitter_v10 *emit, 4744 const struct tgsi_full_instruction *inst) 4745{ 4746 /* dst = SUB(s0, s1): 4747 * dst = s0 - s1 4748 * Translates into: 4749 * ADD dst, s0, neg(s1) 4750 */ 4751 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); 4752 4753 /* ADD dst, s0, neg(s1) */ 4754 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], 4755 &inst->Src[0], &neg_src1, 4756 inst->Instruction.Saturate); 4757 4758 return TRUE; 4759} 4760 4761 4762/** 4763 * Emit a comparison instruction. The dest register will get 4764 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 
4765 */ 4766static void 4767emit_comparison(struct svga_shader_emitter_v10 *emit, 4768 SVGA3dCmpFunc func, 4769 const struct tgsi_full_dst_register *dst, 4770 const struct tgsi_full_src_register *src0, 4771 const struct tgsi_full_src_register *src1) 4772{ 4773 struct tgsi_full_src_register immediate; 4774 VGPU10OpcodeToken0 opcode0; 4775 boolean swapSrc = FALSE; 4776 4777 /* Sanity checks for svga vs. gallium enums */ 4778 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 4779 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 4780 4781 opcode0.value = 0; 4782 4783 switch (func) { 4784 case SVGA3D_CMP_NEVER: 4785 immediate = make_immediate_reg_int(emit, 0); 4786 /* MOV dst, {0} */ 4787 begin_emit_instruction(emit); 4788 emit_dword(emit, VGPU10_OPCODE_MOV); 4789 emit_dst_register(emit, dst); 4790 emit_src_register(emit, &immediate); 4791 end_emit_instruction(emit); 4792 return; 4793 case SVGA3D_CMP_ALWAYS: 4794 immediate = make_immediate_reg_int(emit, -1); 4795 /* MOV dst, {-1} */ 4796 begin_emit_instruction(emit); 4797 emit_dword(emit, VGPU10_OPCODE_MOV); 4798 emit_dst_register(emit, dst); 4799 emit_src_register(emit, &immediate); 4800 end_emit_instruction(emit); 4801 return; 4802 case SVGA3D_CMP_LESS: 4803 opcode0.opcodeType = VGPU10_OPCODE_LT; 4804 break; 4805 case SVGA3D_CMP_EQUAL: 4806 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4807 break; 4808 case SVGA3D_CMP_LESSEQUAL: 4809 opcode0.opcodeType = VGPU10_OPCODE_GE; 4810 swapSrc = TRUE; 4811 break; 4812 case SVGA3D_CMP_GREATER: 4813 opcode0.opcodeType = VGPU10_OPCODE_LT; 4814 swapSrc = TRUE; 4815 break; 4816 case SVGA3D_CMP_NOTEQUAL: 4817 opcode0.opcodeType = VGPU10_OPCODE_NE; 4818 break; 4819 case SVGA3D_CMP_GREATEREQUAL: 4820 opcode0.opcodeType = VGPU10_OPCODE_GE; 4821 break; 4822 default: 4823 assert(!"Unexpected comparison mode"); 4824 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4825 } 4826 4827 begin_emit_instruction(emit); 4828 emit_dword(emit, opcode0.value); 4829 emit_dst_register(emit, dst); 
4830 if (swapSrc) { 4831 emit_src_register(emit, src1); 4832 emit_src_register(emit, src0); 4833 } 4834 else { 4835 emit_src_register(emit, src0); 4836 emit_src_register(emit, src1); 4837 } 4838 end_emit_instruction(emit); 4839} 4840 4841 4842/** 4843 * Get texel/address offsets for a texture instruction. 4844 */ 4845static void 4846get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 4847 const struct tgsi_full_instruction *inst, int offsets[3]) 4848{ 4849 if (inst->Texture.NumOffsets == 1) { 4850 /* According to OpenGL Shader Language spec the offsets are only 4851 * fetched from a previously-declared immediate/literal. 4852 */ 4853 const struct tgsi_texture_offset *off = inst->TexOffsets; 4854 const unsigned index = off[0].Index; 4855 const unsigned swizzleX = off[0].SwizzleX; 4856 const unsigned swizzleY = off[0].SwizzleY; 4857 const unsigned swizzleZ = off[0].SwizzleZ; 4858 const union tgsi_immediate_data *imm = emit->immediates[index]; 4859 4860 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 4861 4862 offsets[0] = imm[swizzleX].Int; 4863 offsets[1] = imm[swizzleY].Int; 4864 offsets[2] = imm[swizzleZ].Int; 4865 } 4866 else { 4867 offsets[0] = offsets[1] = offsets[2] = 0; 4868 } 4869} 4870 4871 4872/** 4873 * Set up the coordinate register for texture sampling. 4874 * When we're sampling from a RECT texture we have to scale the 4875 * unnormalized coordinate to a normalized coordinate. 4876 * We do that by multiplying the coordinate by an "extra" constant. 4877 * An alternative would be to use the RESINFO instruction to query the 4878 * texture's size. 
4879 */ 4880static struct tgsi_full_src_register 4881setup_texcoord(struct svga_shader_emitter_v10 *emit, 4882 unsigned unit, 4883 const struct tgsi_full_src_register *coord) 4884{ 4885 if (emit->key.tex[unit].unnormalized) { 4886 unsigned scale_index = emit->texcoord_scale_index[unit]; 4887 unsigned tmp = get_temp_index(emit); 4888 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4889 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4890 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 4891 4892 /* MUL tmp, coord, const[] */ 4893 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 4894 coord, &scale_src, FALSE); 4895 return tmp_src; 4896 } 4897 else { 4898 /* use texcoord as-is */ 4899 return *coord; 4900 } 4901} 4902 4903 4904/** 4905 * For SAMPLE_C instructions, emit the extra src register which indicates 4906 * the reference/comparision value. 4907 */ 4908static void 4909emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 4910 unsigned target, 4911 const struct tgsi_full_src_register *coord) 4912{ 4913 struct tgsi_full_src_register coord_src_ref; 4914 unsigned component; 4915 4916 assert(tgsi_is_shadow_target(target)); 4917 4918 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ 4919 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || 4920 target == TGSI_TEXTURE_SHADOWCUBE) 4921 component = TGSI_SWIZZLE_W; 4922 else 4923 component = TGSI_SWIZZLE_Z; 4924 4925 coord_src_ref = scalar_src(coord, component); 4926 4927 emit_src_register(emit, &coord_src_ref); 4928} 4929 4930 4931/** 4932 * Info for implementing texture swizzles. 4933 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 4934 * functions use this to encapsulate the extra steps needed to perform 4935 * a texture swizzle, or shadow/depth comparisons. 
 * The shadow/depth comparison is only done here for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   boolean swizzled;          /**< true if any swizzle term is non-identity */
   boolean shadow_compare;    /**< emit explicit compare+AND code? */
   unsigned unit;             /**< texture unit; only assigned when swizzled
                               *   or shadow_compare is set (the only cases
                               *   in which end_tex_swizzle() reads it) */
   unsigned texture_target;   /**< TGSI_TEXTURE_x */
   struct tgsi_full_src_register tmp_src;  /**< temp reg holding raw texel */
   struct tgsi_full_dst_register tmp_dst;  /**< dst view of the same temp */
   const struct tgsi_full_dst_register *inst_dst; /**< instruction's real dst */
   const struct tgsi_full_src_register *coord_src; /**< texcoord src reg */
};


/**
 * Do setup for handling texture swizzles or shadow compares.
 * \param unit  the texture unit
 * \param inst  the TGSI texture instruction
 * \param shadow_compare  do shadow/depth comparison?
 * \param swz  returns the swizzle info
 */
static void
begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                  unsigned unit,
                  const struct tgsi_full_instruction *inst,
                  boolean shadow_compare,
                  struct tex_swizzle_info *swz)
{
   /* Any non-identity term in the per-unit swizzle key means we must
    * sample into a temp and emit fix-up MOVs afterward.
    */
   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);

   swz->shadow_compare = shadow_compare;
   swz->texture_target = inst->Texture.Texture;

   if (swz->swizzled || shadow_compare) {
      /* Allocate temp register for the result of the SAMPLE instruction
       * and the source of the MOV/compare/swizzle instructions.
       */
      unsigned tmp = get_temp_index(emit);
      swz->tmp_src = make_src_temp_reg(tmp);
      swz->tmp_dst = make_dst_temp_reg(tmp);

      swz->unit = unit;
   }
   swz->inst_dst = &inst->Dst[0];
   swz->coord_src = &inst->Src[0];
}


/**
 * Returns the register to put the SAMPLE instruction results into.
 * This will either be the original instruction dst reg (if no swizzle
 * and no shadow comparison) or a temporary reg if there is a swizzle.
 */
static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
{
   return (swz->swizzled || swz->shadow_compare)
      ? &swz->tmp_dst : swz->inst_dst;
}


/**
 * This emits the MOV instruction that actually implements a texture swizzle
 * and/or shadow comparison.
 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      assert(emit->unit == PIPE_SHADER_FRAGMENT);

      /* Pick the texcoord component that holds the reference depth value;
       * it depends on the texture target's coordinate layout.
       */
      switch (swz->texture_target) {
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_SHADOW1D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
         break;
      case TGSI_TEXTURE_SHADOW1D:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
         break;
      case TGSI_TEXTURE_SHADOWCUBE:
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
         break;
      default:
         assert(!"Unexpected texture target in end_tex_swizzle()");
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
      }

      /* COMPARE tmp, coord, texel */
      /* XXX it would seem that the texel and coord arguments should
       * be transposed here, but piglit tests indicate otherwise.
       */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &texel_src, &coord_src);

      /* AND dest, tmp, {1.0} */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
      if (swz->swizzled) {
         /* swizzle fix-up below still needs the temp as its source */
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);

      /* Swizzle w/out zero/one terms */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled, FALSE);

      /* handle swizzle zero terms */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
                     ((swz_a == PIPE_SWIZZLE_0) << 3));

      if (writemask_0) {
         /* integer-valued textures get integer 0, float textures 0.0 */
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                              &dst, &zero, FALSE);
      }

      /* handle swizzle one terms */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
                     ((swz_a == PIPE_SWIZZLE_1) << 3));

      if (writemask_1) {
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
      }
   }
}


/**
 * Emit code for TGSI_OPCODE_SAMPLE instruction.
 */
static boolean
emit_sample(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   /* Note: unlike TEX, SAMPLE carries separate resource and sampler units */
   const unsigned resource_unit = inst->Src[1].Register.Index;
   const unsigned sampler_unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, resource_unit);
   emit_sampler_register(emit, sampler_unit);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Check if a texture instruction is valid.
 * An example of an invalid texture instruction is doing shadow comparison
 * with an integer-valued texture.
 * If we detect an invalid texture instruction, we replace it with:
 *   MOV dst, {1,1,1,1};
 * \return TRUE if valid, FALSE if invalid.
 */
static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_full_instruction *inst)
{
   const unsigned unit = inst->Src[1].Register.Index;
   const unsigned target = inst->Texture.Texture;
   boolean valid = TRUE;

   if (tgsi_is_shadow_target(target) &&
       is_integer_type(emit->sampler_return_type[unit])) {
      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
      valid = FALSE;
   }
   /* XXX might check for other conditions in the future here */

   if (!valid) {
      /* emit a MOV dst, {1,1,1,1} instruction.
       */
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &inst->Dst[0]);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   return valid;
}


/**
 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
 */
static boolean
emit_tex(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   unsigned target = inst->Texture.Texture;
   unsigned opcode;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* shadow targets need the comparing SAMPLE_C variant */
   if (tgsi_is_shadow_target(target))
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      /* extra operand: the reference value to compare against */
      emit_tex_compare_refcoord(emit, target, &coord);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXP (projective texture)
 */
static boolean
emit_txp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   unsigned target = inst->Texture.Texture;
   unsigned opcode;
   int offsets[3];
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register src0_wwww =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* DIV tmp, coord, coord.wwww */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
                        &coord, &src0_wwww, FALSE);

   /* SAMPLE dst, coord(tmp), resource, sampler */
   begin_emit_instruction(emit);

   if (tgsi_is_shadow_target(target))
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &tmp_src);  /* projected coord */
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      emit_tex_compare_refcoord(emit, target, &tmp_src);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/*
 * Emit code for TGSI_OPCODE_XPD instruction.
 */
static boolean
emit_xpd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = src0.y * src1.z - src1.y * src0.z
    * dst.y = src0.z * src1.x - src1.z * src0.x
    * dst.z = src0.x * src1.y - src1.x * src0.y
    * dst.w = 1
    */
   struct tgsi_full_src_register s0_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);

   struct tgsi_full_src_register s1_xxxx =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_zzzz =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);

   unsigned tmp1 = get_temp_index(emit);
   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);

   unsigned tmp2 = get_temp_index(emit);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);

   unsigned tmp3 = get_temp_index(emit);
   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst_x =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_dst_register tmp3_dst_y =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
   struct tgsi_full_dst_register tmp3_dst_z =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
   struct tgsi_full_dst_register tmp3_dst_w =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);

   /* Note: we put all the intermediate computations into tmp3 in case
    * the XPD dest register is that same as one of the src regs (in which
    * case we could clobber a src reg before we're done with it) .
    *
    * Note: we could get by with just one temp register instead of three
    * since we're doing scalar operations and there's enough room in one
    * temp for everything.
    */

   /* MUL tmp1, src0.y, src1.z */
   /* MUL tmp2, src1.y, src0.z */
   /* ADD tmp3.x, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
                           &s0_yyyy, &s1_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
                           &s1_yyyy, &s0_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.z, src1.x */
   /* MUL tmp2, src1.z, src0.x */
   /* ADD tmp3.y, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
                           &s1_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
                           &s0_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.x, src1.y */
   /* MUL tmp2, src1.x, src0.y */
   /* ADD tmp3.z, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
                           &s1_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
                           &s0_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MOV tmp3.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one,
                           FALSE);
   }

   /* MOV dst, tmp3 */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
                        inst->Instruction.Saturate);


   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
 */
static boolean
emit_txd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[3].Register.Index;
   unsigned target = inst->Texture.Texture;
   int offsets[3];
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   /* SAMPLE_D has no compare variant, so shadow compare (if any) is done
    * by the swizzle epilog code.
    */
   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   begin_emit_instruction(emit);
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXF (texel fetch)
 */
static boolean
emit_txf(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   if (msaa) {
      /* Fetch one sample from an MSAA texture */
      struct tgsi_full_src_register sampleIndex =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      /* LD_MS dst, coord(s0), resource, sampleIndex */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      emit_src_register(emit, &sampleIndex);
      end_emit_instruction(emit);
   }
   else {
      /* Fetch one texel specified by integer coordinate */
      /* LD dst, coord(s0), resource */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      end_emit_instruction(emit);
   }

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
5510 */ 5511static boolean 5512emit_txl_txb(struct svga_shader_emitter_v10 *emit, 5513 const struct tgsi_full_instruction *inst) 5514{ 5515 unsigned target = inst->Texture.Texture; 5516 unsigned opcode, unit; 5517 int offsets[3]; 5518 struct tgsi_full_src_register coord, lod_bias; 5519 struct tex_swizzle_info swz_info; 5520 5521 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5522 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5523 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 5524 5525 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 5526 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5527 unit = inst->Src[2].Register.Index; 5528 } 5529 else { 5530 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5531 unit = inst->Src[1].Register.Index; 5532 } 5533 5534 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5535 &swz_info); 5536 5537 get_texel_offsets(emit, inst, offsets); 5538 5539 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5540 5541 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 5542 begin_emit_instruction(emit); 5543 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 5544 opcode = VGPU10_OPCODE_SAMPLE_L; 5545 } 5546 else { 5547 opcode = VGPU10_OPCODE_SAMPLE_B; 5548 } 5549 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5550 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5551 emit_src_register(emit, &coord); 5552 emit_resource_register(emit, unit); 5553 emit_sampler_register(emit, unit); 5554 emit_src_register(emit, &lod_bias); 5555 end_emit_instruction(emit); 5556 5557 end_tex_swizzle(emit, &swz_info); 5558 5559 free_temp_indexes(emit); 5560 5561 return TRUE; 5562} 5563 5564 5565/** 5566 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 
5567 */ 5568static boolean 5569emit_txq(struct svga_shader_emitter_v10 *emit, 5570 const struct tgsi_full_instruction *inst) 5571{ 5572 const uint unit = inst->Src[1].Register.Index; 5573 5574 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { 5575 /* RESINFO does not support querying texture buffers, so we instead 5576 * store texture buffer sizes in shader constants, then copy them to 5577 * implement TXQ instead of emitting RESINFO. 5578 * MOV dst, const[texture_buffer_size_index[unit]] 5579 */ 5580 struct tgsi_full_src_register size_src = 5581 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5582 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5583 FALSE); 5584 } else { 5585 /* RESINFO dst, srcMipLevel, resource */ 5586 begin_emit_instruction(emit); 5587 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5588 emit_dst_register(emit, &inst->Dst[0]); 5589 emit_src_register(emit, &inst->Src[0]); 5590 emit_resource_register(emit, unit); 5591 end_emit_instruction(emit); 5592 } 5593 5594 free_temp_indexes(emit); 5595 5596 return TRUE; 5597} 5598 5599 5600/** 5601 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5602 */ 5603static boolean 5604emit_simple(struct svga_shader_emitter_v10 *emit, 5605 const struct tgsi_full_instruction *inst) 5606{ 5607 const unsigned opcode = inst->Instruction.Opcode; 5608 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5609 unsigned i; 5610 5611 begin_emit_instruction(emit); 5612 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5613 inst->Instruction.Saturate); 5614 for (i = 0; i < op->num_dst; i++) { 5615 emit_dst_register(emit, &inst->Dst[i]); 5616 } 5617 for (i = 0; i < op->num_src; i++) { 5618 emit_src_register(emit, &inst->Src[i]); 5619 } 5620 end_emit_instruction(emit); 5621 5622 return TRUE; 5623} 5624 5625 5626/** 5627 * We only special case the MOV instruction to try to detect constant 5628 * color writes in the fragment shader. 
5629 */ 5630static boolean 5631emit_mov(struct svga_shader_emitter_v10 *emit, 5632 const struct tgsi_full_instruction *inst) 5633{ 5634 const struct tgsi_full_src_register *src = &inst->Src[0]; 5635 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5636 5637 if (emit->unit == PIPE_SHADER_FRAGMENT && 5638 dst->Register.File == TGSI_FILE_OUTPUT && 5639 dst->Register.Index == 0 && 5640 src->Register.File == TGSI_FILE_CONSTANT && 5641 !src->Register.Indirect) { 5642 emit->constant_color_output = TRUE; 5643 } 5644 5645 return emit_simple(emit, inst); 5646} 5647 5648 5649/** 5650 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5651 * where TGSI only uses one dest register. 5652 */ 5653static boolean 5654emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5655 const struct tgsi_full_instruction *inst, 5656 unsigned dst_count, 5657 unsigned dst_index) 5658{ 5659 const unsigned opcode = inst->Instruction.Opcode; 5660 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5661 unsigned i; 5662 5663 begin_emit_instruction(emit); 5664 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5665 inst->Instruction.Saturate); 5666 5667 for (i = 0; i < dst_count; i++) { 5668 if (i == dst_index) { 5669 emit_dst_register(emit, &inst->Dst[0]); 5670 } else { 5671 emit_null_dst_register(emit); 5672 } 5673 } 5674 5675 for (i = 0; i < op->num_src; i++) { 5676 emit_src_register(emit, &inst->Src[i]); 5677 } 5678 end_emit_instruction(emit); 5679 5680 return TRUE; 5681} 5682 5683 5684/** 5685 * Translate a single TGSI instruction to VGPU10. 
 */
static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst)
{
   const unsigned opcode = inst->Instruction.Opcode;

   /* Dispatch: opcodes with a 1:1 VGPU10 mapping go through emit_simple();
    * everything else gets a dedicated emitter.
    */
   switch (opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_RET:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
      /* simple instructions */
      return emit_simple(emit, inst);

   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_ABS:
      return emit_abs(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      /* fall-through */
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return TRUE;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DP2A:
      return emit_dp2a(emit, inst);
   case TGSI_OPCODE_DPH:
      return emit_dph(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_KILL:
      return emit_kill(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_kill_if(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   case TGSI_OPCODE_SCS:
      return emit_scs(emit, inst);
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   case TGSI_OPCODE_SUB:
      return emit_sub(emit, inst);
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_XPD:
      return emit_xpd(emit, inst);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_IDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);
   case TGSI_OPCODE_END:
      /* END also triggers emission of the epilog helper code first */
      if (!emit_post_helpers(emit))
         return FALSE;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return FALSE;
   }

   return TRUE;
}


/**
 * Emit the extra instructions to adjust the vertex position.
 * There are two possible adjustments:
 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
 *    "prescale" and "pretranslate" values.
 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
 * \param vs_pos_tmp_index which temporary register contains the vertex pos.
5881 */ 5882static void 5883emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, 5884 unsigned vs_pos_tmp_index) 5885{ 5886 struct tgsi_full_src_register tmp_pos_src; 5887 struct tgsi_full_dst_register pos_dst; 5888 5889 /* Don't bother to emit any extra vertex instructions if vertex position is 5890 * not written out 5891 */ 5892 if (emit->vposition.out_index == INVALID_INDEX) 5893 return; 5894 5895 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 5896 pos_dst = make_dst_output_reg(emit->vposition.out_index); 5897 5898 /* If non-adjusted vertex position register index 5899 * is valid, copy the vertex position from the temporary 5900 * vertex position register before it is modified by the 5901 * prescale computation. 5902 */ 5903 if (emit->vposition.so_index != INVALID_INDEX) { 5904 struct tgsi_full_dst_register pos_so_dst = 5905 make_dst_output_reg(emit->vposition.so_index); 5906 5907 /* MOV pos_so, tmp_pos */ 5908 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, 5909 &tmp_pos_src, FALSE); 5910 } 5911 5912 if (emit->vposition.need_prescale) { 5913 /* This code adjusts the vertex position to match the VGPU10 convention. 
5914 * If p is the position computed by the shader (usually by applying the 5915 * modelview and projection matrices), the new position q is computed by: 5916 * 5917 * q.x = p.w * trans.x + p.x * scale.x 5918 * q.y = p.w * trans.y + p.y * scale.y 5919 * q.z = p.w * trans.z + p.z * scale.z; 5920 * q.w = p.w * trans.w + p.w; 5921 */ 5922 struct tgsi_full_src_register tmp_pos_src_w = 5923 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5924 struct tgsi_full_dst_register tmp_pos_dst = 5925 make_dst_temp_reg(vs_pos_tmp_index); 5926 struct tgsi_full_dst_register tmp_pos_dst_xyz = 5927 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 5928 5929 struct tgsi_full_src_register prescale_scale = 5930 make_src_const_reg(emit->vposition.prescale_scale_index); 5931 struct tgsi_full_src_register prescale_trans = 5932 make_src_const_reg(emit->vposition.prescale_trans_index); 5933 5934 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 5935 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 5936 &tmp_pos_src, &prescale_scale, FALSE); 5937 5938 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 5939 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 5940 &prescale_trans, &tmp_pos_src, FALSE); 5941 } 5942 else if (emit->key.vs.undo_viewport) { 5943 /* This code computes the final vertex position from the temporary 5944 * vertex position by undoing the viewport transformation and the 5945 * divide-by-W operation (we convert window coords back to clip coords). 5946 * This is needed when we use the 'draw' module for fallbacks. 
5947 * If p is the temp pos in window coords, then the NDC coord q is: 5948 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 5949 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 5950 * q.z = p.z * p.w 5951 * q.w = p.w 5952 * CONST[vs_viewport_index] contains: 5953 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 5954 */ 5955 struct tgsi_full_dst_register tmp_pos_dst = 5956 make_dst_temp_reg(vs_pos_tmp_index); 5957 struct tgsi_full_dst_register tmp_pos_dst_xy = 5958 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 5959 struct tgsi_full_src_register tmp_pos_src_wwww = 5960 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5961 5962 struct tgsi_full_dst_register pos_dst_xyz = 5963 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 5964 struct tgsi_full_dst_register pos_dst_w = 5965 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 5966 5967 struct tgsi_full_src_register vp_xyzw = 5968 make_src_const_reg(emit->vs.viewport_index); 5969 struct tgsi_full_src_register vp_zwww = 5970 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 5971 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 5972 5973 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 5974 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 5975 &tmp_pos_src, &vp_zwww, FALSE); 5976 5977 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 5978 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 5979 &tmp_pos_src, &vp_xyzw, FALSE); 5980 5981 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 5982 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 5983 &tmp_pos_src, &tmp_pos_src_wwww, FALSE); 5984 5985 /* MOV pos.w, tmp_pos.w */ 5986 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, 5987 &tmp_pos_src, FALSE); 5988 } 5989 else if (vs_pos_tmp_index != INVALID_INDEX) { 5990 /* This code is to handle the case where the temporary vertex 5991 * position register is created when the vertex shader has stream 5992 * output and prescale is disabled because rasterization is to be 5993 * discarded. 
       */
      struct tgsi_full_dst_register pos_dst =
         make_dst_output_reg(emit->vposition.out_index);

      /* MOV pos, tmp_pos */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &pos_dst);
      emit_src_register(emit, &tmp_pos_src);
      end_emit_instruction(emit);
   }
}


/**
 * Emit the clipping instructions appropriate for the clipping mode that
 * was determined during shader scanning (see enum clipping_mode).
 */
static void
emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
{
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* Copy from the clip distance temporary to CLIPDIST & the shadow copy */
      emit_clip_distance_instructions(emit);

   } else if (emit->clip_mode == CLIP_VERTEX) {
      /* Convert TGSI CLIPVERTEX to CLIPDIST */
      emit_clip_vertex_instructions(emit);
   }

   /**
    * Emit vertex position and take care of legacy user planes only if
    * there is a valid vertex position register index.
    * This is to take care of the case
    * where the shader doesn't output vertex position. Then in
    * this case, don't bother to emit more vertex instructions.
    */
   if (emit->vposition.out_index == INVALID_INDEX)
      return;

   /**
    * Emit per-vertex clipping instructions for legacy user defined clip planes.
    * NOTE: we must emit the clip distance instructions before the
    * emit_vpos_instructions() call since the latter function will change
    * the TEMP[vs_pos_tmp_index] value.
    */
   if (emit->clip_mode == CLIP_LEGACY) {
      /* Emit CLIPDIST for legacy user defined clip planes */
      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
   }
}


/**
 * Emit extra per-vertex instructions.  This includes clip-coordinate
 * space conversion and computing clip distances.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;

   /* Emit clipping instructions based on clipping mode */
   emit_clipping_instructions(emit);

   /* Reset the temporary vertex position register index
    * so that emit_dst_register() will use the real vertex position output
    */
   emit->vposition.tmp_index = INVALID_INDEX;

   /* Emit vertex position instructions */
   emit_vpos_instructions(emit, vs_pos_tmp_index);

   /* Restore original vposition.tmp_index value for the next GS vertex.
    * It doesn't matter for VS.
    */
   emit->vposition.tmp_index = vs_pos_tmp_index;
}

/**
 * Translate the TGSI_OPCODE_EMIT GS instruction.
 * \param inst  the TGSI instruction; its vertex-stream operand is
 *              deliberately ignored (VGPU10's EMIT takes no operands).
 */
static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   unsigned ret = TRUE;

   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* Per-vertex extra code: clipping, position prescale, etc. */
   emit_vertex_instructions(emit);

   /* We can't use emit_simple() because the TGSI instruction has one
    * operand (vertex stream number) which we must ignore for VGPU10.
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
   end_emit_instruction(emit);

   return ret;
}


/**
 * Emit the extra code to convert from VGPU10's boolean front-face
 * register to TGSI's signed front-face register.
 *
 * TODO: Make temporary front-face register a scalar.
 */
static void
emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   if (emit->fs.face_input_index != INVALID_INDEX) {
      /* convert vgpu10 boolean face register to gallium +/-1 value */
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.face_tmp_index);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register neg_one =
         make_immediate_reg_float(emit, -1.0f);

      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
      emit_dst_register(emit, &tmp_dst);
      emit_face_register(emit);
      emit_src_register(emit, &one);
      emit_src_register(emit, &neg_one);
      end_emit_instruction(emit);
   }
}


/**
 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
 */
static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
      struct tgsi_full_dst_register tmp_dst_xyz =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
      struct tgsi_full_dst_register tmp_dst_w =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register fragcoord =
         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);

      /* save the input index */
      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
      /* set to invalid to prevent substitution in emit_src_register() */
      emit->fs.fragcoord_input_index = INVALID_INDEX;

      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &tmp_dst_xyz);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
      emit_dst_register(emit, &tmp_dst_w);
      emit_src_register(emit, &one);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* restore saved value */
      emit->fs.fragcoord_input_index = fragcoord_input_index;
   }
}


/**
 * Emit extra instructions to adjust VS inputs/attributes.  This can
 * mean casting a vertex attribute from int to float or setting the
 * W component to 1, or both.
 */
static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Save all the per-attribute adjustment bitmasks from the compile key;
    * they are cleared below so emit_src_register() doesn't re-substitute
    * the adjusted temporaries while we emit the adjustment code itself.
    */
   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;

   unsigned adjust_mask = (save_w_1_mask |
                           save_itof_mask |
                           save_utof_mask |
                           save_is_bgra_mask |
                           save_puint_to_snorm_mask |
                           save_puint_to_uscaled_mask |
                           save_puint_to_sscaled_mask);

   assert(emit->unit == PIPE_SHADER_VERTEX);

   if (adjust_mask) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      struct tgsi_full_src_register one_int =
         make_immediate_reg_int(emit, 1);

      /* We need to turn off these bitmasks while emitting the
       * instructions below, then restore them afterward.
       */
      emit->key.vs.adjust_attrib_w_1 = 0;
      emit->key.vs.adjust_attrib_itof = 0;
      emit->key.vs.adjust_attrib_utof = 0;
      emit->key.vs.attrib_is_bgra = 0;
      emit->key.vs.attrib_puint_to_snorm = 0;
      emit->key.vs.attrib_puint_to_uscaled = 0;
      emit->key.vs.attrib_puint_to_sscaled = 0;

      while (adjust_mask) {
         unsigned index = u_bit_scan(&adjust_mask);

         /* skip the instruction if this vertex attribute is not being used */
         if (emit->info.input_usage_mask[index] == 0)
            continue;

         unsigned tmp = emit->vs.adjusted_input[index];
         struct tgsi_full_src_register input_src =
            make_src_reg(TGSI_FILE_INPUT, index);

         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
         struct tgsi_full_dst_register tmp_dst_w =
            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);

         /* ITOF/UTOF/MOV tmp, input[index] */
         if (save_itof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_utof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_puint_to_snorm_mask & (1 << index)) {
            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_uscaled_mask & (1 << index)) {
            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_sscaled_mask & (1 << index)) {
            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
         }
         else {
            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                 &tmp_dst, &input_src, FALSE);
         }

         if (save_is_bgra_mask & (1 << index)) {
            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
         }

         if (save_w_1_mask & (1 << index)) {
            /* MOV tmp.w, 1.0 */
            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one_int, FALSE);
            }
            else {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one, FALSE);
            }
         }
      }

      /* Restore the saved bitmasks */
      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
      emit->key.vs.adjust_attrib_itof = save_itof_mask;
      emit->key.vs.adjust_attrib_utof = save_utof_mask;
      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   }
}


/**
 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
 * to implement some instructions.  We pre-allocate those values here
 * in the immediate constant buffer.
 */
static void
alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
{
   unsigned n = 0;

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 0, 1, 0, -1);

   /* Extra constants for the packed-uint attribute conversions */
   if (emit->key.vs.attrib_puint_to_snorm) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   }

   if (emit->key.vs.attrib_puint_to_uscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   }

   if (emit->key.vs.attrib_puint_to_sscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 12, 2, 0);

      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 30, 0, 0);
   }

   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
   emit->num_common_immediates = n;
}


/**
 * Emit any extra/helper declarations/code that we might need between
 * the declaration section and code section.
 * \return FALSE on error (e.g. register overflow in the declarations).
 */
static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
{
   /* Properties */
   if (emit->unit == PIPE_SHADER_GEOMETRY)
      emit_property_instructions(emit);

   /* Declare inputs */
   if (!emit_input_declarations(emit))
      return FALSE;

   /* Declare outputs */
   if (!emit_output_declarations(emit))
      return FALSE;

   /* Declare temporary registers */
   emit_temporaries_declaration(emit);

   /* Declare constant registers */
   emit_constant_declaration(emit);

   /* Declare samplers and resources */
   emit_sampler_declarations(emit);
   emit_resource_declarations(emit);

   /* Declare clip distance output registers */
   if (emit->unit == PIPE_SHADER_VERTEX ||
       emit->unit == PIPE_SHADER_GEOMETRY) {
      emit_clip_distance_declarations(emit);
   }

   alloc_common_immediates(emit);

   /* Allocate the alpha-test reference value immediate if alpha test is on */
   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
      float alpha = emit->key.fs.alpha_ref;
      emit->fs.alpha_ref_index =
         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
   }

   /* Now, emit the constant block containing all the immediates
    * declared by shader, as well as the extra ones seen above.
    */
   emit_vgpu10_immediates_block(emit);

   if (emit->unit == PIPE_SHADER_FRAGMENT) {
      emit_frontface_instructions(emit);
      emit_fragcoord_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_attrib_instructions(emit);
   }

   return TRUE;
}


/**
 * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
 * against the alpha reference value and discards the fragment if the
 * comparison fails.
 */
static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
                             unsigned fs_color_tmp_index)
{
   /* compare output color's alpha to alpha ref and kill */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register color_src_w =
      scalar_src(&color_src, TGSI_SWIZZLE_W);
   struct tgsi_full_src_register ref_src =
      make_src_immediate_reg(emit->fs.alpha_ref_index);
   struct tgsi_full_dst_register color_dst =
      make_dst_output_reg(emit->fs.color_out_index[0]);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* dst = src0 'alpha_func' src1 */
   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
                   &color_src_w, &ref_src);

   /* DISCARD if dst.x == 0 */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
   emit_src_register(emit, &tmp_src_x);
   end_emit_instruction(emit);

   /* If we don't need to broadcast the color below or set fragments to
    * white, emit final color here.
    */
   if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
       !emit->key.fs.white_fragments) {
      /* MOV output.color, tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE);  /* XXX saturate? */
   }

   free_temp_indexes(emit);
}


/**
 * When we need to emit white for all fragments (for emulating XOR logicop
 * mode), this function copies white into the temporary color output register.
 */
static void
emit_set_color_white(struct svga_shader_emitter_v10 *emit,
                     unsigned fs_color_tmp_index)
{
   struct tgsi_full_dst_register color_dst =
      make_dst_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register white =
      make_immediate_reg_float(emit, 1.0f);

   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
}


/**
 * Emit instructions for writing a single color output to multiple
 * color buffers.
 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
 * property is set (or when key.fs.white_fragments is true) and the
 * number of render targets is greater than one.
 * \param fs_color_tmp_index  index of the temp register that holds the
 *                            color to broadcast.
 */
static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
                                 unsigned fs_color_tmp_index)
{
   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   unsigned i;
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   for (i = 0; i < n; i++) {
      unsigned output_reg = emit->fs.color_out_index[i];
      struct tgsi_full_dst_register color_dst =
         make_dst_output_reg(output_reg);

      /* Fill in this semantic here since we'll use it later in
       * emit_dst_register().
       */
      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;

      /* MOV output.color[i], tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE);  /* XXX saturate? */
   }
}


/**
 * Emit extra helper code after the original shader code, but before the
 * last END/RET instruction.
 * For vertex shaders this means emitting the extra code to apply the
 * prescale scale/translation.
 */
static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;

      /* We no longer want emit_dst_register() to substitute the
       * temporary fragment color register for the real color output.
       */
      emit->fs.color_tmp_index = INVALID_INDEX;

      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
         emit_alpha_test_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.white_fragments) {
         emit_set_color_white(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
          emit->key.fs.white_fragments) {
         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
      }
   }

   return TRUE;
}


/**
 * Translate the TGSI tokens into VGPU10 tokens.
 * \return FALSE if any token failed to translate.
 */
static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   boolean ret = TRUE;
   boolean pre_helpers_emitted = FALSE;
   unsigned inst_number = 0;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* Emit the helper declarations/code once, just before the first
          * real instruction (i.e. after all declarations have been seen).
          */
         if (!pre_helpers_emitted) {
            ret = emit_pre_helpers(emit);
            if (!ret)
               goto done;
            pre_helpers_emitted = TRUE;
         }
         ret = emit_vgpu10_instruction(emit, inst_number++,
                                       &parse.FullToken.FullInstruction);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
         if (!ret)
            goto done;
         break;

      default:
         break;
      }
   }

done:
   tgsi_parse_free(&parse);
   return ret;
}


/**
 * Emit the first VGPU10 shader tokens.
 */
static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken ptoken;

   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
   ptoken.majorVersion = 4;
   ptoken.minorVersion = 0;
   ptoken.programType = translate_shader_type(emit->unit);
   if (!emit_dword(emit, ptoken.value))
      return FALSE;

   /* Second token: total length of shader, in tokens.  We can't fill this
    * in until we're all done.  Emit zero for now.
    */
   return emit_dword(emit, 0);
}


/**
 * Patch the length placeholder emitted by emit_vgpu10_header() now that
 * the total token count is known.
 */
static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken *tokens;

   /* Replace the second token with total shader length */
   tokens = (VGPU10ProgramToken *) emit->buf;
   tokens[1].value = emit_get_num_tokens(emit);

   return TRUE;
}


/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }
   tokens = tgsi_add_two_side(tokens);
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}


/**
 * Modify the FS to do polygon stipple.
 */
static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_token *tokens)
{
   const struct tgsi_token *new_tokens;
   unsigned unit;

   if (0) {
      debug_printf("Before pstipple ------------------\n");
      tgsi_dump(tokens,0);
   }

   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                     TGSI_FILE_INPUT);

   /* Remember which sampler unit the stipple texture was bound to */
   emit->fs.pstipple_sampler_unit = unit;

   /* Setup texture state for stipple */
   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;

   if (0) {
      debug_printf("After pstipple ------------------\n");
      tgsi_dump(new_tokens, 0);
   }

   return new_tokens;
}

/**
 * Modify the FS to support anti-aliasing point.
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }
   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}

/**
 * This is the main entrypoint for the TGSI -> VGPU10 translator.
6698 */ 6699struct svga_shader_variant * 6700svga_tgsi_vgpu10_translate(struct svga_context *svga, 6701 const struct svga_shader *shader, 6702 const struct svga_compile_key *key, 6703 unsigned unit) 6704{ 6705 struct svga_shader_variant *variant = NULL; 6706 struct svga_shader_emitter_v10 *emit; 6707 const struct tgsi_token *tokens = shader->tokens; 6708 struct svga_vertex_shader *vs = svga->curr.vs; 6709 struct svga_geometry_shader *gs = svga->curr.gs; 6710 6711 assert(unit == PIPE_SHADER_VERTEX || 6712 unit == PIPE_SHADER_GEOMETRY || 6713 unit == PIPE_SHADER_FRAGMENT); 6714 6715 /* These two flags cannot be used together */ 6716 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6717 6718 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE); 6719 /* 6720 * Setup the code emitter 6721 */ 6722 emit = alloc_emitter(); 6723 if (!emit) 6724 goto done; 6725 6726 emit->unit = unit; 6727 emit->key = *key; 6728 6729 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6730 emit->key.gs.need_prescale); 6731 emit->vposition.tmp_index = INVALID_INDEX; 6732 emit->vposition.so_index = INVALID_INDEX; 6733 emit->vposition.out_index = INVALID_INDEX; 6734 6735 emit->fs.color_tmp_index = INVALID_INDEX; 6736 emit->fs.face_input_index = INVALID_INDEX; 6737 emit->fs.fragcoord_input_index = INVALID_INDEX; 6738 6739 emit->gs.prim_id_index = INVALID_INDEX; 6740 6741 emit->clip_dist_out_index = INVALID_INDEX; 6742 emit->clip_dist_tmp_index = INVALID_INDEX; 6743 emit->clip_dist_so_index = INVALID_INDEX; 6744 emit->clip_vertex_out_index = INVALID_INDEX; 6745 6746 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6747 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6748 } 6749 6750 if (unit == PIPE_SHADER_FRAGMENT) { 6751 if (key->fs.light_twoside) { 6752 tokens = transform_fs_twoside(tokens); 6753 } 6754 if (key->fs.pstipple) { 6755 const struct tgsi_token *new_tokens = 6756 transform_fs_pstipple(emit, tokens); 6757 if (tokens != shader->tokens) { 
6758 /* free the two-sided shader tokens */ 6759 tgsi_free_tokens(tokens); 6760 } 6761 tokens = new_tokens; 6762 } 6763 if (key->fs.aa_point) { 6764 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6765 } 6766 } 6767 6768 if (SVGA_DEBUG & DEBUG_TGSI) { 6769 debug_printf("#####################################\n"); 6770 debug_printf("### TGSI Shader %u\n", shader->id); 6771 tgsi_dump(tokens, 0); 6772 } 6773 6774 /** 6775 * Rescan the header if the token string is different from the one 6776 * included in the shader; otherwise, the header info is already up-to-date 6777 */ 6778 if (tokens != shader->tokens) { 6779 tgsi_scan_shader(tokens, &emit->info); 6780 } else { 6781 emit->info = shader->info; 6782 } 6783 6784 emit->num_outputs = emit->info.num_outputs; 6785 6786 if (unit == PIPE_SHADER_FRAGMENT) { 6787 /* Compute FS input remapping to match the output from VS/GS */ 6788 if (gs) { 6789 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6790 } else { 6791 assert(vs); 6792 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6793 } 6794 } else if (unit == PIPE_SHADER_GEOMETRY) { 6795 assert(vs); 6796 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6797 } 6798 6799 determine_clipping_mode(emit); 6800 6801 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6802 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6803 /* if there is stream output declarations associated 6804 * with this shader or the shader writes to ClipDistance 6805 * then reserve extra registers for the non-adjusted vertex position 6806 * and the ClipDistance shadow copy 6807 */ 6808 emit->vposition.so_index = emit->num_outputs++; 6809 6810 if (emit->clip_mode == CLIP_DISTANCE) { 6811 emit->clip_dist_so_index = emit->num_outputs++; 6812 if (emit->info.num_written_clipdistance > 4) 6813 emit->num_outputs++; 6814 } 6815 } 6816 } 6817 6818 /* 6819 * Do actual shader translation. 
6820 */ 6821 if (!emit_vgpu10_header(emit)) { 6822 debug_printf("svga: emit VGPU10 header failed\n"); 6823 goto cleanup; 6824 } 6825 6826 if (!emit_vgpu10_instructions(emit, tokens)) { 6827 debug_printf("svga: emit VGPU10 instructions failed\n"); 6828 goto cleanup; 6829 } 6830 6831 if (!emit_vgpu10_tail(emit)) { 6832 debug_printf("svga: emit VGPU10 tail failed\n"); 6833 goto cleanup; 6834 } 6835 6836 if (emit->register_overflow) { 6837 goto cleanup; 6838 } 6839 6840 /* 6841 * Create, initialize the 'variant' object. 6842 */ 6843 variant = svga_new_shader_variant(svga); 6844 if (!variant) 6845 goto cleanup; 6846 6847 variant->shader = shader; 6848 variant->nr_tokens = emit_get_num_tokens(emit); 6849 variant->tokens = (const unsigned *)emit->buf; 6850 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6851 memcpy(&variant->key, key, sizeof(*key)); 6852 variant->id = UTIL_BITMASK_INVALID_INDEX; 6853 6854 /* The extra constant starting offset starts with the number of 6855 * shader constants declared in the shader. 6856 */ 6857 variant->extra_const_start = emit->num_shader_consts[0]; 6858 if (key->gs.wide_point) { 6859 /** 6860 * The extra constant added in the transformed shader 6861 * for inverse viewport scale is to be supplied by the driver. 6862 * So the extra constant starting offset needs to be reduced by 1. 6863 */ 6864 assert(variant->extra_const_start > 0); 6865 variant->extra_const_start--; 6866 } 6867 6868 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6869 6870 /* If there was exactly one write to a fragment shader output register 6871 * and it came from a constant buffer, we know all fragments will have 6872 * the same color (except for blending). 6873 */ 6874 variant->constant_color_output = 6875 emit->constant_color_output && emit->num_output_writes == 1; 6876 6877 /** keep track in the variant if flat interpolation is used 6878 * for any of the varyings. 
6879 */ 6880 variant->uses_flat_interp = emit->uses_flat_interp; 6881 6882 if (tokens != shader->tokens) { 6883 tgsi_free_tokens(tokens); 6884 } 6885 6886cleanup: 6887 free_emitter(emit); 6888 6889done: 6890 SVGA_STATS_TIME_POP(svga_sws(svga)); 6891 return variant; 6892} 6893