svga_tgsi_vgpu10.c revision 8cc9a8aa2a97ca9e7a36a993954a3480d44c13d3
/**********************************************************
 * Copyright 1998-2013 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

/**
 * @file svga_tgsi_vgpu10.c
 *
 * TGSI -> VGPU10 shader translation.
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


/* Sentinel meaning "no register index assigned yet" */
#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
/* Immediates are stored four dwords (one vec4) per slot */
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};
86 */ 87}; 88 89 90struct svga_shader_emitter_v10 91{ 92 /* The token output buffer */ 93 unsigned size; 94 char *buf; 95 char *ptr; 96 97 /* Information about the shader and state (does not change) */ 98 struct svga_compile_key key; 99 struct tgsi_shader_info info; 100 unsigned unit; 101 102 unsigned inst_start_token; 103 boolean discard_instruction; /**< throw away current instruction? */ 104 105 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; 106 unsigned num_immediates; /**< Number of immediates emitted */ 107 unsigned common_immediate_pos[8]; /**< literals for common immediates */ 108 unsigned num_common_immediates; 109 boolean immediates_emitted; 110 111 unsigned num_outputs; /**< include any extra outputs */ 112 /** The first extra output is reserved for 113 * non-adjusted vertex position for 114 * stream output purpose 115 */ 116 117 /* Temporary Registers */ 118 unsigned num_shader_temps; /**< num of temps used by original shader */ 119 unsigned internal_temp_count; /**< currently allocated internal temps */ 120 struct { 121 unsigned start, size; 122 } temp_arrays[MAX_TEMP_ARRAYS]; 123 unsigned num_temp_arrays; 124 125 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ 126 struct { 127 unsigned arrayId, index; 128 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ 129 130 /** Number of constants used by original shader for each constant buffer. 131 * The size should probably always match with that of svga_state.constbufs. 
132 */ 133 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; 134 135 /* Samplers */ 136 unsigned num_samplers; 137 138 /* Address regs (really implemented with temps) */ 139 unsigned num_address_regs; 140 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; 141 142 /* Output register usage masks */ 143 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; 144 145 /* To map TGSI system value index to VGPU shader input indexes */ 146 ubyte system_value_indexes[MAX_SYSTEM_VALUES]; 147 148 struct { 149 /* vertex position scale/translation */ 150 unsigned out_index; /**< the real position output reg */ 151 unsigned tmp_index; /**< the fake/temp position output reg */ 152 unsigned so_index; /**< the non-adjusted position output reg */ 153 unsigned prescale_scale_index, prescale_trans_index; 154 boolean need_prescale; 155 } vposition; 156 157 /* For vertex shaders only */ 158 struct { 159 /* viewport constant */ 160 unsigned viewport_index; 161 162 /* temp index of adjusted vertex attributes */ 163 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; 164 } vs; 165 166 /* For fragment shaders only */ 167 struct { 168 /* apha test */ 169 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ 170 unsigned color_tmp_index; /**< fake/temp color output reg */ 171 unsigned alpha_ref_index; /**< immediate constant for alpha ref */ 172 173 /* front-face */ 174 unsigned face_input_index; /**< real fragment shader face reg (bool) */ 175 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ 176 177 unsigned pstipple_sampler_unit; 178 179 unsigned fragcoord_input_index; /**< real fragment position input reg */ 180 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ 181 } fs; 182 183 /* For geometry shaders only */ 184 struct { 185 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ 186 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ 187 unsigned input_size; /**< size of input arrays */ 188 unsigned 
prim_id_index; /**< primitive id register index */ 189 unsigned max_out_vertices; /**< maximum number of output vertices */ 190 } gs; 191 192 /* For vertex or geometry shaders */ 193 enum clipping_mode clip_mode; 194 unsigned clip_dist_out_index; /**< clip distance output register index */ 195 unsigned clip_dist_tmp_index; /**< clip distance temporary register */ 196 unsigned clip_dist_so_index; /**< clip distance shadow copy */ 197 198 /** Index of temporary holding the clipvertex coordinate */ 199 unsigned clip_vertex_out_index; /**< clip vertex output register index */ 200 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ 201 202 /* user clip plane constant slot indexes */ 203 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; 204 205 unsigned num_output_writes; 206 boolean constant_color_output; 207 208 boolean uses_flat_interp; 209 210 /* For all shaders: const reg index for RECT coord scaling */ 211 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; 212 213 /* For all shaders: const reg index for texture buffer size */ 214 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; 215 216 /* VS/GS/FS Linkage info */ 217 struct shader_linkage linkage; 218 219 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ 220}; 221 222 223static boolean 224emit_post_helpers(struct svga_shader_emitter_v10 *emit); 225 226static boolean 227emit_vertex(struct svga_shader_emitter_v10 *emit, 228 const struct tgsi_full_instruction *inst); 229 230static char err_buf[128]; 231 232static boolean 233expand(struct svga_shader_emitter_v10 *emit) 234{ 235 char *new_buf; 236 unsigned newsize = emit->size * 2; 237 238 if (emit->buf != err_buf) 239 new_buf = REALLOC(emit->buf, emit->size, newsize); 240 else 241 new_buf = NULL; 242 243 if (!new_buf) { 244 emit->ptr = err_buf; 245 emit->buf = err_buf; 246 emit->size = sizeof(err_buf); 247 return FALSE; 248 } 249 250 emit->size = newsize; 251 emit->ptr = new_buf + (emit->ptr - emit->buf); 252 emit->buf = 
new_buf; 253 return TRUE; 254} 255 256/** 257 * Create and initialize a new svga_shader_emitter_v10 object. 258 */ 259static struct svga_shader_emitter_v10 * 260alloc_emitter(void) 261{ 262 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); 263 264 if (!emit) 265 return NULL; 266 267 /* to initialize the output buffer */ 268 emit->size = 512; 269 if (!expand(emit)) { 270 FREE(emit); 271 return NULL; 272 } 273 return emit; 274} 275 276/** 277 * Free an svga_shader_emitter_v10 object. 278 */ 279static void 280free_emitter(struct svga_shader_emitter_v10 *emit) 281{ 282 assert(emit); 283 FREE(emit->buf); /* will be NULL if translation succeeded */ 284 FREE(emit); 285} 286 287static inline boolean 288reserve(struct svga_shader_emitter_v10 *emit, 289 unsigned nr_dwords) 290{ 291 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { 292 if (!expand(emit)) 293 return FALSE; 294 } 295 296 return TRUE; 297} 298 299static boolean 300emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) 301{ 302 if (!reserve(emit, 1)) 303 return FALSE; 304 305 *(uint32 *)emit->ptr = dword; 306 emit->ptr += sizeof dword; 307 return TRUE; 308} 309 310static boolean 311emit_dwords(struct svga_shader_emitter_v10 *emit, 312 const uint32 *dwords, 313 unsigned nr) 314{ 315 if (!reserve(emit, nr)) 316 return FALSE; 317 318 memcpy(emit->ptr, dwords, nr * sizeof *dwords); 319 emit->ptr += nr * sizeof *dwords; 320 return TRUE; 321} 322 323/** Return the number of tokens in the emitter's buffer */ 324static unsigned 325emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) 326{ 327 return (emit->ptr - emit->buf) / sizeof(unsigned); 328} 329 330 331/** 332 * Check for register overflow. If we overflow we'll set an 333 * error flag. This function can be called for register declarations 334 * or use as src/dst instruction operands. 335 * \param type register type. 
/**
 * Check for register overflow.  If we overflow we'll set an
 * error flag.  This function can be called for register declarations
 * or use as src/dst instruction operands.
 * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
                       or VGPU10_OPCODE_DCL_x
 * \param index  the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   /* remember prior state so we only log the first overflow */
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      /* input limits differ per shader stage */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_INPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_INPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      /* output limits differ per shader stage */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 * Note: the order of these tests establishes priority —
 * explicit clip distances win over clipvertex, which wins over
 * legacy user clip planes.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable) {
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}
453 * The legal values are 0 and 1 (two clip planes per 454 * register, for a total of 8 clip planes) 455 */ 456static unsigned 457apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 458 unsigned writemask, unsigned clip_reg_index) 459{ 460 unsigned shift; 461 462 assert(clip_reg_index < 2); 463 464 /* four clip planes per clip register: */ 465 shift = clip_reg_index * 4; 466 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 467 468 return writemask; 469} 470 471 472/** 473 * Translate gallium shader type into VGPU10 type. 474 */ 475static VGPU10_PROGRAM_TYPE 476translate_shader_type(unsigned type) 477{ 478 switch (type) { 479 case PIPE_SHADER_VERTEX: 480 return VGPU10_VERTEX_SHADER; 481 case PIPE_SHADER_GEOMETRY: 482 return VGPU10_GEOMETRY_SHADER; 483 case PIPE_SHADER_FRAGMENT: 484 return VGPU10_PIXEL_SHADER; 485 default: 486 assert(!"Unexpected shader type"); 487 return VGPU10_VERTEX_SHADER; 488 } 489} 490 491 492/** 493 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 494 * Note: we only need to translate the opcodes for "simple" instructions, 495 * as seen below. All other opcodes are handled/translated specially. 
496 */ 497static VGPU10_OPCODE_TYPE 498translate_opcode(unsigned opcode) 499{ 500 switch (opcode) { 501 case TGSI_OPCODE_MOV: 502 return VGPU10_OPCODE_MOV; 503 case TGSI_OPCODE_MUL: 504 return VGPU10_OPCODE_MUL; 505 case TGSI_OPCODE_ADD: 506 return VGPU10_OPCODE_ADD; 507 case TGSI_OPCODE_DP3: 508 return VGPU10_OPCODE_DP3; 509 case TGSI_OPCODE_DP4: 510 return VGPU10_OPCODE_DP4; 511 case TGSI_OPCODE_MIN: 512 return VGPU10_OPCODE_MIN; 513 case TGSI_OPCODE_MAX: 514 return VGPU10_OPCODE_MAX; 515 case TGSI_OPCODE_MAD: 516 return VGPU10_OPCODE_MAD; 517 case TGSI_OPCODE_SQRT: 518 return VGPU10_OPCODE_SQRT; 519 case TGSI_OPCODE_FRC: 520 return VGPU10_OPCODE_FRC; 521 case TGSI_OPCODE_FLR: 522 return VGPU10_OPCODE_ROUND_NI; 523 case TGSI_OPCODE_FSEQ: 524 return VGPU10_OPCODE_EQ; 525 case TGSI_OPCODE_FSGE: 526 return VGPU10_OPCODE_GE; 527 case TGSI_OPCODE_FSNE: 528 return VGPU10_OPCODE_NE; 529 case TGSI_OPCODE_DDX: 530 return VGPU10_OPCODE_DERIV_RTX; 531 case TGSI_OPCODE_DDY: 532 return VGPU10_OPCODE_DERIV_RTY; 533 case TGSI_OPCODE_RET: 534 return VGPU10_OPCODE_RET; 535 case TGSI_OPCODE_DIV: 536 return VGPU10_OPCODE_DIV; 537 case TGSI_OPCODE_IDIV: 538 return VGPU10_OPCODE_IDIV; 539 case TGSI_OPCODE_DP2: 540 return VGPU10_OPCODE_DP2; 541 case TGSI_OPCODE_BRK: 542 return VGPU10_OPCODE_BREAK; 543 case TGSI_OPCODE_IF: 544 return VGPU10_OPCODE_IF; 545 case TGSI_OPCODE_ELSE: 546 return VGPU10_OPCODE_ELSE; 547 case TGSI_OPCODE_ENDIF: 548 return VGPU10_OPCODE_ENDIF; 549 case TGSI_OPCODE_CEIL: 550 return VGPU10_OPCODE_ROUND_PI; 551 case TGSI_OPCODE_I2F: 552 return VGPU10_OPCODE_ITOF; 553 case TGSI_OPCODE_NOT: 554 return VGPU10_OPCODE_NOT; 555 case TGSI_OPCODE_TRUNC: 556 return VGPU10_OPCODE_ROUND_Z; 557 case TGSI_OPCODE_SHL: 558 return VGPU10_OPCODE_ISHL; 559 case TGSI_OPCODE_AND: 560 return VGPU10_OPCODE_AND; 561 case TGSI_OPCODE_OR: 562 return VGPU10_OPCODE_OR; 563 case TGSI_OPCODE_XOR: 564 return VGPU10_OPCODE_XOR; 565 case TGSI_OPCODE_CONT: 566 return VGPU10_OPCODE_CONTINUE; 567 
case TGSI_OPCODE_EMIT: 568 return VGPU10_OPCODE_EMIT; 569 case TGSI_OPCODE_ENDPRIM: 570 return VGPU10_OPCODE_CUT; 571 case TGSI_OPCODE_BGNLOOP: 572 return VGPU10_OPCODE_LOOP; 573 case TGSI_OPCODE_ENDLOOP: 574 return VGPU10_OPCODE_ENDLOOP; 575 case TGSI_OPCODE_ENDSUB: 576 return VGPU10_OPCODE_RET; 577 case TGSI_OPCODE_NOP: 578 return VGPU10_OPCODE_NOP; 579 case TGSI_OPCODE_BREAKC: 580 return VGPU10_OPCODE_BREAKC; 581 case TGSI_OPCODE_END: 582 return VGPU10_OPCODE_RET; 583 case TGSI_OPCODE_F2I: 584 return VGPU10_OPCODE_FTOI; 585 case TGSI_OPCODE_IMAX: 586 return VGPU10_OPCODE_IMAX; 587 case TGSI_OPCODE_IMIN: 588 return VGPU10_OPCODE_IMIN; 589 case TGSI_OPCODE_UDIV: 590 case TGSI_OPCODE_UMOD: 591 case TGSI_OPCODE_MOD: 592 return VGPU10_OPCODE_UDIV; 593 case TGSI_OPCODE_IMUL_HI: 594 return VGPU10_OPCODE_IMUL; 595 case TGSI_OPCODE_INEG: 596 return VGPU10_OPCODE_INEG; 597 case TGSI_OPCODE_ISHR: 598 return VGPU10_OPCODE_ISHR; 599 case TGSI_OPCODE_ISGE: 600 return VGPU10_OPCODE_IGE; 601 case TGSI_OPCODE_ISLT: 602 return VGPU10_OPCODE_ILT; 603 case TGSI_OPCODE_F2U: 604 return VGPU10_OPCODE_FTOU; 605 case TGSI_OPCODE_UADD: 606 return VGPU10_OPCODE_IADD; 607 case TGSI_OPCODE_U2F: 608 return VGPU10_OPCODE_UTOF; 609 case TGSI_OPCODE_UCMP: 610 return VGPU10_OPCODE_MOVC; 611 case TGSI_OPCODE_UMAD: 612 return VGPU10_OPCODE_UMAD; 613 case TGSI_OPCODE_UMAX: 614 return VGPU10_OPCODE_UMAX; 615 case TGSI_OPCODE_UMIN: 616 return VGPU10_OPCODE_UMIN; 617 case TGSI_OPCODE_UMUL: 618 case TGSI_OPCODE_UMUL_HI: 619 return VGPU10_OPCODE_UMUL; 620 case TGSI_OPCODE_USEQ: 621 return VGPU10_OPCODE_IEQ; 622 case TGSI_OPCODE_USGE: 623 return VGPU10_OPCODE_UGE; 624 case TGSI_OPCODE_USHR: 625 return VGPU10_OPCODE_USHR; 626 case TGSI_OPCODE_USLT: 627 return VGPU10_OPCODE_ULT; 628 case TGSI_OPCODE_USNE: 629 return VGPU10_OPCODE_INE; 630 case TGSI_OPCODE_SWITCH: 631 return VGPU10_OPCODE_SWITCH; 632 case TGSI_OPCODE_CASE: 633 return VGPU10_OPCODE_CASE; 634 case TGSI_OPCODE_DEFAULT: 635 return 
VGPU10_OPCODE_DEFAULT; 636 case TGSI_OPCODE_ENDSWITCH: 637 return VGPU10_OPCODE_ENDSWITCH; 638 case TGSI_OPCODE_FSLT: 639 return VGPU10_OPCODE_LT; 640 case TGSI_OPCODE_ROUND: 641 return VGPU10_OPCODE_ROUND_NE; 642 default: 643 assert(!"Unexpected TGSI opcode in translate_opcode()"); 644 return VGPU10_OPCODE_NOP; 645 } 646} 647 648 649/** 650 * Translate a TGSI register file type into a VGPU10 operand type. 651 * \param array is the TGSI_FILE_TEMPORARY register an array? 652 */ 653static VGPU10_OPERAND_TYPE 654translate_register_file(enum tgsi_file_type file, boolean array) 655{ 656 switch (file) { 657 case TGSI_FILE_CONSTANT: 658 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 659 case TGSI_FILE_INPUT: 660 return VGPU10_OPERAND_TYPE_INPUT; 661 case TGSI_FILE_OUTPUT: 662 return VGPU10_OPERAND_TYPE_OUTPUT; 663 case TGSI_FILE_TEMPORARY: 664 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP 665 : VGPU10_OPERAND_TYPE_TEMP; 666 case TGSI_FILE_IMMEDIATE: 667 /* all immediates are 32-bit values at this time so 668 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 669 */ 670 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; 671 case TGSI_FILE_SAMPLER: 672 return VGPU10_OPERAND_TYPE_SAMPLER; 673 case TGSI_FILE_SYSTEM_VALUE: 674 return VGPU10_OPERAND_TYPE_INPUT; 675 676 /* XXX TODO more cases to finish */ 677 678 default: 679 assert(!"Bad tgsi register file!"); 680 return VGPU10_OPERAND_TYPE_NULL; 681 } 682} 683 684 685/** 686 * Emit a null dst register 687 */ 688static void 689emit_null_dst_register(struct svga_shader_emitter_v10 *emit) 690{ 691 VGPU10OperandToken0 operand; 692 693 operand.value = 0; 694 operand.operandType = VGPU10_OPERAND_TYPE_NULL; 695 operand.numComponents = VGPU10_OPERAND_0_COMPONENT; 696 697 emit_dword(emit, operand.value); 698} 699 700 701/** 702 * If the given register is a temporary, return the array ID. 703 * Else return zero. 
704 */ 705static unsigned 706get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 707 unsigned file, unsigned index) 708{ 709 if (file == TGSI_FILE_TEMPORARY) { 710 return emit->temp_map[index].arrayId; 711 } 712 else { 713 return 0; 714 } 715} 716 717 718/** 719 * If the given register is a temporary, convert the index from a TGSI 720 * TEMPORARY index to a VGPU10 temp index. 721 */ 722static unsigned 723remap_temp_index(const struct svga_shader_emitter_v10 *emit, 724 unsigned file, unsigned index) 725{ 726 if (file == TGSI_FILE_TEMPORARY) { 727 return emit->temp_map[index].index; 728 } 729 else { 730 return index; 731 } 732} 733 734 735/** 736 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 737 * Note: the operandType field must already be initialized. 738 */ 739static VGPU10OperandToken0 740setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 741 VGPU10OperandToken0 operand0, 742 unsigned file, 743 boolean indirect, boolean index2D, 744 unsigned tempArrayID) 745{ 746 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D; 747 748 /* 749 * Compute index dimensions 750 */ 751 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 752 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 753 /* there's no swizzle for in-line immediates */ 754 indexDim = VGPU10_OPERAND_INDEX_0D; 755 assert(operand0.selectionMode == 0); 756 } 757 else { 758 if (index2D || 759 tempArrayID > 0 || 760 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 761 indexDim = VGPU10_OPERAND_INDEX_2D; 762 } 763 else { 764 indexDim = VGPU10_OPERAND_INDEX_1D; 765 } 766 } 767 768 /* 769 * Compute index representations (immediate, relative, etc). 
770 */ 771 if (tempArrayID > 0) { 772 assert(file == TGSI_FILE_TEMPORARY); 773 /* First index is the array ID, second index is the array element */ 774 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 775 if (indirect) { 776 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 777 } 778 else { 779 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 780 } 781 } 782 else if (indirect) { 783 if (file == TGSI_FILE_CONSTANT) { 784 /* index[0] indicates which constant buffer while index[1] indicates 785 * the position in the constant buffer. 786 */ 787 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 788 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 789 } 790 else { 791 /* All other register files are 1-dimensional */ 792 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 793 } 794 } 795 else { 796 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 797 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 798 } 799 800 operand0.indexDimension = indexDim; 801 operand0.index0Representation = index0Rep; 802 operand0.index1Representation = index1Rep; 803 804 return operand0; 805} 806 807 808/** 809 * Emit the operand for expressing an address register for indirect indexing. 810 * Note that the address register is really just a temp register. 
811 * \param addr_reg_index which address register to use 812 */ 813static void 814emit_indirect_register(struct svga_shader_emitter_v10 *emit, 815 unsigned addr_reg_index) 816{ 817 unsigned tmp_reg_index; 818 VGPU10OperandToken0 operand0; 819 820 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 821 822 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 823 824 /* operand0 is a simple temporary register, selecting one component */ 825 operand0.value = 0; 826 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 827 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 828 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 829 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 830 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 831 operand0.swizzleX = 0; 832 operand0.swizzleY = 1; 833 operand0.swizzleZ = 2; 834 operand0.swizzleW = 3; 835 836 emit_dword(emit, operand0.value); 837 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 838} 839 840 841/** 842 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 * Several outputs are silently redirected to temporaries here so that
 * post-processing code (prescale, clip distances, alpha test) can
 * modify them before the real output write.
 * \param emit  the emitter context
 * \param reg   the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   /* NOTE: these semantic lookups are only meaningful when
    * file == TGSI_FILE_OUTPUT; they're computed unconditionally but
    * only consulted inside the OUTPUT branch below.
    */
   const unsigned sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register: a special 0-D operand,
             * emitted immediately — no index dword follows.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read it
             * in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      /* for indexable temps: array ID dword precedes the element index */
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}


/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 * Handles per-stage input remapping (FS front-face/fragcoord temps,
 * GS primitive-ID, VS adjusted attributes / system values), swizzles,
 * and abs/negate modifiers.
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   const unsigned swizzleX = reg->Register.SwizzleX;
   const unsigned swizzleY = reg->Register.SwizzleY;
   const unsigned swizzleZ = reg->Register.SwizzleZ;
   const unsigned swizzleW = reg->Register.SwizzleW;
   const unsigned absolute = reg->Register.Absolute;
   const unsigned negate = reg->Register.Negate;
   bool is_prim_id = FALSE;

   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   if (emit->unit == PIPE_SHADER_FRAGMENT &&
      file == TGSI_FILE_INPUT) {
      if (index == emit->fs.face_input_index) {
         /* Replace INPUT[FACE] with TEMP[FACE] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.face_tmp_index;
      }
      else if (index == emit->fs.fragcoord_input_index) {
         /* Replace INPUT[POSITION] with TEMP[POSITION] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.fragcoord_tmp_index;
      }
      else {
         /* We remap fragment shader inputs so that FS input indexes
          * match up with VS/GS output indexes.
          */
         index = emit->linkage.input_map[index];
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
            file == TGSI_FILE_INPUT) {
      is_prim_id = (index == emit->gs.prim_id_index);
      index = emit->linkage.input_map[index];
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         assert(index < Elements(emit->system_value_indexes));
         index = emit->system_value_indexes[index];
      }
   }

   operand0.value = operand1.value = 0;

   if (is_prim_id) {
      /* primitive-ID is a dedicated 0-component operand */
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   }
   else {
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         /* all components identical: use select-1 mode */
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         /* abs/negate need the extended operand1 token */
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values,
       * pre-swizzled so no selection mode is needed.
       */
      unsigned *c;
      assert(index < Elements(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         emit_dword(emit, reg->Dimension.Index);
      }

      if (tempArrayId > 0) {
         emit_dword(emit, tempArrayId);
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}
instruction). 1113 */ 1114static void 1115emit_resource_register(struct svga_shader_emitter_v10 *emit, 1116 unsigned resource_number) 1117{ 1118 VGPU10OperandToken0 operand0; 1119 1120 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); 1121 1122 /* init */ 1123 operand0.value = 0; 1124 1125 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 1126 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1127 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1128 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1129 operand0.swizzleX = VGPU10_COMPONENT_X; 1130 operand0.swizzleY = VGPU10_COMPONENT_Y; 1131 operand0.swizzleZ = VGPU10_COMPONENT_Z; 1132 operand0.swizzleW = VGPU10_COMPONENT_W; 1133 1134 emit_dword(emit, operand0.value); 1135 emit_dword(emit, resource_number); 1136} 1137 1138 1139/** 1140 * Emit a sampler operand (for use with a SAMPLE instruction). 1141 */ 1142static void 1143emit_sampler_register(struct svga_shader_emitter_v10 *emit, 1144 unsigned sampler_number) 1145{ 1146 VGPU10OperandToken0 operand0; 1147 1148 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); 1149 1150 /* init */ 1151 operand0.value = 0; 1152 1153 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 1154 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1155 1156 emit_dword(emit, operand0.value); 1157 emit_dword(emit, sampler_number); 1158} 1159 1160 1161/** 1162 * Emit an operand which reads the IS_FRONT_FACING register. 
1163 */ 1164static void 1165emit_face_register(struct svga_shader_emitter_v10 *emit) 1166{ 1167 VGPU10OperandToken0 operand0; 1168 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 1169 1170 /* init */ 1171 operand0.value = 0; 1172 1173 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 1174 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1175 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1176 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1177 1178 operand0.swizzleX = VGPU10_COMPONENT_X; 1179 operand0.swizzleY = VGPU10_COMPONENT_X; 1180 operand0.swizzleZ = VGPU10_COMPONENT_X; 1181 operand0.swizzleW = VGPU10_COMPONENT_X; 1182 1183 emit_dword(emit, operand0.value); 1184 emit_dword(emit, index); 1185} 1186 1187 1188/** 1189 * Emit the token for a VGPU10 opcode. 1190 * \param saturate clamp result to [0,1]? 1191 */ 1192static void 1193emit_opcode(struct svga_shader_emitter_v10 *emit, 1194 unsigned vgpu10_opcode, boolean saturate) 1195{ 1196 VGPU10OpcodeToken0 token0; 1197 1198 token0.value = 0; /* init all fields to zero */ 1199 token0.opcodeType = vgpu10_opcode; 1200 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1201 token0.saturate = saturate; 1202 1203 emit_dword(emit, token0.value); 1204} 1205 1206 1207/** 1208 * Emit the token for a VGPU10 resinfo instruction. 1209 * \param modifier return type modifier, _uint or _rcpFloat. 1210 * TODO: We may want to remove this parameter if it will 1211 * only ever be used as _uint. 
 */
static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
                    VGPU10_RESINFO_RETURN_TYPE modifier)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = VGPU10_OPCODE_RESINFO;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.resinfoReturnType = modifier;

   emit_dword(emit, token0.value);
}


/**
 * Emit opcode tokens for a texture sample instruction.  Texture instructions
 * can be rather complicated (texel offsets, etc) so we have this specialized
 * function.
 * \param vgpu10_opcode  one of the VGPU10_OPCODE_SAMPLE* opcodes
 * \param saturate       clamp result to [0,1]?
 * \param offsets        per-axis texel fetch offsets; all-zero means none
 */
static void
emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
                   unsigned vgpu10_opcode, boolean saturate,
                   const int offsets[3])
{
   VGPU10OpcodeToken0 token0;
   VGPU10OpcodeToken1 token1;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   if (offsets[0] || offsets[1] || offsets[2]) {
      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);

      /* non-zero offsets need the extended "sample controls" token */
      token0.extended = 1;
      token1.value = 0;
      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
      token1.offsetU = offsets[0];
      token1.offsetV = offsets[1];
      token1.offsetW = offsets[2];
   }

   emit_dword(emit, token0.value);
   if (token0.extended) {
      emit_dword(emit, token1.value);
   }
}


/**
 * Emit a DISCARD opcode token.
 * If nonzero is set, we'll discard the fragment if the X component is not 0.
 * Otherwise, we'll discard the fragment if the X component is 0.
 */
static void
emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
{
   VGPU10OpcodeToken0 opcode0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   if (nonzero)
      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   emit_dword(emit, opcode0.value);
}


/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}


/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.  The length includes the opcode token itself.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = FALSE;
}


/**
 * Return index for a free temporary register.
 * Internal temps are allocated after the shader's declared temporaries.
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   return emit->num_shader_temps + emit->internal_temp_count++;
}


/**
 * Release the temporaries which were generated by get_temp_index().
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}


/**
 * Create a tgsi_full_src_register.
 * The register gets the identity swizzle (.xyzw) and no modifiers.
 */
static struct tgsi_full_src_register
make_src_reg(unsigned file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}


/**
 * Create a tgsi_full_src_register for a temporary.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_src_register for a constant.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}


/**
 * Create a tgsi_full_src_register for an immediate constant.
1399 */ 1400static struct tgsi_full_src_register 1401make_src_immediate_reg(unsigned index) 1402{ 1403 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1404} 1405 1406 1407/** 1408 * Create a tgsi_full_dst_register. 1409 */ 1410static struct tgsi_full_dst_register 1411make_dst_reg(unsigned file, unsigned index) 1412{ 1413 struct tgsi_full_dst_register reg; 1414 1415 memset(®, 0, sizeof(reg)); 1416 reg.Register.File = file; 1417 reg.Register.Index = index; 1418 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1419 return reg; 1420} 1421 1422 1423/** 1424 * Create a tgsi_full_dst_register for a temporary. 1425 */ 1426static struct tgsi_full_dst_register 1427make_dst_temp_reg(unsigned index) 1428{ 1429 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1430} 1431 1432 1433/** 1434 * Create a tgsi_full_dst_register for an output. 1435 */ 1436static struct tgsi_full_dst_register 1437make_dst_output_reg(unsigned index) 1438{ 1439 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1440} 1441 1442 1443/** 1444 * Create negated tgsi_full_src_register. 1445 */ 1446static struct tgsi_full_src_register 1447negate_src(const struct tgsi_full_src_register *reg) 1448{ 1449 struct tgsi_full_src_register neg = *reg; 1450 neg.Register.Negate = !reg->Register.Negate; 1451 return neg; 1452} 1453 1454/** 1455 * Create absolute value of a tgsi_full_src_register. 
1456 */ 1457static struct tgsi_full_src_register 1458absolute_src(const struct tgsi_full_src_register *reg) 1459{ 1460 struct tgsi_full_src_register absolute = *reg; 1461 absolute.Register.Absolute = 1; 1462 return absolute; 1463} 1464 1465 1466/** Return the named swizzle term from the src register */ 1467static inline unsigned 1468get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) 1469{ 1470 switch (term) { 1471 case TGSI_SWIZZLE_X: 1472 return reg->Register.SwizzleX; 1473 case TGSI_SWIZZLE_Y: 1474 return reg->Register.SwizzleY; 1475 case TGSI_SWIZZLE_Z: 1476 return reg->Register.SwizzleZ; 1477 case TGSI_SWIZZLE_W: 1478 return reg->Register.SwizzleW; 1479 default: 1480 assert(!"Bad swizzle"); 1481 return TGSI_SWIZZLE_X; 1482 } 1483} 1484 1485 1486/** 1487 * Create swizzled tgsi_full_src_register. 1488 */ 1489static struct tgsi_full_src_register 1490swizzle_src(const struct tgsi_full_src_register *reg, 1491 unsigned swizzleX, unsigned swizzleY, 1492 unsigned swizzleZ, unsigned swizzleW) 1493{ 1494 struct tgsi_full_src_register swizzled = *reg; 1495 /* Note: we swizzle the current swizzle */ 1496 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1497 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1498 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1499 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1500 return swizzled; 1501} 1502 1503 1504/** 1505 * Create swizzled tgsi_full_src_register where all the swizzle 1506 * terms are the same. 
1507 */ 1508static struct tgsi_full_src_register 1509scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) 1510{ 1511 struct tgsi_full_src_register swizzled = *reg; 1512 /* Note: we swizzle the current swizzle */ 1513 swizzled.Register.SwizzleX = 1514 swizzled.Register.SwizzleY = 1515 swizzled.Register.SwizzleZ = 1516 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1517 return swizzled; 1518} 1519 1520 1521/** 1522 * Create new tgsi_full_dst_register with writemask. 1523 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1524 */ 1525static struct tgsi_full_dst_register 1526writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1527{ 1528 struct tgsi_full_dst_register masked = *reg; 1529 masked.Register.WriteMask = mask; 1530 return masked; 1531} 1532 1533 1534/** 1535 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1536 */ 1537static boolean 1538same_swizzle_terms(const struct tgsi_full_src_register *reg) 1539{ 1540 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1541 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1542 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1543} 1544 1545 1546/** 1547 * Search the vector for the value 'x' and return its position. 1548 */ 1549static int 1550find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1551 union tgsi_immediate_data x) 1552{ 1553 unsigned i; 1554 for (i = 0; i < 4; i++) { 1555 if (vec[i].Int == x.Int) 1556 return i; 1557 } 1558 return -1; 1559} 1560 1561 1562/** 1563 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 
 */
static int
find_immediate(struct svga_shader_emitter_v10 *emit,
               union tgsi_immediate_data x, unsigned startIndex)
{
   const unsigned endIndex = emit->num_immediates;
   unsigned i;

   assert(emit->immediates_emitted);

   /* Search immediates for x, y, z, w.
    * Note: a vec4 matches if *any* of its four components equals x.
    */
   for (i = startIndex; i < endIndex; i++) {
      if (x.Int == emit->immediates[i][0].Int ||
          x.Int == emit->immediates[i][1].Int ||
          x.Int == emit->immediates[i][2].Int ||
          x.Int == emit->immediates[i][3].Int) {
         return i;
      }
   }
   /* Should never try to use an immediate value that wasn't pre-declared */
   assert(!"find_immediate() failed!");
   return -1;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data[4] value.
 * Note: the values must have been previously declared/allocated in
 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
 * vec4 immediate.
 */
static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
                     const union tgsi_immediate_data imm[4])
{
   struct tgsi_full_src_register reg;
   unsigned i;

   for (i = 0; i < emit->num_common_immediates; i++) {
      /* search for first component value */
      int immpos = find_immediate(emit, imm[0], i);
      int x, y, z, w;

      assert(immpos >= 0);

      /* find remaining components within the immediate vector */
      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);

      if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
         /* found them all - build a swizzled ref to that vec4 */
         memset(&reg, 0, sizeof(reg));
         reg.Register.File = TGSI_FILE_IMMEDIATE;
         reg.Register.Index = immpos;
         reg.Register.SwizzleX = x;
         reg.Register.SwizzleY = y;
         reg.Register.SwizzleZ = z;
         reg.Register.SwizzleW = w;
         return reg;
      }
      /* else, keep searching */
   }

   assert(!"Failed to find immediate register!");

   /* Just return IMM[0].xxxx */
   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data value of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 *emit,
                   union tgsi_immediate_data value)
{
   struct tgsi_full_src_register reg;
   int immpos = find_immediate(emit, value, 0);

   assert(immpos >= 0);

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   reg.Register.Index = immpos;
   /* replicate the matching component into all four swizzle terms */
   reg.Register.SwizzleX =
   reg.Register.SwizzleY =
   reg.Register.SwizzleZ =
   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);

   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
                          float x, float y, float z, float w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Float = x;
   imm[1].Float = y;
   imm[2].Float = z;
   imm[3].Float = w;
   return make_immediate_reg_4(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
1686 */ 1687static struct tgsi_full_src_register 1688make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1689{ 1690 union tgsi_immediate_data imm; 1691 imm.Float = value; 1692 return make_immediate_reg(emit, imm); 1693} 1694 1695 1696/** 1697 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 1698 */ 1699static struct tgsi_full_src_register 1700make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1701 int x, int y, int z, int w) 1702{ 1703 union tgsi_immediate_data imm[4]; 1704 imm[0].Int = x; 1705 imm[1].Int = y; 1706 imm[2].Int = z; 1707 imm[3].Int = w; 1708 return make_immediate_reg_4(emit, imm); 1709} 1710 1711 1712/** 1713 * Return a tgsi_full_src_register for an immediate/literal int value 1714 * of the form {value, value, value, value}. 1715 * \sa make_immediate_reg_4() regarding allowed values. 1716 */ 1717static struct tgsi_full_src_register 1718make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1719{ 1720 union tgsi_immediate_data imm; 1721 imm.Int = value; 1722 return make_immediate_reg(emit, imm); 1723} 1724 1725 1726/** 1727 * Allocate space for a union tgsi_immediate_data[4] immediate. 1728 * \return the index/position of the immediate. 1729 */ 1730static unsigned 1731alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1732 const union tgsi_immediate_data imm[4]) 1733{ 1734 unsigned n = emit->num_immediates++; 1735 assert(!emit->immediates_emitted); 1736 assert(n < Elements(emit->immediates)); 1737 emit->immediates[n][0] = imm[0]; 1738 emit->immediates[n][1] = imm[1]; 1739 emit->immediates[n][2] = imm[2]; 1740 emit->immediates[n][3] = imm[3]; 1741 return n; 1742} 1743 1744 1745/** 1746 * Allocate space for a float[4] immediate. 1747 * \return the index/position of the immediate. 
1748 */ 1749static unsigned 1750alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1751 float x, float y, float z, float w) 1752{ 1753 union tgsi_immediate_data imm[4]; 1754 imm[0].Float = x; 1755 imm[1].Float = y; 1756 imm[2].Float = z; 1757 imm[3].Float = w; 1758 return alloc_immediate_4(emit, imm); 1759} 1760 1761 1762/** 1763 * Allocate space for a int[4] immediate. 1764 * \return the index/position of the immediate. 1765 */ 1766static unsigned 1767alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1768 int x, int y, int z, int w) 1769{ 1770 union tgsi_immediate_data imm[4]; 1771 imm[0].Int = x; 1772 imm[1].Int = y; 1773 imm[2].Int = z; 1774 imm[3].Int = w; 1775 return alloc_immediate_4(emit, imm); 1776} 1777 1778 1779/** 1780 * Allocate a shader input to store a system value. 1781 */ 1782static unsigned 1783alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1784{ 1785 const unsigned n = emit->info.num_inputs + index; 1786 assert(index < Elements(emit->system_value_indexes)); 1787 emit->system_value_indexes[index] = n; 1788 return n; 1789} 1790 1791 1792/** 1793 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1794 */ 1795static boolean 1796emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1797 const struct tgsi_full_immediate *imm) 1798{ 1799 /* We don't actually emit any code here. We just save the 1800 * immediate values and emit them later. 1801 */ 1802 alloc_immediate_4(emit, imm->u); 1803 return TRUE; 1804} 1805 1806 1807/** 1808 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1809 * containing all the immediate values previously allocated 1810 * with alloc_immediate_4(). 
 */
static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 token;

   assert(!emit->immediates_emitted);

   token.value = 0;
   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;

   /* Note: no begin/end_emit_instruction() calls */
   emit_dword(emit, token.value);
   /* length in dwords: 2 header tokens + 4 dwords per vec4 immediate */
   emit_dword(emit, 2 + 4 * emit->num_immediates);
   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);

   emit->immediates_emitted = TRUE;

   return TRUE;
}


/**
 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
 * interpolation mode.
 * TGSI_INTERPOLATE_COLOR is resolved first via the flatshade key bit.
 * \return a VGPU10_INTERPOLATION_x value
 */
static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 *emit,
                        unsigned interp, unsigned interpolate_loc)
{
   if (interp == TGSI_INTERPOLATE_COLOR) {
      interp = emit->key.fs.flatshade ?
         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
   }

   switch (interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      return VGPU10_INTERPOLATION_CONSTANT;
   case TGSI_INTERPOLATE_LINEAR:
      /* TGSI "linear" maps to the VGPU10 no-perspective modes */
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
   case TGSI_INTERPOLATE_PERSPECTIVE:
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
             VGPU10_INTERPOLATION_LINEAR_CENTROID :
             VGPU10_INTERPOLATION_LINEAR;
   default:
      assert(!"Unexpected interpolation mode");
      return VGPU10_INTERPOLATION_CONSTANT;
   }
}


/**
 * Translate a TGSI property to VGPU10.
 * Don't emit any instructions yet, only need to gather the primitive property
 * information.  The output primitive topology might be changed later.
The final property instructions
 * will be emitted as part of the pre-helper code.
 */
static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
                     const struct tgsi_full_property *prop)
{
   /* Map PIPE_PRIM_x (array index) to the VGPU10 GS input primitive type */
   static const VGPU10_PRIMITIVE primType[] = {
      VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* Map PIPE_PRIM_x (array index) to the VGPU10 GS output topology */
   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* Number of input vertices per primitive, indexed by VGPU10_PRIMITIVE_x */
   static const unsigned inputArraySize[] = {
      0,       /* VGPU10_PRIMITIVE_UNDEFINED */
      1,       /* VGPU10_PRIMITIVE_POINT */
      2,       /* VGPU10_PRIMITIVE_LINE */
      3,       /* VGPU10_PRIMITIVE_TRIANGLE */
      0,
      0,
      4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
      6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   };

   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      assert(prop->u[0].Data < Elements(primType));
      emit->gs.prim_type = primType[prop->u[0].Data];
      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
      break;

   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      assert(prop->u[0].Data < Elements(primTopology));
      emit->gs.prim_topology = primTopology[prop->u[0].Data];
      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
      break;

   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      emit->gs.max_out_vertices = prop->u[0].Data;
      break;

   default:
      break;
   }

   return TRUE;
}


/**
 * Emit one property declaration instruction.
 * \param nData  if non-zero, emit the extra 'data' dword after the opcode
 */
static void
emit_property_instruction(struct svga_shader_emitter_v10 *emit,
                          VGPU10OpcodeToken0 opcode0, unsigned nData,
                          unsigned data)
{
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   if (nData)
      emit_dword(emit, data);
   end_emit_instruction(emit);
}


/**
 * Emit property instructions
 */
static void
emit_property_instructions(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;

   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* emit input primitive type declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   opcode0.primitive = emit->gs.prim_type;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit output primitive topology declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   opcode0.primitiveTopology = emit->gs.prim_topology;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit max output vertices */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
}


/**
 * Emit a vgpu10 declaration "instruction".
 * \param index the register index
 * \param size array size of the operand. In most cases, it is 1,
 *             but for inputs to geometry shader, the array size varies
 *             depending on the primitive type.
 */
static void
emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                      VGPU10OpcodeToken0 opcode0,
                      VGPU10OperandToken0 operand0,
                      VGPU10NameToken name_token,
                      unsigned index, unsigned size)
{
   assert(opcode0.opcodeType);
   assert(operand0.mask);

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);

   emit_dword(emit, operand0.value);

   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
      /* Next token is the index of the register to declare */
      emit_dword(emit, index);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
      /* Next token is the size of the register */
      emit_dword(emit, size);

      /* Followed by the index of the register */
      emit_dword(emit, index);
   }

   if (name_token.value) {
      emit_dword(emit, name_token.value);
   }

   end_emit_instruction(emit);
}


/**
 * Emit the declaration for a shader input.
 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim index dimension
 * \param index the input register index
 * \param size array size of the operand. In most cases, it is 1,
 *             but for inputs to geometry shader, the array size varies
 *             depending on the primitive type.
 * \param name one of VGPU10_NAME_x
 * \param numComp number of components
 * \param selMode component selection mode
 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode interpolation mode
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       unsigned opcodeType, unsigned operandType,
                       unsigned dim, unsigned index, unsigned size,
                       unsigned name, unsigned numComp,
                       unsigned selMode, unsigned usageMask,
                       unsigned interpMode)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* sanity-check all the enum-valued parameters */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE);
   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
}


/**
 * Emit the declaration for a shader output.
 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index the output register index
 * \param name one of VGPU10_NAME_x
 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 */
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
                        unsigned type, unsigned index,
                        unsigned name, unsigned usageMask)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_CLIP_DISTANCE);

   check_register_index(emit, type, index);

   opcode0.value = operand0.value = name_token.value = 0;
2132 2133 opcode0.opcodeType = type; 2134 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 2135 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2136 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2137 operand0.mask = usageMask; 2138 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2139 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2140 2141 name_token.name = name; 2142 2143 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 2144} 2145 2146 2147/** 2148 * Emit the declaration for the fragment depth output. 2149 */ 2150static void 2151emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 2152{ 2153 VGPU10OpcodeToken0 opcode0; 2154 VGPU10OperandToken0 operand0; 2155 VGPU10NameToken name_token; 2156 2157 assert(emit->unit == PIPE_SHADER_FRAGMENT); 2158 2159 opcode0.value = operand0.value = name_token.value = 0; 2160 2161 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 2162 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 2163 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 2164 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2165 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2166 2167 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 2168} 2169 2170 2171/** 2172 * Emit the declaration for a system value input/output. 
2173 */ 2174static void 2175emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 2176 unsigned semantic_name, unsigned index) 2177{ 2178 switch (semantic_name) { 2179 case TGSI_SEMANTIC_INSTANCEID: 2180 index = alloc_system_value_index(emit, index); 2181 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2182 VGPU10_OPERAND_TYPE_INPUT, 2183 VGPU10_OPERAND_INDEX_1D, 2184 index, 1, 2185 VGPU10_NAME_INSTANCE_ID, 2186 VGPU10_OPERAND_4_COMPONENT, 2187 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2188 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2189 VGPU10_INTERPOLATION_UNDEFINED); 2190 break; 2191 case TGSI_SEMANTIC_VERTEXID: 2192 index = alloc_system_value_index(emit, index); 2193 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2194 VGPU10_OPERAND_TYPE_INPUT, 2195 VGPU10_OPERAND_INDEX_1D, 2196 index, 1, 2197 VGPU10_NAME_VERTEX_ID, 2198 VGPU10_OPERAND_4_COMPONENT, 2199 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2200 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2201 VGPU10_INTERPOLATION_UNDEFINED); 2202 break; 2203 default: 2204 ; /* XXX */ 2205 } 2206} 2207 2208/** 2209 * Translate a TGSI declaration to VGPU10. 2210 */ 2211static boolean 2212emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 2213 const struct tgsi_full_declaration *decl) 2214{ 2215 switch (decl->Declaration.File) { 2216 case TGSI_FILE_INPUT: 2217 /* do nothing - see emit_input_declarations() */ 2218 return TRUE; 2219 2220 case TGSI_FILE_OUTPUT: 2221 assert(decl->Range.First == decl->Range.Last); 2222 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 2223 return TRUE; 2224 2225 case TGSI_FILE_TEMPORARY: 2226 /* Don't declare the temps here. Just keep track of how many 2227 * and emit the declaration later. 2228 */ 2229 if (decl->Declaration.Array) { 2230 /* Indexed temporary array. Save the start index of the array 2231 * and the size of the array. 
2232 */ 2233 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 2234 unsigned i; 2235 2236 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 2237 2238 /* Save this array so we can emit the declaration for it later */ 2239 emit->temp_arrays[arrayID].start = decl->Range.First; 2240 emit->temp_arrays[arrayID].size = 2241 decl->Range.Last - decl->Range.First + 1; 2242 2243 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 2244 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 2245 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 2246 2247 /* Fill in the temp_map entries for this array */ 2248 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2249 emit->temp_map[i].arrayId = arrayID; 2250 emit->temp_map[i].index = i - decl->Range.First; 2251 } 2252 } 2253 2254 /* for all temps, indexed or not, keep track of highest index */ 2255 emit->num_shader_temps = MAX2(emit->num_shader_temps, 2256 decl->Range.Last + 1); 2257 return TRUE; 2258 2259 case TGSI_FILE_CONSTANT: 2260 /* Don't declare constants here. Just keep track and emit later. */ 2261 { 2262 unsigned constbuf = 0, num_consts; 2263 if (decl->Declaration.Dimension) { 2264 constbuf = decl->Dim.Index2D; 2265 } 2266 /* We throw an assertion here when, in fact, the shader should never 2267 * have linked due to constbuf index out of bounds, so we shouldn't 2268 * have reached here. 
2269 */ 2270 assert(constbuf < Elements(emit->num_shader_consts)); 2271 2272 num_consts = MAX2(emit->num_shader_consts[constbuf], 2273 decl->Range.Last + 1); 2274 2275 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 2276 debug_printf("Warning: constant buffer is declared to size [%u]" 2277 " but [%u] is the limit.\n", 2278 num_consts, 2279 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2280 } 2281 /* The linker doesn't enforce the max UBO size so we clamp here */ 2282 emit->num_shader_consts[constbuf] = 2283 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2284 } 2285 return TRUE; 2286 2287 case TGSI_FILE_IMMEDIATE: 2288 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 2289 return FALSE; 2290 2291 case TGSI_FILE_SYSTEM_VALUE: 2292 emit_system_value_declaration(emit, decl->Semantic.Name, 2293 decl->Range.First); 2294 return TRUE; 2295 2296 case TGSI_FILE_SAMPLER: 2297 /* Don't declare samplers here. Just keep track and emit later. */ 2298 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 2299 return TRUE; 2300 2301#if 0 2302 case TGSI_FILE_RESOURCE: 2303 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 2304 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 2305 assert(!"TGSI_FILE_RESOURCE not handled yet"); 2306 return FALSE; 2307#endif 2308 2309 case TGSI_FILE_ADDRESS: 2310 emit->num_address_regs = MAX2(emit->num_address_regs, 2311 decl->Range.Last + 1); 2312 return TRUE; 2313 2314 case TGSI_FILE_SAMPLER_VIEW: 2315 /* Not used at this time, but maybe in the future. 2316 * See emit_resource_declarations(). 2317 */ 2318 return TRUE; 2319 2320 default: 2321 assert(!"Unexpected type of declaration"); 2322 return FALSE; 2323 } 2324} 2325 2326 2327 2328/** 2329 * Emit all input declarations. 
2330 */ 2331static boolean 2332emit_input_declarations(struct svga_shader_emitter_v10 *emit) 2333{ 2334 unsigned i; 2335 2336 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2337 2338 for (i = 0; i < emit->linkage.num_inputs; i++) { 2339 unsigned semantic_name = emit->info.input_semantic_name[i]; 2340 unsigned usage_mask = emit->info.input_usage_mask[i]; 2341 unsigned index = emit->linkage.input_map[i]; 2342 unsigned type, interpolationMode, name; 2343 2344 if (usage_mask == 0) 2345 continue; /* register is not actually used */ 2346 2347 if (semantic_name == TGSI_SEMANTIC_POSITION) { 2348 /* fragment position input */ 2349 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2350 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 2351 name = VGPU10_NAME_POSITION; 2352 if (usage_mask & TGSI_WRITEMASK_W) { 2353 /* we need to replace use of 'w' with '1/w' */ 2354 emit->fs.fragcoord_input_index = i; 2355 } 2356 } 2357 else if (semantic_name == TGSI_SEMANTIC_FACE) { 2358 /* fragment front-facing input */ 2359 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2360 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2361 name = VGPU10_NAME_IS_FRONT_FACE; 2362 emit->fs.face_input_index = i; 2363 } 2364 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2365 /* primitive ID */ 2366 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2367 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2368 name = VGPU10_NAME_PRIMITIVE_ID; 2369 } 2370 else { 2371 /* general fragment input */ 2372 type = VGPU10_OPCODE_DCL_INPUT_PS; 2373 interpolationMode = 2374 translate_interpolation(emit, 2375 emit->info.input_interpolate[i], 2376 emit->info.input_interpolate_loc[i]); 2377 2378 /* keeps track if flat interpolation mode is being used */ 2379 emit->uses_flat_interp = emit->uses_flat_interp || 2380 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 2381 2382 name = VGPU10_NAME_UNDEFINED; 2383 } 2384 2385 emit_input_declaration(emit, type, 2386 VGPU10_OPERAND_TYPE_INPUT, 2387 VGPU10_OPERAND_INDEX_1D, index, 1, 2388 name, 2389 
VGPU10_OPERAND_4_COMPONENT, 2390 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2391 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2392 interpolationMode); 2393 } 2394 } 2395 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 2396 2397 for (i = 0; i < emit->info.num_inputs; i++) { 2398 unsigned semantic_name = emit->info.input_semantic_name[i]; 2399 unsigned usage_mask = emit->info.input_usage_mask[i]; 2400 unsigned index = emit->linkage.input_map[i]; 2401 unsigned opcodeType, operandType; 2402 unsigned numComp, selMode; 2403 unsigned name; 2404 unsigned dim; 2405 2406 if (usage_mask == 0) 2407 continue; /* register is not actually used */ 2408 2409 opcodeType = VGPU10_OPCODE_DCL_INPUT; 2410 operandType = VGPU10_OPERAND_TYPE_INPUT; 2411 numComp = VGPU10_OPERAND_4_COMPONENT; 2412 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2413 name = VGPU10_NAME_UNDEFINED; 2414 2415 /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ 2416 dim = VGPU10_OPERAND_INDEX_2D; 2417 2418 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2419 /* Primitive ID */ 2420 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 2421 dim = VGPU10_OPERAND_INDEX_0D; 2422 numComp = VGPU10_OPERAND_0_COMPONENT; 2423 selMode = 0; 2424 2425 /* also save the register index so we can check for 2426 * primitive id when emit src register. We need to modify the 2427 * operand type, index dimension when emit primitive id src reg. 
2428 */ 2429 emit->gs.prim_id_index = i; 2430 } 2431 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2432 /* vertex position input */ 2433 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 2434 name = VGPU10_NAME_POSITION; 2435 } 2436 2437 emit_input_declaration(emit, opcodeType, operandType, 2438 dim, index, 2439 emit->gs.input_size, 2440 name, 2441 numComp, selMode, 2442 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2443 VGPU10_INTERPOLATION_UNDEFINED); 2444 } 2445 } 2446 else { 2447 assert(emit->unit == PIPE_SHADER_VERTEX); 2448 2449 for (i = 0; i < emit->info.num_inputs; i++) { 2450 unsigned usage_mask = emit->info.input_usage_mask[i]; 2451 unsigned index = i; 2452 2453 if (usage_mask == 0) 2454 continue; /* register is not actually used */ 2455 2456 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 2457 VGPU10_OPERAND_TYPE_INPUT, 2458 VGPU10_OPERAND_INDEX_1D, index, 1, 2459 VGPU10_NAME_UNDEFINED, 2460 VGPU10_OPERAND_4_COMPONENT, 2461 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2462 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2463 VGPU10_INTERPOLATION_UNDEFINED); 2464 } 2465 } 2466 2467 return TRUE; 2468} 2469 2470 2471/** 2472 * Emit all output declarations. 
2473 */ 2474static boolean 2475emit_output_declarations(struct svga_shader_emitter_v10 *emit) 2476{ 2477 unsigned i; 2478 2479 for (i = 0; i < emit->info.num_outputs; i++) { 2480 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 2481 const unsigned semantic_name = emit->info.output_semantic_name[i]; 2482 const unsigned semantic_index = emit->info.output_semantic_index[i]; 2483 unsigned index = i; 2484 2485 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2486 if (semantic_name == TGSI_SEMANTIC_COLOR) { 2487 assert(semantic_index < Elements(emit->fs.color_out_index)); 2488 2489 emit->fs.color_out_index[semantic_index] = index; 2490 2491 /* The semantic index is the shader's color output/buffer index */ 2492 emit_output_declaration(emit, 2493 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 2494 VGPU10_NAME_UNDEFINED, 2495 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2496 2497 if (semantic_index == 0) { 2498 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 2499 /* Emit declarations for the additional color outputs 2500 * for broadcasting. 
2501 */ 2502 unsigned j; 2503 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 2504 /* Allocate a new output index */ 2505 unsigned idx = emit->info.num_outputs + j - 1; 2506 emit->fs.color_out_index[j] = idx; 2507 emit_output_declaration(emit, 2508 VGPU10_OPCODE_DCL_OUTPUT, idx, 2509 VGPU10_NAME_UNDEFINED, 2510 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2511 emit->info.output_semantic_index[idx] = j; 2512 } 2513 } 2514 } 2515 else { 2516 assert(!emit->key.fs.write_color0_to_n_cbufs); 2517 } 2518 } 2519 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2520 /* Fragment depth output */ 2521 emit_fragdepth_output_declaration(emit); 2522 } 2523 else { 2524 assert(!"Bad output semantic name"); 2525 } 2526 } 2527 else { 2528 /* VS or GS */ 2529 unsigned name, type; 2530 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2531 2532 switch (semantic_name) { 2533 case TGSI_SEMANTIC_POSITION: 2534 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2535 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2536 name = VGPU10_NAME_POSITION; 2537 /* Save the index of the vertex position output register */ 2538 emit->vposition.out_index = index; 2539 break; 2540 case TGSI_SEMANTIC_CLIPDIST: 2541 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2542 name = VGPU10_NAME_CLIP_DISTANCE; 2543 /* save the starting index of the clip distance output register */ 2544 if (semantic_index == 0) 2545 emit->clip_dist_out_index = index; 2546 writemask = emit->output_usage_mask[index]; 2547 writemask = apply_clip_plane_mask(emit, writemask, semantic_index); 2548 if (writemask == 0x0) { 2549 continue; /* discard this do-nothing declaration */ 2550 } 2551 break; 2552 case TGSI_SEMANTIC_PRIMID: 2553 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2554 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2555 name = VGPU10_NAME_PRIMITIVE_ID; 2556 break; 2557 case TGSI_SEMANTIC_LAYER: 2558 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2559 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2560 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 2561 break; 
2562 case TGSI_SEMANTIC_CLIPVERTEX: 2563 type = VGPU10_OPCODE_DCL_OUTPUT; 2564 name = VGPU10_NAME_UNDEFINED; 2565 emit->clip_vertex_out_index = index; 2566 break; 2567 default: 2568 /* generic output */ 2569 type = VGPU10_OPCODE_DCL_OUTPUT; 2570 name = VGPU10_NAME_UNDEFINED; 2571 } 2572 2573 emit_output_declaration(emit, type, index, name, writemask); 2574 } 2575 } 2576 2577 if (emit->vposition.so_index != INVALID_INDEX && 2578 emit->vposition.out_index != INVALID_INDEX) { 2579 2580 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2581 2582 /* Emit the declaration for the non-adjusted vertex position 2583 * for stream output purpose 2584 */ 2585 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2586 emit->vposition.so_index, 2587 VGPU10_NAME_UNDEFINED, 2588 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2589 } 2590 2591 if (emit->clip_dist_so_index != INVALID_INDEX && 2592 emit->clip_dist_out_index != INVALID_INDEX) { 2593 2594 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2595 2596 /* Emit the declaration for the clip distance shadow copy which 2597 * will be used for stream output purpose and for clip distance 2598 * varying variable 2599 */ 2600 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2601 emit->clip_dist_so_index, 2602 VGPU10_NAME_UNDEFINED, 2603 emit->output_usage_mask[emit->clip_dist_out_index]); 2604 2605 if (emit->info.num_written_clipdistance > 4) { 2606 /* for the second clip distance register, each handles 4 planes */ 2607 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2608 emit->clip_dist_so_index + 1, 2609 VGPU10_NAME_UNDEFINED, 2610 emit->output_usage_mask[emit->clip_dist_out_index+1]); 2611 } 2612 } 2613 2614 return TRUE; 2615} 2616 2617 2618/** 2619 * Emit the declaration for the temporary registers. 
2620 */ 2621static boolean 2622emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 2623{ 2624 unsigned total_temps, reg, i; 2625 2626 total_temps = emit->num_shader_temps; 2627 2628 /* Allocate extra temps for specially-implemented instructions, 2629 * such as LIT. 2630 */ 2631 total_temps += MAX_INTERNAL_TEMPS; 2632 2633 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 2634 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 2635 emit->key.clip_plane_enable || 2636 emit->vposition.so_index != INVALID_INDEX) { 2637 emit->vposition.tmp_index = total_temps; 2638 total_temps += 1; 2639 } 2640 2641 if (emit->unit == PIPE_SHADER_VERTEX) { 2642 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 2643 emit->key.vs.adjust_attrib_itof | 2644 emit->key.vs.adjust_attrib_utof | 2645 emit->key.vs.attrib_is_bgra | 2646 emit->key.vs.attrib_puint_to_snorm | 2647 emit->key.vs.attrib_puint_to_uscaled | 2648 emit->key.vs.attrib_puint_to_sscaled); 2649 while (attrib_mask) { 2650 unsigned index = u_bit_scan(&attrib_mask); 2651 emit->vs.adjusted_input[index] = total_temps++; 2652 } 2653 } 2654 2655 if (emit->clip_mode == CLIP_DISTANCE) { 2656 /* We need to write the clip distance to a temporary register 2657 * first. Then it will be copied to the shadow copy for 2658 * the clip distance varying variable and stream output purpose. 2659 * It will also be copied to the actual CLIPDIST register 2660 * according to the enabled clip planes 2661 */ 2662 emit->clip_dist_tmp_index = total_temps++; 2663 if (emit->info.num_written_clipdistance > 4) 2664 total_temps++; /* second clip register */ 2665 } 2666 else if (emit->clip_mode == CLIP_VERTEX) { 2667 /* We need to convert the TGSI CLIPVERTEX output to one or more 2668 * clip distances. Allocate a temp reg for the clipvertex here. 
2669 */ 2670 assert(emit->info.writes_clipvertex > 0); 2671 emit->clip_vertex_tmp_index = total_temps; 2672 total_temps++; 2673 } 2674 } 2675 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 2676 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 2677 emit->key.fs.white_fragments || 2678 emit->key.fs.write_color0_to_n_cbufs > 1) { 2679 /* Allocate a temp to hold the output color */ 2680 emit->fs.color_tmp_index = total_temps; 2681 total_temps += 1; 2682 } 2683 2684 if (emit->fs.face_input_index != INVALID_INDEX) { 2685 /* Allocate a temp for the +/-1 face register */ 2686 emit->fs.face_tmp_index = total_temps; 2687 total_temps += 1; 2688 } 2689 2690 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 2691 /* Allocate a temp for modified fragment position register */ 2692 emit->fs.fragcoord_tmp_index = total_temps; 2693 total_temps += 1; 2694 } 2695 } 2696 2697 for (i = 0; i < emit->num_address_regs; i++) { 2698 emit->address_reg_index[i] = total_temps++; 2699 } 2700 2701 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 2702 * temp indexes. Basically, we compact all the non-array temp register 2703 * indexes into a consecutive series. 
2704 * 2705 * Before, we may have some TGSI declarations like: 2706 * DCL TEMP[0..1], LOCAL 2707 * DCL TEMP[2..4], ARRAY(1), LOCAL 2708 * DCL TEMP[5..7], ARRAY(2), LOCAL 2709 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 2710 * 2711 * After, we'll have a map like this: 2712 * temp_map[0] = { array 0, index 0 } 2713 * temp_map[1] = { array 0, index 1 } 2714 * temp_map[2] = { array 1, index 0 } 2715 * temp_map[3] = { array 1, index 1 } 2716 * temp_map[4] = { array 1, index 2 } 2717 * temp_map[5] = { array 2, index 0 } 2718 * temp_map[6] = { array 2, index 1 } 2719 * temp_map[7] = { array 2, index 2 } 2720 * temp_map[8] = { array 0, index 2 } 2721 * temp_map[9] = { array 0, index 3 } 2722 * 2723 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 2724 * temps numbered 0..3 2725 * 2726 * Any time we emit a temporary register index, we'll have to use the 2727 * temp_map[] table to convert the TGSI index to the VGPU10 index. 2728 * 2729 * Finally, we recompute the total_temps value here. 2730 */ 2731 reg = 0; 2732 for (i = 0; i < total_temps; i++) { 2733 if (emit->temp_map[i].arrayId == 0) { 2734 emit->temp_map[i].index = reg++; 2735 } 2736 } 2737 total_temps = reg; 2738 2739 if (0) { 2740 debug_printf("total_temps %u\n", total_temps); 2741 for (i = 0; i < 30; i++) { 2742 debug_printf("temp %u -> array %u index %u\n", 2743 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 2744 } 2745 } 2746 2747 /* Emit declaration of ordinary temp registers */ 2748 if (total_temps > 0) { 2749 VGPU10OpcodeToken0 opcode0; 2750 2751 opcode0.value = 0; 2752 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 2753 2754 begin_emit_instruction(emit); 2755 emit_dword(emit, opcode0.value); 2756 emit_dword(emit, total_temps); 2757 end_emit_instruction(emit); 2758 } 2759 2760 /* Emit declarations for indexable temp arrays. Skip 0th entry since 2761 * it's unused. 
2762 */ 2763 for (i = 1; i < emit->num_temp_arrays; i++) { 2764 unsigned num_temps = emit->temp_arrays[i].size; 2765 2766 if (num_temps > 0) { 2767 VGPU10OpcodeToken0 opcode0; 2768 2769 opcode0.value = 0; 2770 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 2771 2772 begin_emit_instruction(emit); 2773 emit_dword(emit, opcode0.value); 2774 emit_dword(emit, i); /* which array */ 2775 emit_dword(emit, num_temps); 2776 emit_dword(emit, 4); /* num components */ 2777 end_emit_instruction(emit); 2778 2779 total_temps += num_temps; 2780 } 2781 } 2782 2783 /* Check that the grand total of all regular and indexed temps is 2784 * under the limit. 2785 */ 2786 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 2787 2788 return TRUE; 2789} 2790 2791 2792static boolean 2793emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 2794{ 2795 VGPU10OpcodeToken0 opcode0; 2796 VGPU10OperandToken0 operand0; 2797 unsigned total_consts, i; 2798 2799 opcode0.value = 0; 2800 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 2801 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 2802 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 2803 2804 operand0.value = 0; 2805 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2806 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 2807 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2808 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2809 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 2810 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2811 operand0.swizzleX = 0; 2812 operand0.swizzleY = 1; 2813 operand0.swizzleZ = 2; 2814 operand0.swizzleW = 3; 2815 2816 /** 2817 * Emit declaration for constant buffer [0]. We also allocate 2818 * room for the extra constants here. 
2819 */ 2820 total_consts = emit->num_shader_consts[0]; 2821 2822 /* Now, allocate constant slots for the "extra" constants */ 2823 2824 /* Vertex position scale/translation */ 2825 if (emit->vposition.need_prescale) { 2826 emit->vposition.prescale_scale_index = total_consts++; 2827 emit->vposition.prescale_trans_index = total_consts++; 2828 } 2829 2830 if (emit->unit == PIPE_SHADER_VERTEX) { 2831 if (emit->key.vs.undo_viewport) { 2832 emit->vs.viewport_index = total_consts++; 2833 } 2834 } 2835 2836 /* user-defined clip planes */ 2837 if (emit->key.clip_plane_enable) { 2838 unsigned n = util_bitcount(emit->key.clip_plane_enable); 2839 assert(emit->unit == PIPE_SHADER_VERTEX || 2840 emit->unit == PIPE_SHADER_GEOMETRY); 2841 for (i = 0; i < n; i++) { 2842 emit->clip_plane_const[i] = total_consts++; 2843 } 2844 } 2845 2846 /* Texcoord scale factors for RECT textures */ 2847 { 2848 for (i = 0; i < emit->num_samplers; i++) { 2849 if (emit->key.tex[i].unnormalized) { 2850 emit->texcoord_scale_index[i] = total_consts++; 2851 } 2852 } 2853 } 2854 2855 /* Texture buffer sizes */ 2856 for (i = 0; i < emit->num_samplers; i++) { 2857 if (emit->key.tex[i].texture_target == PIPE_BUFFER) { 2858 emit->texture_buffer_size_index[i] = total_consts++; 2859 } 2860 } 2861 2862 if (total_consts > 0) { 2863 begin_emit_instruction(emit); 2864 emit_dword(emit, opcode0.value); 2865 emit_dword(emit, operand0.value); 2866 emit_dword(emit, 0); /* which const buffer slot */ 2867 emit_dword(emit, total_consts); 2868 end_emit_instruction(emit); 2869 } 2870 2871 /* Declare remaining constant buffers (UBOs) */ 2872 for (i = 1; i < Elements(emit->num_shader_consts); i++) { 2873 if (emit->num_shader_consts[i] > 0) { 2874 begin_emit_instruction(emit); 2875 emit_dword(emit, opcode0.value); 2876 emit_dword(emit, operand0.value); 2877 emit_dword(emit, i); /* which const buffer slot */ 2878 emit_dword(emit, emit->num_shader_consts[i]); 2879 end_emit_instruction(emit); 2880 } 2881 } 2882 2883 return TRUE; 
2884} 2885 2886 2887/** 2888 * Emit declarations for samplers. 2889 */ 2890static boolean 2891emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 2892{ 2893 unsigned i; 2894 2895 for (i = 0; i < emit->num_samplers; i++) { 2896 VGPU10OpcodeToken0 opcode0; 2897 VGPU10OperandToken0 operand0; 2898 2899 opcode0.value = 0; 2900 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 2901 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 2902 2903 operand0.value = 0; 2904 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2905 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 2906 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2907 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2908 2909 begin_emit_instruction(emit); 2910 emit_dword(emit, opcode0.value); 2911 emit_dword(emit, operand0.value); 2912 emit_dword(emit, i); 2913 end_emit_instruction(emit); 2914 } 2915 2916 return TRUE; 2917} 2918 2919 2920/** 2921 * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. 2922 */ 2923static unsigned 2924pipe_texture_to_resource_dimension(unsigned target, bool msaa) 2925{ 2926 switch (target) { 2927 case PIPE_BUFFER: 2928 return VGPU10_RESOURCE_DIMENSION_BUFFER; 2929 case PIPE_TEXTURE_1D: 2930 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2931 case PIPE_TEXTURE_2D: 2932 case PIPE_TEXTURE_RECT: 2933 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS 2934 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2935 case PIPE_TEXTURE_3D: 2936 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 2937 case PIPE_TEXTURE_CUBE: 2938 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2939 case PIPE_TEXTURE_1D_ARRAY: 2940 return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; 2941 case PIPE_TEXTURE_2D_ARRAY: 2942 return msaa ? 
VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 2943 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; 2944 case PIPE_TEXTURE_CUBE_ARRAY: 2945 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; 2946 default: 2947 assert(!"Unexpected resource type"); 2948 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2949 } 2950} 2951 2952 2953/** 2954 * Given a tgsi_return_type, return true iff it is an integer type. 2955 */ 2956static boolean 2957is_integer_type(enum tgsi_return_type type) 2958{ 2959 switch (type) { 2960 case TGSI_RETURN_TYPE_SINT: 2961 case TGSI_RETURN_TYPE_UINT: 2962 return TRUE; 2963 case TGSI_RETURN_TYPE_FLOAT: 2964 case TGSI_RETURN_TYPE_UNORM: 2965 case TGSI_RETURN_TYPE_SNORM: 2966 return FALSE; 2967 case TGSI_RETURN_TYPE_COUNT: 2968 default: 2969 assert(!"is_integer_type: Unknown tgsi_return_type"); 2970 return FALSE; 2971 } 2972} 2973 2974 2975/** 2976 * Emit declarations for resources. 2977 * XXX When we're sure that all TGSI shaders will be generated with 2978 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 2979 * rework this code. 
2980 */ 2981static boolean 2982emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 2983{ 2984 unsigned i; 2985 2986 /* Emit resource decl for each sampler */ 2987 for (i = 0; i < emit->num_samplers; i++) { 2988 VGPU10OpcodeToken0 opcode0; 2989 VGPU10OperandToken0 operand0; 2990 VGPU10ResourceReturnTypeToken return_type; 2991 VGPU10_RESOURCE_RETURN_TYPE rt; 2992 2993 opcode0.value = 0; 2994 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 2995 opcode0.resourceDimension = 2996 pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, 2997 emit->key.tex[i].texture_msaa); 2998 operand0.value = 0; 2999 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 3000 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 3001 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3002 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3003 3004#if 1 3005 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 3006 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 3007 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 3008 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 3009 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 3010 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 3011 assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); 3012 rt = emit->key.tex[i].return_type + 1; 3013#else 3014 switch (emit->key.tex[i].return_type) { 3015 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 3016 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 3017 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 3018 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 3019 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 3020 case TGSI_RETURN_TYPE_COUNT: 3021 default: 3022 rt = VGPU10_RETURN_TYPE_FLOAT; 3023 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 
3024 } 3025#endif 3026 3027 return_type.value = 0; 3028 return_type.component0 = rt; 3029 return_type.component1 = rt; 3030 return_type.component2 = rt; 3031 return_type.component3 = rt; 3032 3033 begin_emit_instruction(emit); 3034 emit_dword(emit, opcode0.value); 3035 emit_dword(emit, operand0.value); 3036 emit_dword(emit, i); 3037 emit_dword(emit, return_type.value); 3038 end_emit_instruction(emit); 3039 } 3040 3041 return TRUE; 3042} 3043 3044static void 3045emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 3046 unsigned opcode, 3047 const struct tgsi_full_dst_register *dst, 3048 const struct tgsi_full_src_register *src, 3049 boolean saturate) 3050{ 3051 begin_emit_instruction(emit); 3052 emit_opcode(emit, opcode, saturate); 3053 emit_dst_register(emit, dst); 3054 emit_src_register(emit, src); 3055 end_emit_instruction(emit); 3056} 3057 3058static void 3059emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 3060 unsigned opcode, 3061 const struct tgsi_full_dst_register *dst, 3062 const struct tgsi_full_src_register *src1, 3063 const struct tgsi_full_src_register *src2, 3064 boolean saturate) 3065{ 3066 begin_emit_instruction(emit); 3067 emit_opcode(emit, opcode, saturate); 3068 emit_dst_register(emit, dst); 3069 emit_src_register(emit, src1); 3070 emit_src_register(emit, src2); 3071 end_emit_instruction(emit); 3072} 3073 3074static void 3075emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 3076 unsigned opcode, 3077 const struct tgsi_full_dst_register *dst, 3078 const struct tgsi_full_src_register *src1, 3079 const struct tgsi_full_src_register *src2, 3080 const struct tgsi_full_src_register *src3, 3081 boolean saturate) 3082{ 3083 begin_emit_instruction(emit); 3084 emit_opcode(emit, opcode, saturate); 3085 emit_dst_register(emit, dst); 3086 emit_src_register(emit, src1); 3087 emit_src_register(emit, src2); 3088 emit_src_register(emit, src3); 3089 end_emit_instruction(emit); 3090} 3091 3092/** 3093 * Emit the actual clip distance 
instructions to be used for clipping
 * by copying the clip distance from the temporary registers to the
 * CLIPDIST registers written with the enabled planes mask.
 * Also copy the clip distance from the temporary to the clip distance
 * shadow copy register which will be referenced by the input shader
 */
static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register tmp_clip_dist_src;
   struct tgsi_full_dst_register clip_dist_dst;

   unsigned i;
   unsigned clip_plane_enable = emit->key.clip_plane_enable;
   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   int num_written_clipdist = emit->info.num_written_clipdistance;

   assert(emit->clip_dist_out_index != INVALID_INDEX);
   assert(emit->clip_dist_tmp_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip dist register index so
    * that the copy to the real clip dist register will not
    * attempt to copy to the temporary register again
    */
   emit->clip_dist_tmp_index = INVALID_INDEX;

   /* Up to two vec4 output registers hold up to 8 clip distances
    * (four distances per register).
    */
   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {

      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);

      /**
       * copy to the shadow copy for use by varying variable and
       * stream output. All clip distances
       * will be written regardless of the enabled clipping planes.
       */
      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                   emit->clip_dist_so_index + i);

      /* MOV clip_dist_so, tmp_clip_dist */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                           &tmp_clip_dist_src, FALSE);

      /**
       * copy those clip distances to enabled clipping planes
       * to CLIPDIST registers for clipping
       */
      if (clip_plane_enable & 0xf) {
         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                      emit->clip_dist_out_index + i);
         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);

         /* MOV CLIPDIST, tmp_clip_dist */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                              &tmp_clip_dist_src, FALSE);
      }
      /* four clip planes per clip register */
      clip_plane_enable >>= 4;
   }
   /**
    * set the temporary clip dist register index back to the
    * temporary index for the next vertex
    */
   emit->clip_dist_tmp_index = clip_dist_tmp_index;
}

/* Declare clip distance output registers for user-defined clip planes
 * or the TGSI_CLIPVERTEX output.
 */
static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   unsigned index = emit->num_outputs;
   unsigned plane_mask;

   /* Only VS and GS can emit clip distances */
   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);
   assert(num_clip_planes <= 8);

   /* Nothing to declare unless we're synthesizing the clip distances
    * ourselves (legacy planes or CLIPVERTEX translation).
    */
   if (emit->clip_mode != CLIP_LEGACY &&
       emit->clip_mode != CLIP_VERTEX) {
      return;
   }

   if (num_clip_planes == 0)
      return;

   /* Declare one or two clip output registers.  The number of components
    * in the mask reflects the number of clip planes.
For example, if 5
    * clip planes are needed, we'll declare outputs similar to:
    * dcl_output_siv o2.xyzw, clip_distance
    * dcl_output_siv o3.x, clip_distance
    */
   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */

   /* one mask bit per enabled clip plane */
   plane_mask = (1 << num_clip_planes) - 1;
   if (plane_mask & 0xf) {
      /* first output register: planes 0..3 */
      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
   if (plane_mask & 0xf0) {
      /* second output register: planes 4..7 */
      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
}


/**
 * Emit the instructions for writing to the clip distance registers
 * to handle legacy/automatic clip planes.
 * For each clip plane, the distance is the dot product of the vertex
 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
 * output registers already declared.
 */
static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
                             unsigned vpos_tmp_index)
{
   unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);

   assert(emit->clip_mode == CLIP_LEGACY);
   assert(num_clip_planes <= 8);

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   for (i = 0; i < num_clip_planes; i++) {
      struct tgsi_full_dst_register dst;
      struct tgsi_full_src_register plane_src, vpos_src;
      /* four clip distances are packed into each vec4 output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);
      vpos_src = make_src_temp_reg(vpos_tmp_index);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &vpos_src, FALSE);
   }
}


/**
 * Emit the instructions for computing the clip distance results from
 * the clip vertex temporary.
 * For each clip plane, the distance is the dot product of the clip vertex
 * position (found in a temp reg) and the clip plane coefficients.
 */
static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   unsigned i;
   struct tgsi_full_dst_register dst;
   struct tgsi_full_src_register clipvert_src;
   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   assert(emit->clip_mode == CLIP_VERTEX);

   clipvert_src = make_src_temp_reg(clip_vertex_tmp);

   for (i = 0; i < num_clip; i++) {
      struct tgsi_full_src_register plane_src;
      /* four clip distances are packed into each vec4 output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &clipvert_src, FALSE);
   }

   /* copy temporary clip vertex register to the clip vertex register */

   assert(emit->clip_vertex_out_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip vertex register index so
    * that copy to the clip vertex register will not attempt
    * to copy to the temporary register again
    */
   emit->clip_vertex_tmp_index = INVALID_INDEX;

   /* MOV clip_vertex, clip_vertex_tmp */
   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                        &dst, &clipvert_src, FALSE);

   /**
    * set the temporary clip vertex register index back to the
    * temporary index for the next vertex
    */
   emit->clip_vertex_tmp_index = clip_vertex_tmp;
}

/**
 * Emit code to convert RGBA to BGRA
 */
3311static void 3312emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 3313 const struct tgsi_full_dst_register *dst, 3314 const struct tgsi_full_src_register *src) 3315{ 3316 struct tgsi_full_src_register bgra_src = 3317 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 3318 3319 begin_emit_instruction(emit); 3320 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 3321 emit_dst_register(emit, dst); 3322 emit_src_register(emit, &bgra_src); 3323 end_emit_instruction(emit); 3324} 3325 3326 3327/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 3328static void 3329emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 3330 const struct tgsi_full_dst_register *dst, 3331 const struct tgsi_full_src_register *src) 3332{ 3333 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 3334 struct tgsi_full_src_register two = 3335 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 3336 struct tgsi_full_src_register neg_two = 3337 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 3338 3339 unsigned val_tmp = get_temp_index(emit); 3340 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 3341 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 3342 3343 unsigned bias_tmp = get_temp_index(emit); 3344 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 3345 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 3346 3347 /* val = src * 2.0 */ 3348 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, 3349 src, &two, FALSE); 3350 3351 /* bias = src > 0.5 */ 3352 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, 3353 src, &half, FALSE); 3354 3355 /* bias = bias & -2.0 */ 3356 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 3357 &bias_src, &neg_two, FALSE); 3358 3359 /* dst = val + bias */ 3360 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 3361 &val_src, &bias_src, FALSE); 3362 3363 free_temp_indexes(emit); 3364} 3365 

/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   /* 1023 = 2^10-1 for the 10-bit RGB channels, 3 = 2^2-1 for 2-bit alpha */
   struct tgsi_full_src_register scale =
      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);

   /* dst = src * scale */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
}


/** Convert from R32_UINT to 10_10_10_2_sscaled */
static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register lshift =
      make_immediate_reg_int4(emit, 22, 12, 2, 0);
   struct tgsi_full_src_register rshift =
      make_immediate_reg_int4(emit, 22, 22, 22, 30);

   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);

   /* Use a left-shift / arithmetic-right-shift pair to sign-extend each
    * packed field:
    * r = (pixel << 22) >> 22;   # signed int in [511, -512]
    * g = (pixel << 12) >> 22;   # signed int in [511, -512]
    * b = (pixel << 2) >> 22;    # signed int in [511, -512]
    * a = (pixel << 0) >> 30;    # signed int in [1, -2]
    * dst = i_to_f(r,g,b,a);     # convert to float
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
                        &src_xxxx, &lshift, FALSE);
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
                        &tmp_src, &rshift, FALSE);
   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);

   free_temp_indexes(emit);
}


/**
 * Emit code for TGSI_OPCODE_ABS instruction.
 */
static boolean
emit_abs(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = ABS(s0):
    *   dst = abs(s0)
    * Translates into:
    *   MOV dst, abs(s0)
    * (VGPU10 supports an absolute-value source modifier, so no extra
    * instruction is needed.)
    */
   struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);

   /* MOV dst, abs(s0) */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &abs_src0, inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
 */
static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_instruction *inst)
{
   unsigned index = inst->Dst[0].Register.Index;
   struct tgsi_full_dst_register dst;
   unsigned opcode;

   assert(index < MAX_VGPU10_ADDR_REGS);
   /* the address register is modeled as an ordinary temp register */
   dst = make_dst_temp_reg(emit->address_reg_index[index]);

   /* ARL dst, s0
    * Translates into:
    * FTOI address_tmp, s0
    *
    * UARL dst, s0
    * Translates into:
    * MOV address_tmp, s0
    */
   if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
      opcode = VGPU10_OPCODE_FTOI;
   else
      opcode = VGPU10_OPCODE_MOV;

   emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_CAL instruction.
 */
static boolean
emit_cal(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   unsigned label = inst->Label.Label;
   VGPU10OperandToken0 operand;
   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_LABEL;

   /* NOTE(review): no VGPU10 opcode token (e.g. CALL) is emitted here
    * before the label operand, unlike other emitters in this file which
    * start with an opcode dword — confirm this is intentional.
    */
   begin_emit_instruction(emit);
   emit_dword(emit, operand.value);
   emit_dword(emit, label);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IABS instruction.
 */
static boolean
emit_iabs(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   /* dst.x = (src0.x < 0) ? -src0.x : src0.x
    * dst.y = (src0.y < 0) ? -src0.y : src0.y
    * dst.z = (src0.z < 0) ? -src0.z : src0.z
    * dst.w = (src0.w < 0) ? -src0.w : src0.w
    *
    * Translates into
    *   IMAX dst, src, neg(src)
    */
   struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
   emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
                        &inst->Src[0], &neg_src, FALSE);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_CMP instruction.
 */
static boolean
emit_cmp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = (src0.x < 0) ? src1.x : src2.x
    * dst.y = (src0.y < 0) ? src1.y : src2.y
    * dst.z = (src0.z < 0) ? src1.z : src2.z
    * dst.w = (src0.w < 0) ? src1.w : src2.w
    *
    * Translates into
    *   LT tmp, src0, 0.0
    *   MOVC dst, tmp, src1, src2
    */
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* tmp = per-component (src0 < 0) mask */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
                        &inst->Src[0], &zero, FALSE);
   /* select src1 where the mask is set, else src2 */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
                        &tmp_src, &inst->Src[1], &inst->Src[2],
                        inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DP2A instruction.
 */
static boolean
emit_dp2a(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   /* The scalar result is broadcast to all enabled dst components:
    * dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
    * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
    * Translate into
    *   MAD tmp.x, s0.y, s1.y, s2.x
    *   MAD tmp.x, s0.x, s1.x, tmp.x
    *   MOV dst.xyzw, tmp.xxxx
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);

   struct tgsi_full_src_register src0_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register src1_xxxx =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register src2_xxxx =
      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);

   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
                        &src1_yyyy, &src2_xxxx, FALSE);
   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
                        &src1_xxxx, &tmp_src_xxxx, FALSE);
   /* saturate only on the final broadcast move */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DPH instruction.
 */
static boolean
emit_dph(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Homogeneous dot product: dot(s0.xyz, s1.xyz) + s1.w
    * DP3 tmp, s0, s1
    * ADD dst, tmp, s1.wwww
    */

   struct tgsi_full_src_register s1_wwww =
      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* DP3 tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* ADD dst, tmp, s1.wwww */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
                        &s1_wwww, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_DST instruction.
 */
static boolean
emit_dst(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 1
    * dst.y = src0.y * src1.y
    * dst.z = src0.z
    * dst.w = src1.w
    */

   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_wwww =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);

   /*
    * If dst and either src0 and src1 are the same we need
    * to create a temporary for it and insert a extra move.
 */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* Each component is computed in the temp, honoring the writemask */
   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MUL dst.y, s0.y, s1.y */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);

      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
                           &s1_yyyy, inst->Instruction.Saturate);
   }

   /* MOV dst.z, s0.z */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, s1.w */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
                           inst->Instruction.Saturate);
   }

   /* finally copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}



/**
 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
 */
static boolean
emit_endprim(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* We can't use emit_simple() because the TGSI instruction has one
    * operand (vertex stream
 * number) which we must ignore for VGPU10.
 */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   end_emit_instruction(emit);
   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
 */
static boolean
emit_ex2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = EX2(src):
    *   dst.xyzw = 2.0 ^ src.x
    */

   /* broadcast src.x to all four components */
   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* EXP tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EXP instruction.
 */
static boolean
emit_exp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 2 ^ floor(s0.x)
    * dst.y = s0.x - floor(s0.x)
    * dst.z = 2 ^ s0.x
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert a extra move.
 */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* ROUND_NI tmp.x, s0.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                        &src_xxxx, FALSE); /* round to -infinity */

   /* EXP dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* ADD dst.y, s0.x, -tmp  (the fractional part of s0.x) */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);

      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
                           &neg_tmp_src, inst->Instruction.Saturate);
   }

   /* EXP dst.z, s0.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
                           FALSE);
   }

   /* copy the assembled temp to the real destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IF instruction.
 */
static boolean
emit_if(struct svga_shader_emitter_v10 *emit,
        const struct tgsi_full_instruction *inst)
{
   VGPU10OpcodeToken0 opcode0;

   /* The src register should be a scalar */
   assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);

   /* The only special thing here is that we need to set the
    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
    * src.x is non-zero.
    */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_IF;
   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
 * the register components are negative.
 */
static boolean
emit_kill_if(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* tmp = src[0] < 0.0 (per-component comparison mask) */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
                        &zero, FALSE);

   if (!same_swizzle_terms(&inst->Src[0])) {
      /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
       * logically OR the swizzle terms.  Most uses of KILL_IF only
       * test one channel so it's good to avoid these extra steps.
       */
      struct tgsi_full_src_register tmp_src_yyyy =
         scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
      struct tgsi_full_src_register tmp_src_zzzz =
         scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
      struct tgsi_full_src_register tmp_src_wwww =
         scalar_src(&tmp_src, TGSI_SWIZZLE_W);

      /* accumulate all four component masks into tmp.x */
      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
                           &tmp_src_wwww, FALSE);
   }

   begin_emit_instruction(emit);
   emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
   emit_src_register(emit, &tmp_src_xxxx);
   end_emit_instruction(emit);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
 */
static boolean
emit_kill(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   /* Unconditional discard: emit DISCARD testing a 0.0 immediate
    * (presumably with the test-zero condition, so it always fires —
    * emit_discard_opcode(emit, FALSE)).
    */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);
   emit_src_register(emit, &zero);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LG2 instruction.
 */
static boolean
emit_lg2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = LG2(src):
    *   dst.xyzw = log2(src.x)
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LIT instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert a extra move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /*
    * dst.x = 1
    * dst.y = max(src.x, 0)
    * dst.z = (src.x > 0) ?
max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 3985 * dst.w = 1 3986 */ 3987 3988 /* MOV dst.x, 1.0 */ 3989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3990 struct tgsi_full_dst_register dst_x = 3991 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3992 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 3993 } 3994 3995 /* MOV dst.w, 1.0 */ 3996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3997 struct tgsi_full_dst_register dst_w = 3998 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3999 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4000 } 4001 4002 /* MAX dst.y, src.x, 0.0 */ 4003 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4004 struct tgsi_full_dst_register dst_y = 4005 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 4006 struct tgsi_full_src_register zero = 4007 make_immediate_reg_float(emit, 0.0f); 4008 struct tgsi_full_src_register src_xxxx = 4009 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4010 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4011 4012 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 4013 &zero, inst->Instruction.Saturate); 4014 } 4015 4016 /* 4017 * tmp1 = clamp(src.w, -128, 128); 4018 * MAX tmp1, src.w, -128 4019 * MIN tmp1, tmp1, 128 4020 * 4021 * tmp2 = max(tmp2, 0); 4022 * MAX tmp2, src.y, 0 4023 * 4024 * tmp1 = pow(tmp2, tmp1); 4025 * LOG tmp2, tmp2 4026 * MUL tmp1, tmp2, tmp1 4027 * EXP tmp1, tmp1 4028 * 4029 * tmp1 = (src.w == 0) ? 1 : tmp1; 4030 * EQ tmp2, 0, src.w 4031 * MOVC tmp1, tmp2, 1.0, tmp1 4032 * 4033 * dst.z = (0 < src.x) ? 
tmp1 : 0; 4034 * LT tmp2, 0, src.x 4035 * MOVC dst.z, tmp2, tmp1, 0.0 4036 */ 4037 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4038 struct tgsi_full_dst_register dst_z = 4039 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 4040 4041 unsigned tmp1 = get_temp_index(emit); 4042 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4043 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4044 unsigned tmp2 = get_temp_index(emit); 4045 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4046 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4047 4048 struct tgsi_full_src_register src_xxxx = 4049 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4050 struct tgsi_full_src_register src_yyyy = 4051 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 4052 struct tgsi_full_src_register src_wwww = 4053 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 4054 4055 struct tgsi_full_src_register zero = 4056 make_immediate_reg_float(emit, 0.0f); 4057 struct tgsi_full_src_register lowerbound = 4058 make_immediate_reg_float(emit, -128.0f); 4059 struct tgsi_full_src_register upperbound = 4060 make_immediate_reg_float(emit, 128.0f); 4061 4062 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 4063 &lowerbound, FALSE); 4064 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 4065 &upperbound, FALSE); 4066 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 4067 &zero, FALSE); 4068 4069 /* POW tmp1, tmp2, tmp1 */ 4070 /* LOG tmp2, tmp2 */ 4071 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, 4072 FALSE); 4073 4074 /* MUL tmp1, tmp2, tmp1 */ 4075 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 4076 &tmp1_src, FALSE); 4077 4078 /* EXP tmp1, tmp1 */ 4079 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, 4080 FALSE); 4081 4082 /* EQ tmp2, 0, src.w */ 4083 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, 4084 &src_wwww, FALSE); 4085 
      /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
                           &tmp2_src, &one, &tmp1_src, FALSE);

      /* LT tmp2, 0, src.x */
      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
                           &src_xxxx, FALSE);
      /* MOVC dst.z, tmp2, tmp1, 0.0 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
                           &tmp2_src, &tmp1_src, &zero, FALSE);
   }

   /* MOV dst, move_src (copy the components assembled above) */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LOG instruction.
 *
 * Each result component is guarded by the destination writemask so we
 * only emit the instructions actually needed.
 */
static boolean
emit_log(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = floor(lg2(abs(s0.x)))
    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
    * dst.z = lg2(abs(s0.x))
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* LOG tmp.x, abs(s0.x) -- needed by the x, y and z results */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
                           &abs_src_xxxx, FALSE);
   }

   /* MOV dst.z, tmp.x -- must be done before tmp.x is floored below */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* FLR tmp.x, tmp.x (ROUND_NI = round toward negative infinity) */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                           &tmp_src, FALSE);
   }

   /* MOV dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* EXP tmp.x, tmp.x */
   /* DIV dst.y, abs(s0.x), tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
                           FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LRP instruction.
 */
static boolean
emit_lrp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = LRP(s0, s1, s2):
    *   dst = s0 * (s1 - s2) + s2
    * Translates into:
    *   ADD tmp, s1, -s2;      tmp = s1 - s2
    *   MAD dst, s0, tmp, s2;  dst = s0 * tmp + s2
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);

   /* ADD tmp, s1, -s2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
                        &inst->Src[1], &neg_src2, FALSE);

   /* MAD dst, s0, tmp, s2 */
   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
                        &inst->Src[0], &src_tmp, &inst->Src[2],
                        inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_POW instruction.
 */
static boolean
emit_pow(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
    * src1.x while VGPU10 computes four values.
    *
    * dst = POW(src0, src1):
    *   dst.xyzw = src0.x ^ src1.x
    *
    * Implemented as: dst = exp2(src1.x * log2(src0.x))
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register src0_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src1_xxxx =
      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
                        FALSE);

   /* MUL tmp, tmp, s1.xxxx */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
                        &src1_xxxx, FALSE);

   /* EXP dst, tmp */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
                        &tmp_src, inst->Instruction.Saturate);

   /* free tmp */
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
 */
static boolean
emit_rcp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* VGPU10 has no RCP opcode here; compute 1.0 / src via DIV and
    * broadcast the scalar result to all written components.
    */
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* DIV tmp.x, 1.0, s0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
                        &inst->Src[0], FALSE);

   /* MOV dst, tmp.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_RSQ instruction.
 */
static boolean
emit_rsq(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = RSQ(src):
    *   dst.xyzw = 1 / sqrt(src.x)
    * Translates into:
    *   RSQ tmp, src.x
    *   MOV dst, tmp.xxxx
    */

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* RSQ tmp, src.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
                        &inst->Src[0], FALSE);

   /* MOV dst, tmp.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   /* free tmp */
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SCS instruction.
 */
static boolean
emit_scs(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = cos(src.x)
    * dst.y = sin(src.x)
    * dst.z = 0.0
    * dst.w = 1.0
    */
   struct tgsi_full_dst_register dst_x =
      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
   struct tgsi_full_dst_register dst_y =
      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
   struct tgsi_full_dst_register dst_zw =
      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);

   struct tgsi_full_src_register zero_one =
      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);

   /* VGPU10 SINCOS writes sin to the first dest register and cos to the
    * second, so pass dst.y first, then dst.x (cf. emit_sincos()).
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
   emit_dst_register(emit, &dst_y);
   emit_dst_register(emit, &dst_x);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   /* MOV dst.zw, {0, 0, 0, 1} */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                        &dst_zw, &zero_one, inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
 */
static boolean
emit_seq(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SEQ(s0, s1):
    *   dst = s0 == s1 ? 1.0 : 0.0 (per component)
    * Translates into:
    *   EQ tmp, s0, s1;           tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* EQ tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
 */
static boolean
emit_sge(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SGE(s0, s1):
    *   dst = s0 >= s1 ? 1.0 : 0.0 (per component)
    * Translates into:
    *   GE tmp, s0, s1;           tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* GE tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
 */
static boolean
emit_sgt(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SGT(s0, s1):
    *   dst = s0 > s1 ? 1.0 : 0.0 (per component)
    * Translates into (note the swapped operands, since only LT exists):
    *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* LT tmp, s1, s0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
                        &inst->Src[0], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
 */
static boolean
emit_sincos(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);

   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);

   /* VGPU10 SINCOS has two dest registers: sin result in the first,
    * cos result in the second.  Write the unwanted one to a null reg.
    */
   if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
   {
      emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
      emit_null_dst_register(emit);  /* second destination register */
   }
   else {
      emit_null_dst_register(emit);
      emit_dst_register(emit, &tmp_dst_x);
   }

   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   /* MOV dst, tmp.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
 */
static boolean
emit_sle(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SLE(s0, s1):
    *   dst = s0 <= s1 ? 1.0 : 0.0 (per component)
    * Translates into (note the swapped operands, since only GE exists):
    *   GE tmp, s1, s0;           tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* GE tmp, s1, s0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
                        &inst->Src[0], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
 */
static boolean
emit_slt(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SLT(s0, s1):
    *   dst = s0 < s1 ? 1.0 : 0.0 (per component)
    * Translates into:
    *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* LT tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
4577 */ 4578static boolean 4579emit_sne(struct svga_shader_emitter_v10 *emit, 4580 const struct tgsi_full_instruction *inst) 4581{ 4582 /* dst = SNE(s0, s1): 4583 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4584 * Translates into: 4585 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4586 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4587 */ 4588 unsigned tmp = get_temp_index(emit); 4589 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4590 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4591 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4592 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4593 4594 /* NE tmp, s0, s1 */ 4595 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4596 &inst->Src[1], FALSE); 4597 4598 /* MOVC dst, tmp, one, zero */ 4599 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4600 &one, &zero, FALSE); 4601 4602 free_temp_indexes(emit); 4603 4604 return TRUE; 4605} 4606 4607 4608/** 4609 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4610 */ 4611static boolean 4612emit_ssg(struct svga_shader_emitter_v10 *emit, 4613 const struct tgsi_full_instruction *inst) 4614{ 4615 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4616 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4617 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4618 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4619 * Translates into: 4620 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4621 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4622 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4623 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) 4624 */ 4625 struct tgsi_full_src_register zero = 4626 make_immediate_reg_float(emit, 0.0f); 4627 struct tgsi_full_src_register one = 4628 make_immediate_reg_float(emit, 1.0f); 4629 struct tgsi_full_src_register neg_one = 4630 make_immediate_reg_float(emit, -1.0f); 4631 4632 unsigned tmp1 = get_temp_index(emit); 4633 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4634 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4635 4636 unsigned tmp2 = get_temp_index(emit); 4637 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4638 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4639 4640 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4641 &zero, FALSE); 4642 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4643 &neg_one, &zero, FALSE); 4644 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4645 &inst->Src[0], FALSE); 4646 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4647 &one, &tmp2_src, FALSE); 4648 4649 free_temp_indexes(emit); 4650 4651 return TRUE; 4652} 4653 4654 4655/** 4656 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4657 */ 4658static boolean 4659emit_issg(struct svga_shader_emitter_v10 *emit, 4660 const struct tgsi_full_instruction *inst) 4661{ 4662 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4663 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4664 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4665 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4666 * Translates into: 4667 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4668 * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component) 4669 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4670 */ 4671 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4672 4673 unsigned tmp1 = get_temp_index(emit); 4674 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4675 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4676 4677 unsigned tmp2 = get_temp_index(emit); 4678 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4679 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4680 4681 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4682 4683 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4684 &inst->Src[0], &zero, FALSE); 4685 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4686 &zero, &inst->Src[0], FALSE); 4687 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4688 &tmp1_src, &neg_tmp2, FALSE); 4689 4690 free_temp_indexes(emit); 4691 4692 return TRUE; 4693} 4694 4695 4696/** 4697 * Emit code for TGSI_OPCODE_SUB instruction. 4698 */ 4699static boolean 4700emit_sub(struct svga_shader_emitter_v10 *emit, 4701 const struct tgsi_full_instruction *inst) 4702{ 4703 /* dst = SUB(s0, s1): 4704 * dst = s0 - s1 4705 * Translates into: 4706 * ADD dst, s0, neg(s1) 4707 */ 4708 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); 4709 4710 /* ADD dst, s0, neg(s1) */ 4711 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], 4712 &inst->Src[0], &neg_src1, 4713 inst->Instruction.Saturate); 4714 4715 return TRUE; 4716} 4717 4718 4719/** 4720 * Emit a comparison instruction. The dest register will get 4721 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 
 */
static void
emit_comparison(struct svga_shader_emitter_v10 *emit,
                SVGA3dCmpFunc func,
                const struct tgsi_full_dst_register *dst,
                const struct tgsi_full_src_register *src0,
                const struct tgsi_full_src_register *src1)
{
   struct tgsi_full_src_register immediate;
   VGPU10OpcodeToken0 opcode0;
   boolean swapSrc = FALSE;   /* whether to emit src1 before src0 */

   /* Sanity checks for svga vs. gallium enums */
   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));

   opcode0.value = 0;

   switch (func) {
   case SVGA3D_CMP_NEVER:
      /* comparison always false: just move 0 into dst */
      immediate = make_immediate_reg_int(emit, 0);
      /* MOV dst, {0} */
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_ALWAYS:
      /* comparison always true: just move ~0 into dst */
      immediate = make_immediate_reg_int(emit, -1);
      /* MOV dst, {-1} */
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_LESS:
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      break;
   case SVGA3D_CMP_EQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
      break;
   case SVGA3D_CMP_LESSEQUAL:
      /* a <= b  implemented as  b >= a */
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      swapSrc = TRUE;
      break;
   case SVGA3D_CMP_GREATER:
      /* a > b  implemented as  b < a */
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      swapSrc = TRUE;
      break;
   case SVGA3D_CMP_NOTEQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_NE;
      break;
   case SVGA3D_CMP_GREATEREQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      break;
   default:
      assert(!"Unexpected comparison mode");
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
   }

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_dst_register(emit, dst);
   if (swapSrc) {
      emit_src_register(emit, src1);
      emit_src_register(emit, src0);
   }
   else {
      emit_src_register(emit, src0);
      emit_src_register(emit, src1);
   }
   end_emit_instruction(emit);
}


/**
 * Get texel/address offsets for a texture instruction.
 * \param inst  the TGSI texture instruction
 * \param offsets  returns the x/y/z offsets (zero if none declared)
 */
static void
get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_instruction *inst, int offsets[3])
{
   if (inst->Texture.NumOffsets == 1) {
      /* According to OpenGL Shader Language spec the offsets are only
       * fetched from a previously-declared immediate/literal.
       */
      const struct tgsi_texture_offset *off = inst->TexOffsets;
      const unsigned index = off[0].Index;
      const unsigned swizzleX = off[0].SwizzleX;
      const unsigned swizzleY = off[0].SwizzleY;
      const unsigned swizzleZ = off[0].SwizzleZ;
      const union tgsi_immediate_data *imm = emit->immediates[index];

      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);

      offsets[0] = imm[swizzleX].Int;
      offsets[1] = imm[swizzleY].Int;
      offsets[2] = imm[swizzleZ].Int;
   }
   else {
      offsets[0] = offsets[1] = offsets[2] = 0;
   }
}


/**
 * Set up the coordinate register for texture sampling.
 * When we're sampling from a RECT texture we have to scale the
 * unnormalized coordinate to a normalized coordinate.
 * We do that by multiplying the coordinate by an "extra" constant.
 * An alternative would be to use the RESINFO instruction to query the
 * texture's size.
 */
static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 *emit,
               unsigned unit,
               const struct tgsi_full_src_register *coord)
{
   if (emit->key.tex[unit].unnormalized) {
      unsigned scale_index = emit->texcoord_scale_index[unit];
      unsigned tmp = get_temp_index(emit);
      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);

      /* MUL tmp, coord, const[] -- scale to normalized coords */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
                           coord, &scale_src, FALSE);
      return tmp_src;
   }
   else {
      /* use texcoord as-is */
      return *coord;
   }
}


/**
 * For SAMPLE_C instructions, emit the extra src register which indicates
 * the reference/comparision value.
 */
static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
                          unsigned target,
                          const struct tgsi_full_src_register *coord)
{
   struct tgsi_full_src_register coord_src_ref;
   unsigned component;

   assert(tgsi_is_shadow_target(target));

   assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
   /* The reference value lives in the texcoord component just past the
    * coordinate components used by the target.
    */
   if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       target == TGSI_TEXTURE_SHADOWCUBE)
      component = TGSI_SWIZZLE_W;
   else
      component = TGSI_SWIZZLE_Z;

   coord_src_ref = scalar_src(coord, component);

   emit_src_register(emit, &coord_src_ref);
}


/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here if for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   boolean swizzled;          /* any non-identity swizzle term? */
   boolean shadow_compare;    /* emit shadow/depth comparison code? */
   unsigned unit;             /* texture/sampler unit */
   unsigned texture_target;  /**< TGSI_TEXTURE_x */
   struct tgsi_full_src_register tmp_src;  /* temp holding raw texel */
   struct tgsi_full_dst_register tmp_dst;  /* dst view of the same temp */
   const struct tgsi_full_dst_register *inst_dst;  /* original inst dst */
   const struct tgsi_full_src_register *coord_src; /* original coord src */
};


/**
 * Do setup for handling texture swizzles or shadow compares.
 * \param unit  the texture unit
 * \param inst  the TGSI texture instruction
 * \param shadow_compare  do shadow/depth comparison?
 * \param swz  returns the swizzle info
 */
static void
begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                  unsigned unit,
                  const struct tgsi_full_instruction *inst,
                  boolean shadow_compare,
                  struct tex_swizzle_info *swz)
{
   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);

   swz->shadow_compare = shadow_compare;
   swz->texture_target = inst->Texture.Texture;

   if (swz->swizzled || shadow_compare) {
      /* Allocate temp register for the result of the SAMPLE instruction
       * and the source of the MOV/compare/swizzle instructions.
       */
      unsigned tmp = get_temp_index(emit);
      swz->tmp_src = make_src_temp_reg(tmp);
      swz->tmp_dst = make_dst_temp_reg(tmp);

      swz->unit = unit;
   }
   swz->inst_dst = &inst->Dst[0];
   swz->coord_src = &inst->Src[0];
}


/**
 * Returns the register to put the SAMPLE instruction results into.
 * This will either be the original instruction dst reg (if no swizzle
 * and no shadow comparison) or a temporary reg if there is a swizzle.
 */
static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
{
   return (swz->swizzled || swz->shadow_compare)
      ? &swz->tmp_dst : swz->inst_dst;
}


/**
 * This emits the MOV instruction that actually implements a texture swizzle
 * and/or shadow comparison.
 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      assert(emit->unit == PIPE_SHADER_FRAGMENT);

      /* Pick the texcoord component holding the reference value */
      switch (swz->texture_target) {
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_SHADOW1D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
         break;
      case TGSI_TEXTURE_SHADOW1D:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
         break;
      case TGSI_TEXTURE_SHADOWCUBE:
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
         break;
      default:
         assert(!"Unexpected texture target in end_tex_swizzle()");
         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
      }

      /* COMPARE tmp, coord, texel */
      /* XXX it would seem that the texel and coord arguments should
       * be transposed here, but piglit tests indicate otherwise.
       */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &texel_src, &coord_src);

      /* AND dest, tmp, {1.0} -- convert ~0/0 compare result to 1.0/0.0 */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
      if (swz->swizzled) {
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);

      /* Swizzle w/out zero/one terms */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
                     swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
                     swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
                     swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled, FALSE);

      /* handle swizzle zero terms */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
                     ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
                     ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
                     ((swz_a == PIPE_SWIZZLE_ZERO) << 3));

      if (writemask_0) {
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                              &dst, &zero, FALSE);
      }

      /* handle swizzle one terms */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
                     ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
                     ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
                     ((swz_a == PIPE_SWIZZLE_ONE) << 3));

      if (writemask_1) {
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
      }
   }
}


/**
 * Emit code for TGSI_OPCODE_SAMPLE instruction.
 */
static boolean
emit_sample(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   const unsigned resource_unit = inst->Src[1].Register.Index;
   const unsigned sampler_unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, resource_unit);
   emit_sampler_register(emit, sampler_unit);
   end_emit_instruction(emit);

   end_tex_swizzle(emit,
&swz_info); 5110 5111 free_temp_indexes(emit); 5112 5113 return TRUE; 5114} 5115 5116 5117/** 5118 * Check if a texture instruction is valid. 5119 * An example of an invalid texture instruction is doing shadow comparison 5120 * with an integer-valued texture. 5121 * If we detect an invalid texture instruction, we replace it with: 5122 * MOV dst, {1,1,1,1}; 5123 * \return TRUE if valid, FALSE if invalid. 5124 */ 5125static boolean 5126is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 5127 const struct tgsi_full_instruction *inst) 5128{ 5129 const unsigned unit = inst->Src[1].Register.Index; 5130 const unsigned target = inst->Texture.Texture; 5131 boolean valid = TRUE; 5132 5133 if (tgsi_is_shadow_target(target) && 5134 is_integer_type(emit->key.tex[unit].return_type)) { 5135 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 5136 valid = FALSE; 5137 } 5138 /* XXX might check for other conditions in the future here */ 5139 5140 if (!valid) { 5141 /* emit a MOV dst, {1,1,1,1} instruction. 
*/ 5142 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 5143 begin_emit_instruction(emit); 5144 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5145 emit_dst_register(emit, &inst->Dst[0]); 5146 emit_src_register(emit, &one); 5147 end_emit_instruction(emit); 5148 } 5149 5150 return valid; 5151} 5152 5153 5154/** 5155 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 5156 */ 5157static boolean 5158emit_tex(struct svga_shader_emitter_v10 *emit, 5159 const struct tgsi_full_instruction *inst) 5160{ 5161 const uint unit = inst->Src[1].Register.Index; 5162 unsigned target = inst->Texture.Texture; 5163 unsigned opcode; 5164 struct tgsi_full_src_register coord; 5165 int offsets[3]; 5166 struct tex_swizzle_info swz_info; 5167 5168 /* check that the sampler returns a float */ 5169 if (!is_valid_tex_instruction(emit, inst)) 5170 return TRUE; 5171 5172 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5173 5174 get_texel_offsets(emit, inst, offsets); 5175 5176 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5177 5178 /* SAMPLE dst, coord(s0), resource, sampler */ 5179 begin_emit_instruction(emit); 5180 5181 if (tgsi_is_shadow_target(target)) 5182 opcode = VGPU10_OPCODE_SAMPLE_C; 5183 else 5184 opcode = VGPU10_OPCODE_SAMPLE; 5185 5186 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5187 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5188 emit_src_register(emit, &coord); 5189 emit_resource_register(emit, unit); 5190 emit_sampler_register(emit, unit); 5191 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5192 emit_tex_compare_refcoord(emit, target, &coord); 5193 } 5194 end_emit_instruction(emit); 5195 5196 end_tex_swizzle(emit, &swz_info); 5197 5198 free_temp_indexes(emit); 5199 5200 return TRUE; 5201} 5202 5203 5204/** 5205 * Emit code for TGSI_OPCODE_TXP (projective texture) 5206 */ 5207static boolean 5208emit_txp(struct svga_shader_emitter_v10 *emit, 5209 const struct tgsi_full_instruction *inst) 5210{ 5211 const uint 
unit = inst->Src[1].Register.Index; 5212 unsigned target = inst->Texture.Texture; 5213 unsigned opcode; 5214 int offsets[3]; 5215 unsigned tmp = get_temp_index(emit); 5216 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 5217 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 5218 struct tgsi_full_src_register src0_wwww = 5219 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5220 struct tgsi_full_src_register coord; 5221 struct tex_swizzle_info swz_info; 5222 5223 /* check that the sampler returns a float */ 5224 if (!is_valid_tex_instruction(emit, inst)) 5225 return TRUE; 5226 5227 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5228 5229 get_texel_offsets(emit, inst, offsets); 5230 5231 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5232 5233 /* DIV tmp, coord, coord.wwww */ 5234 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 5235 &coord, &src0_wwww, FALSE); 5236 5237 /* SAMPLE dst, coord(tmp), resource, sampler */ 5238 begin_emit_instruction(emit); 5239 5240 if (tgsi_is_shadow_target(target)) 5241 opcode = VGPU10_OPCODE_SAMPLE_C; 5242 else 5243 opcode = VGPU10_OPCODE_SAMPLE; 5244 5245 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5246 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5247 emit_src_register(emit, &tmp_src); /* projected coord */ 5248 emit_resource_register(emit, unit); 5249 emit_sampler_register(emit, unit); 5250 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5251 emit_tex_compare_refcoord(emit, target, &tmp_src); 5252 } 5253 end_emit_instruction(emit); 5254 5255 end_tex_swizzle(emit, &swz_info); 5256 5257 free_temp_indexes(emit); 5258 5259 return TRUE; 5260} 5261 5262 5263/* 5264 * Emit code for TGSI_OPCODE_XPD instruction. 
 */
static boolean
emit_xpd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.x = src0.y * src1.z - src1.y * src0.z
    * dst.y = src0.z * src1.x - src1.z * src0.x
    * dst.z = src0.x * src1.y - src1.x * src0.y
    * dst.w = 1
    */
   struct tgsi_full_src_register s0_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);

   struct tgsi_full_src_register s1_xxxx =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_zzzz =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);

   unsigned tmp1 = get_temp_index(emit);
   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);

   unsigned tmp2 = get_temp_index(emit);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);

   unsigned tmp3 = get_temp_index(emit);
   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
   struct tgsi_full_dst_register tmp3_dst_x =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_dst_register tmp3_dst_y =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
   struct tgsi_full_dst_register tmp3_dst_z =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
   struct tgsi_full_dst_register tmp3_dst_w =
      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);

   /* Note: we put all the intermediate computations into tmp3 in case
    * the XPD dest register is that same as one of the src regs (in which
    * case we could clobber a src reg before we're done with it) .
    *
    * Note: we could get by with just one temp register instead of three
    * since we're doing scalar operations and there's enough room in one
    * temp for everything.
    */

   /* MUL tmp1, src0.y, src1.z */
   /* MUL tmp2, src1.y, src0.z */
   /* ADD tmp3.x, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
                           &s0_yyyy, &s1_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
                           &s1_yyyy, &s0_zzzz, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.z, src1.x */
   /* MUL tmp2, src1.z, src0.x */
   /* ADD tmp3.y, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
                           &s1_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
                           &s0_xxxx, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MUL tmp1, src0.x, src1.y */
   /* MUL tmp2, src1.x, src0.y */
   /* ADD tmp3.z, tmp1, -tmp2 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
                           &s1_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
                           &s0_yyyy, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
                           &tmp1_src, &neg_tmp2_src, FALSE);
   }

   /* MOV tmp3.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
   }

   /* MOV dst, tmp3 */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
                        inst->Instruction.Saturate);


   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
 */
static boolean
emit_txd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[3].Register.Index;
   unsigned target = inst->Texture.Texture;
   int offsets[3];
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   begin_emit_instruction(emit);
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXF (texel fetch)
 */
static boolean
emit_txf(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const unsigned msaa = emit->key.tex[unit].texture_msaa;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   if (msaa) {
      /* Fetch one sample from an MSAA texture.
       * The sample index rides in the W component of the TXF coord.
       */
      struct tgsi_full_src_register sampleIndex =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      /* LD_MS dst, coord(s0), resource, sampleIndex */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      emit_src_register(emit, &sampleIndex);
      end_emit_instruction(emit);
   }
   else {
      /* Fetch one texel specified by integer coordinate */
      /* LD dst, coord(s0), resource */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      end_emit_instruction(emit);
   }

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
 */
static boolean
emit_txl_txb(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   unsigned target = inst->Texture.Texture;
   unsigned opcode, unit;
   int offsets[3];
   struct tgsi_full_src_register coord, lod_bias;
   struct tex_swizzle_info swz_info;

   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);

   /* TXB2 carries the bias in a second source register; TXL/TXB pack
    * the LOD/bias into the W component of the coordinate.
    */
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
      unit = inst->Src[2].Register.Index;
   }
   else {
      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      unit = inst->Src[1].Register.Index;
   }

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   begin_emit_instruction(emit);
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      opcode = VGPU10_OPCODE_SAMPLE_L;
   }
   else {
      opcode = VGPU10_OPCODE_SAMPLE_B;
   }
   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &lod_bias);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
5524 */ 5525static boolean 5526emit_txq(struct svga_shader_emitter_v10 *emit, 5527 const struct tgsi_full_instruction *inst) 5528{ 5529 const uint unit = inst->Src[1].Register.Index; 5530 5531 if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { 5532 /* RESINFO does not support querying texture buffers, so we instead 5533 * store texture buffer sizes in shader constants, then copy them to 5534 * implement TXQ instead of emitting RESINFO. 5535 * MOV dst, const[texture_buffer_size_index[unit]] 5536 */ 5537 struct tgsi_full_src_register size_src = 5538 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5539 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5540 FALSE); 5541 } else { 5542 /* RESINFO dst, srcMipLevel, resource */ 5543 begin_emit_instruction(emit); 5544 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5545 emit_dst_register(emit, &inst->Dst[0]); 5546 emit_src_register(emit, &inst->Src[0]); 5547 emit_resource_register(emit, unit); 5548 end_emit_instruction(emit); 5549 } 5550 5551 free_temp_indexes(emit); 5552 5553 return TRUE; 5554} 5555 5556 5557/** 5558 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5559 */ 5560static boolean 5561emit_simple(struct svga_shader_emitter_v10 *emit, 5562 const struct tgsi_full_instruction *inst) 5563{ 5564 const unsigned opcode = inst->Instruction.Opcode; 5565 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5566 unsigned i; 5567 5568 begin_emit_instruction(emit); 5569 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5570 inst->Instruction.Saturate); 5571 for (i = 0; i < op->num_dst; i++) { 5572 emit_dst_register(emit, &inst->Dst[i]); 5573 } 5574 for (i = 0; i < op->num_src; i++) { 5575 emit_src_register(emit, &inst->Src[i]); 5576 } 5577 end_emit_instruction(emit); 5578 5579 return TRUE; 5580} 5581 5582 5583/** 5584 * We only special case the MOV instruction to try to detect constant 5585 * color writes in the fragment shader. 
5586 */ 5587static boolean 5588emit_mov(struct svga_shader_emitter_v10 *emit, 5589 const struct tgsi_full_instruction *inst) 5590{ 5591 const struct tgsi_full_src_register *src = &inst->Src[0]; 5592 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5593 5594 if (emit->unit == PIPE_SHADER_FRAGMENT && 5595 dst->Register.File == TGSI_FILE_OUTPUT && 5596 dst->Register.Index == 0 && 5597 src->Register.File == TGSI_FILE_CONSTANT && 5598 !src->Register.Indirect) { 5599 emit->constant_color_output = TRUE; 5600 } 5601 5602 return emit_simple(emit, inst); 5603} 5604 5605 5606/** 5607 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5608 * where TGSI only uses one dest register. 5609 */ 5610static boolean 5611emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5612 const struct tgsi_full_instruction *inst, 5613 unsigned dst_count, 5614 unsigned dst_index) 5615{ 5616 const unsigned opcode = inst->Instruction.Opcode; 5617 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5618 unsigned i; 5619 5620 begin_emit_instruction(emit); 5621 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5622 inst->Instruction.Saturate); 5623 5624 for (i = 0; i < dst_count; i++) { 5625 if (i == dst_index) { 5626 emit_dst_register(emit, &inst->Dst[0]); 5627 } else { 5628 emit_null_dst_register(emit); 5629 } 5630 } 5631 5632 for (i = 0; i < op->num_src; i++) { 5633 emit_src_register(emit, &inst->Src[i]); 5634 } 5635 end_emit_instruction(emit); 5636 5637 return TRUE; 5638} 5639 5640 5641/** 5642 * Translate a single TGSI instruction to VGPU10. 
 */
static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst)
{
   const unsigned opcode = inst->Instruction.Opcode;

   switch (opcode) {
   /* Opcodes with a direct VGPU10 counterpart: translate 1:1. */
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_RET:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
      /* simple instructions */
      return emit_simple(emit, inst);

   /* Opcodes that need special-case translation: */
   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_ABS:
      return emit_abs(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      /* fall-through */
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return TRUE;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DP2A:
      return emit_dp2a(emit, inst);
   case TGSI_OPCODE_DPH:
      return emit_dph(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_KILL:
      return emit_kill(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_kill_if(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   case TGSI_OPCODE_SCS:
      return emit_scs(emit, inst);
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   case TGSI_OPCODE_SUB:
      return emit_sub(emit, inst);
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_XPD:
      return emit_xpd(emit, inst);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_IDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);
   case TGSI_OPCODE_END:
      /* END also triggers the epilog (clipping, vpos adjustment, etc.) */
      if (!emit_post_helpers(emit))
         return FALSE;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return FALSE;
   }

   return TRUE;
}


/**
 * Emit the extra instructions to adjust the vertex position.
 * There are two possible adjustments:
 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
 *    "prescale" and "pretranslate" values.
 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
 * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
 */
static void
emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
                       unsigned vs_pos_tmp_index)
{
   struct tgsi_full_src_register tmp_pos_src;
   struct tgsi_full_dst_register pos_dst;

   /* Don't bother to emit any extra vertex instructions if vertex position is
    * not written out
    */
   if (emit->vposition.out_index == INVALID_INDEX)
      return;

   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
   pos_dst = make_dst_output_reg(emit->vposition.out_index);

   /* If non-adjusted vertex position register index
    * is valid, copy the vertex position from the temporary
    * vertex position register before it is modified by the
    * prescale computation.
    */
   if (emit->vposition.so_index != INVALID_INDEX) {
      struct tgsi_full_dst_register pos_so_dst =
         make_dst_output_reg(emit->vposition.so_index);

      /* MOV pos_so, tmp_pos */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
                           &tmp_pos_src, FALSE);
   }

   if (emit->vposition.need_prescale) {
      /* This code adjusts the vertex position to match the VGPU10 convention.
       * If p is the position computed by the shader (usually by applying the
       * modelview and projection matrices), the new position q is computed by:
       *
       * q.x = p.w * trans.x + p.x * scale.x
       * q.y = p.w * trans.y + p.y * scale.y
       * q.z = p.w * trans.z + p.z * scale.z;
       * q.w = p.w * trans.w + p.w;
       */
      struct tgsi_full_src_register tmp_pos_src_w =
         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
      struct tgsi_full_dst_register tmp_pos_dst =
         make_dst_temp_reg(vs_pos_tmp_index);
      struct tgsi_full_dst_register tmp_pos_dst_xyz =
         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);

      struct tgsi_full_src_register prescale_scale =
         make_src_const_reg(emit->vposition.prescale_scale_index);
      struct tgsi_full_src_register prescale_trans =
         make_src_const_reg(emit->vposition.prescale_trans_index);

      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
                           &tmp_pos_src, &prescale_scale, FALSE);

      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
                           &prescale_trans, &tmp_pos_src, FALSE);
   }
   else if (emit->key.vs.undo_viewport) {
      /* This code computes the final vertex position from the temporary
       * vertex position by undoing the viewport transformation and the
       * divide-by-W operation (we convert window coords back to clip coords).
       * This is needed when we use the 'draw' module for fallbacks.
       * If p is the temp pos in window coords, then the NDC coord q is:
       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
       *   q.z = p.z * p.w
       *   q.w = p.w
       * CONST[vs_viewport_index] contains:
       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
       */
      struct tgsi_full_dst_register tmp_pos_dst =
         make_dst_temp_reg(vs_pos_tmp_index);
      struct tgsi_full_dst_register tmp_pos_dst_xy =
         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
      struct tgsi_full_src_register tmp_pos_src_wwww =
         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);

      struct tgsi_full_dst_register pos_dst_xyz =
         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
      struct tgsi_full_dst_register pos_dst_w =
         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);

      struct tgsi_full_src_register vp_xyzw =
         make_src_const_reg(emit->vs.viewport_index);
      struct tgsi_full_src_register vp_zwww =
         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);

      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
                           &tmp_pos_src, &vp_zwww, FALSE);

      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
                           &tmp_pos_src, &vp_xyzw, FALSE);

      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
                           &tmp_pos_src, &tmp_pos_src_wwww, FALSE);

      /* MOV pos.w, tmp_pos.w */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
                           &tmp_pos_src, FALSE);
   }
   else if (vs_pos_tmp_index != INVALID_INDEX) {
      /* This code is to handle the case where the temporary vertex
       * position register is created when the vertex shader has stream
       * output and prescale is disabled because rasterization is to be
       * discarded.
       */
      struct tgsi_full_dst_register pos_dst =
         make_dst_output_reg(emit->vposition.out_index);

      /* MOV pos, tmp_pos */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &pos_dst);
      emit_src_register(emit, &tmp_pos_src);
      end_emit_instruction(emit);
   }
}

/**
 * Emit the clip-related instructions appropriate for the current clip mode,
 * then the legacy user-clip-plane distances (which must precede the
 * emit_vpos_instructions() call made by our caller).
 */
static void
emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
{
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
      emit_clip_distance_instructions(emit);

   } else if (emit->clip_mode == CLIP_VERTEX) {
      /* Convert TGSI CLIPVERTEX to CLIPDIST */
      emit_clip_vertex_instructions(emit);
   }

   /**
    * Emit vertex position and take care of legacy user planes only if
    * there is a valid vertex position register index.
    * This is to take care of the case
    * where the shader doesn't output vertex position. Then in
    * this case, don't bother to emit more vertex instructions.
    */
   if (emit->vposition.out_index == INVALID_INDEX)
      return;

   /**
    * Emit per-vertex clipping instructions for legacy user defined clip planes.
    * NOTE: we must emit the clip distance instructions before the
    * emit_vpos_instructions() call since the later function will change
    * the TEMP[vs_pos_tmp_index] value.
    */
   if (emit->clip_mode == CLIP_LEGACY) {
      /* Emit CLIPDIST for legacy user defined clip planes */
      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
   }
}


/**
 * Emit extra per-vertex instructions.  This includes clip-coordinate
 * space conversion and computing clip distances.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
6003 */ 6004static void 6005emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6006{ 6007 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 6008 6009 /* Emit clipping instructions based on clipping mode */ 6010 emit_clipping_instructions(emit); 6011 6012 /** 6013 * Reset the temporary vertex position register index 6014 * so that emit_dst_register() will use the real vertex position output 6015 */ 6016 emit->vposition.tmp_index = INVALID_INDEX; 6017 6018 /* Emit vertex position instructions */ 6019 emit_vpos_instructions(emit, vs_pos_tmp_index); 6020 6021 /* Restore original vposition.tmp_index value for the next GS vertex. 6022 * It doesn't matter for VS. 6023 */ 6024 emit->vposition.tmp_index = vs_pos_tmp_index; 6025} 6026 6027/** 6028 * Translate the TGSI_OPCODE_EMIT GS instruction. 6029 */ 6030static boolean 6031emit_vertex(struct svga_shader_emitter_v10 *emit, 6032 const struct tgsi_full_instruction *inst) 6033{ 6034 unsigned ret = TRUE; 6035 6036 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6037 6038 emit_vertex_instructions(emit); 6039 6040 /* We can't use emit_simple() because the TGSI instruction has one 6041 * operand (vertex stream number) which we must ignore for VGPU10. 6042 */ 6043 begin_emit_instruction(emit); 6044 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 6045 end_emit_instruction(emit); 6046 6047 return ret; 6048} 6049 6050 6051/** 6052 * Emit the extra code to convert from VGPU10's boolean front-face 6053 * register to TGSI's signed front-face register. 6054 * 6055 * TODO: Make temporary front-face register a scalar. 
 */
static void
emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* Nothing to do unless the shader actually reads the front-face input */
   if (emit->fs.face_input_index != INVALID_INDEX) {
      /* convert vgpu10 boolean face register to gallium +/-1 value */
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.face_tmp_index);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register neg_one =
         make_immediate_reg_float(emit, -1.0f);

      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
      emit_dst_register(emit, &tmp_dst);
      emit_face_register(emit);
      emit_src_register(emit, &one);
      emit_src_register(emit, &neg_one);
      end_emit_instruction(emit);
   }
}


/**
 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
 * The xyz components are copied through unchanged; only w is inverted.
 */
static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
      struct tgsi_full_dst_register tmp_dst =
         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
      struct tgsi_full_dst_register tmp_dst_xyz =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
      struct tgsi_full_dst_register tmp_dst_w =
         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      struct tgsi_full_src_register fragcoord =
         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);

      /* save the input index */
      unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
      /* set to invalid to prevent substitution in emit_src_register() */
      emit->fs.fragcoord_input_index = INVALID_INDEX;

      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &tmp_dst_xyz);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
      emit_dst_register(emit, &tmp_dst_w);
      emit_src_register(emit, &one);
      emit_src_register(emit, &fragcoord);
      end_emit_instruction(emit);

      /* restore saved value */
      emit->fs.fragcoord_input_index = fragcoord_input_index;
   }
}


/**
 * Emit extra instructions to adjust VS inputs/attributes.  This can
 * mean casting a vertex attribute from int to float or setting the
 * W component to 1, or both.  One adjustment per attribute is emitted,
 * except that BGRA swizzling and the w=1 fix-up may be stacked on top
 * of the conversion chosen for the same attribute.
 */
static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;

   /* Union of every attribute needing any kind of adjustment */
   unsigned adjust_mask = (save_w_1_mask |
                           save_itof_mask |
                           save_utof_mask |
                           save_is_bgra_mask |
                           save_puint_to_snorm_mask |
                           save_puint_to_uscaled_mask |
                           save_puint_to_sscaled_mask);

   assert(emit->unit == PIPE_SHADER_VERTEX);

   if (adjust_mask) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      struct tgsi_full_src_register one_int =
         make_immediate_reg_int(emit, 1);

      /* We need to turn off these bitmasks while emitting the
       * instructions below, then restore them afterward.
       * Otherwise emit_src_register() would re-apply the same
       * substitution to the raw inputs we read here.
       */
      emit->key.vs.adjust_attrib_w_1 = 0;
      emit->key.vs.adjust_attrib_itof = 0;
      emit->key.vs.adjust_attrib_utof = 0;
      emit->key.vs.attrib_is_bgra = 0;
      emit->key.vs.attrib_puint_to_snorm = 0;
      emit->key.vs.attrib_puint_to_uscaled = 0;
      emit->key.vs.attrib_puint_to_sscaled = 0;

      while (adjust_mask) {
         unsigned index = u_bit_scan(&adjust_mask);

         /* skip the instruction if this vertex attribute is not being used */
         if (emit->info.input_usage_mask[index] == 0)
            continue;

         unsigned tmp = emit->vs.adjusted_input[index];
         struct tgsi_full_src_register input_src =
            make_src_reg(TGSI_FILE_INPUT, index);

         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
         struct tgsi_full_dst_register tmp_dst_w =
            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);

         /* ITOF/UTOF/MOV tmp, input[index] */
         if (save_itof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_utof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_puint_to_snorm_mask & (1 << index)) {
            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_uscaled_mask & (1 << index)) {
            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_sscaled_mask & (1 << index)) {
            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
         }
         else {
            /* No conversion needed; only BGRA swap and/or w=1 below */
            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                 &tmp_dst, &input_src, FALSE);
         }

         if (save_is_bgra_mask & (1 << index)) {
            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
         }

         if (save_w_1_mask & (1 << index)) {
            /* MOV tmp.w, 1.0 */
            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one_int, FALSE);
            }
            else {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one, FALSE);
            }
         }
      }

      /* Restore the masks saved above */
      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
      emit->key.vs.adjust_attrib_itof = save_itof_mask;
      emit->key.vs.adjust_attrib_utof = save_utof_mask;
      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   }
}


/**
 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
 * to implement some instructions. We pre-allocate those values here
 * in the immediate constant buffer.  The positions are recorded in
 * emit->common_immediate_pos[] for later lookup.
 */
static void
alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
{
   unsigned n = 0;

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);

   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 0, 1, 0, -1);

   /* Extra constants only needed by the packed-uint attribute fix-ups */
   if (emit->key.vs.attrib_puint_to_snorm) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
   }

   if (emit->key.vs.attrib_puint_to_uscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   }

   if (emit->key.vs.attrib_puint_to_sscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 12, 2, 0);

      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 30, 0, 0);
   }

   /* Make sure the table above didn't outgrow the position array */
   assert(n <= Elements(emit->common_immediate_pos));
   emit->num_common_immediates = n;
}
6281 6282 6283/** 6284 * Emit any extra/helper declarations/code that we might need between 6285 * the declaration section and code section. 6286 */ 6287static boolean 6288emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 6289{ 6290 /* Properties */ 6291 if (emit->unit == PIPE_SHADER_GEOMETRY) 6292 emit_property_instructions(emit); 6293 6294 /* Declare inputs */ 6295 if (!emit_input_declarations(emit)) 6296 return FALSE; 6297 6298 /* Declare outputs */ 6299 if (!emit_output_declarations(emit)) 6300 return FALSE; 6301 6302 /* Declare temporary registers */ 6303 emit_temporaries_declaration(emit); 6304 6305 /* Declare constant registers */ 6306 emit_constant_declaration(emit); 6307 6308 /* Declare samplers and resources */ 6309 emit_sampler_declarations(emit); 6310 emit_resource_declarations(emit); 6311 6312 /* Declare clip distance output registers */ 6313 if (emit->unit == PIPE_SHADER_VERTEX || 6314 emit->unit == PIPE_SHADER_GEOMETRY) { 6315 emit_clip_distance_declarations(emit); 6316 } 6317 6318 alloc_common_immediates(emit); 6319 6320 if (emit->unit == PIPE_SHADER_FRAGMENT && 6321 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6322 float alpha = emit->key.fs.alpha_ref; 6323 emit->fs.alpha_ref_index = 6324 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 6325 } 6326 6327 /* Now, emit the constant block containing all the immediates 6328 * declared by shader, as well as the extra ones seen above. 6329 */ 6330 emit_vgpu10_immediates_block(emit); 6331 6332 if (emit->unit == PIPE_SHADER_FRAGMENT) { 6333 emit_frontface_instructions(emit); 6334 emit_fragcoord_instructions(emit); 6335 } 6336 else if (emit->unit == PIPE_SHADER_VERTEX) { 6337 emit_vertex_attrib_instructions(emit); 6338 } 6339 6340 return TRUE; 6341} 6342 6343 6344/** 6345 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 6346 * against the alpha reference value and discards the fragment if the 6347 * comparison fails. 
6348 */ 6349static void 6350emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, 6351 unsigned fs_color_tmp_index) 6352{ 6353 /* compare output color's alpha to alpha ref and kill */ 6354 unsigned tmp = get_temp_index(emit); 6355 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6356 struct tgsi_full_src_register tmp_src_x = 6357 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6358 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6359 struct tgsi_full_src_register color_src = 6360 make_src_temp_reg(fs_color_tmp_index); 6361 struct tgsi_full_src_register color_src_w = 6362 scalar_src(&color_src, TGSI_SWIZZLE_W); 6363 struct tgsi_full_src_register ref_src = 6364 make_src_immediate_reg(emit->fs.alpha_ref_index); 6365 struct tgsi_full_dst_register color_dst = 6366 make_dst_output_reg(emit->fs.color_out_index[0]); 6367 6368 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6369 6370 /* dst = src0 'alpha_func' src1 */ 6371 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst, 6372 &color_src_w, &ref_src); 6373 6374 /* DISCARD if dst.x == 0 */ 6375 begin_emit_instruction(emit); 6376 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ 6377 emit_src_register(emit, &tmp_src_x); 6378 end_emit_instruction(emit); 6379 6380 /* If we don't need to broadcast the color below or set fragments to 6381 * white, emit final color here. 6382 */ 6383 if (emit->key.fs.write_color0_to_n_cbufs <= 1 && 6384 !emit->key.fs.white_fragments) { 6385 /* MOV output.color, tempcolor */ 6386 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6387 &color_src, FALSE); /* XXX saturate? */ 6388 } 6389 6390 free_temp_indexes(emit); 6391} 6392 6393 6394/** 6395 * When we need to emit white for all fragments (for emulating XOR logicop 6396 * mode), this function copies white into the temporary color output register. 
6397 */ 6398static void 6399emit_set_color_white(struct svga_shader_emitter_v10 *emit, 6400 unsigned fs_color_tmp_index) 6401{ 6402 struct tgsi_full_dst_register color_dst = 6403 make_dst_temp_reg(fs_color_tmp_index); 6404 struct tgsi_full_src_register white = 6405 make_immediate_reg_float(emit, 1.0f); 6406 6407 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE); 6408} 6409 6410 6411/** 6412 * Emit instructions for writing a single color output to multiple 6413 * color buffers. 6414 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or 6415 * when key.fs.white_fragments is true). 6416 * property is set and the number of render targets is greater than one. 6417 * \param fs_color_tmp_index index of the temp register that holds the 6418 * color to broadcast. 6419 */ 6420static void 6421emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, 6422 unsigned fs_color_tmp_index) 6423{ 6424 const unsigned n = emit->key.fs.write_color0_to_n_cbufs; 6425 unsigned i; 6426 struct tgsi_full_src_register color_src = 6427 make_src_temp_reg(fs_color_tmp_index); 6428 6429 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6430 6431 for (i = 0; i < n; i++) { 6432 unsigned output_reg = emit->fs.color_out_index[i]; 6433 struct tgsi_full_dst_register color_dst = 6434 make_dst_output_reg(output_reg); 6435 6436 /* Fill in this semantic here since we'll use it later in 6437 * emit_dst_register(). 6438 */ 6439 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR; 6440 6441 /* MOV output.color[i], tempcolor */ 6442 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, 6443 &color_src, FALSE); /* XXX saturate? */ 6444 } 6445} 6446 6447 6448/** 6449 * Emit extra helper code after the original shader code, but before the 6450 * last END/RET instruction. 6451 * For vertex shaders this means emitting the extra code to apply the 6452 * prescale scale/translation. 
 */
static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;

      /* We no longer want emit_dst_register() to substitute the
       * temporary fragment color register for the real color output.
       */
      emit->fs.color_tmp_index = INVALID_INDEX;

      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
         emit_alpha_test_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.white_fragments) {
         emit_set_color_white(emit, fs_color_tmp_index);
      }
      /* white_fragments also implies a broadcast to write the (white)
       * temp color to the real color output(s).
       */
      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
          emit->key.fs.white_fragments) {
         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
      }
   }

   return TRUE;
}


/**
 * Translate the TGSI tokens into VGPU10 tokens.
 * Pre-helper code (declarations, immediates, input fix-ups) is emitted
 * lazily just before the first TGSI instruction is seen.
 * \return FALSE if any token failed to translate.
 */
static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   boolean ret = TRUE;
   boolean pre_helpers_emitted = FALSE;
   unsigned inst_number = 0;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* All declarations/immediates precede the first instruction,
          * so this is the right point to emit the helper prolog once.
          */
         if (!pre_helpers_emitted) {
            ret = emit_pre_helpers(emit);
            if (!ret)
               goto done;
            pre_helpers_emitted = TRUE;
         }
         ret = emit_vgpu10_instruction(emit, inst_number++,
                                       &parse.FullToken.FullInstruction);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
         if (!ret)
            goto done;
         break;

      default:
         break;
      }
   }

done:
   tgsi_parse_free(&parse);
   return ret;
}


/**
 * Emit the first VGPU10 shader tokens.
 */
static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken ptoken;

   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
   ptoken.majorVersion = 4;
   ptoken.minorVersion = 0;
   ptoken.programType = translate_shader_type(emit->unit);
   if (!emit_dword(emit, ptoken.value))
      return FALSE;

   /* Second token: total length of shader, in tokens.  We can't fill this
    * in until we're all done.  Emit zero for now.
    */
   return emit_dword(emit, 0);
}


/**
 * Patch up the shader-length placeholder written by emit_vgpu10_header().
 */
static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken *tokens;

   /* Replace the second token with total shader length */
   tokens = (VGPU10ProgramToken *) emit->buf;
   tokens[1].value = emit_get_num_tokens(emit);

   return TRUE;
}


/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 * Returns a newly allocated token string; the caller owns it.
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }
   tokens = tgsi_add_two_side(tokens);
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}


/**
 * Modify the FS to do polygon stipple.
 * Returns a newly allocated token string; the caller owns it.
 * Also records the sampler unit chosen for the stipple texture and
 * sets up the corresponding texture swizzle state in the compile key.
 */
static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_token *tokens)
{
   const struct tgsi_token *new_tokens;
   unsigned unit;

   if (0) {
      debug_printf("Before pstipple ------------------\n");
      tgsi_dump(tokens,0);
   }

   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                     TGSI_FILE_INPUT);

   emit->fs.pstipple_sampler_unit = unit;

   /* Setup texture state for stipple */
   emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;

   if (0) {
      debug_printf("After pstipple ------------------\n");
      tgsi_dump(new_tokens, 0);
   }

   return new_tokens;
}

/**
 * Modify the FS to support anti-aliasing point.
 * Returns a newly allocated token string; the caller owns it.
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }
   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}

/**
 * This is the main entrypoint for the TGSI -> VGPU10 translator.
6655 */ 6656struct svga_shader_variant * 6657svga_tgsi_vgpu10_translate(struct svga_context *svga, 6658 const struct svga_shader *shader, 6659 const struct svga_compile_key *key, 6660 unsigned unit) 6661{ 6662 struct svga_shader_variant *variant = NULL; 6663 struct svga_shader_emitter_v10 *emit; 6664 const struct tgsi_token *tokens = shader->tokens; 6665 struct svga_vertex_shader *vs = svga->curr.vs; 6666 struct svga_geometry_shader *gs = svga->curr.gs; 6667 6668 assert(unit == PIPE_SHADER_VERTEX || 6669 unit == PIPE_SHADER_GEOMETRY || 6670 unit == PIPE_SHADER_FRAGMENT); 6671 6672 /* These two flags cannot be used together */ 6673 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6674 6675 /* 6676 * Setup the code emitter 6677 */ 6678 emit = alloc_emitter(); 6679 if (!emit) 6680 return NULL; 6681 6682 emit->unit = unit; 6683 emit->key = *key; 6684 6685 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6686 emit->key.gs.need_prescale); 6687 emit->vposition.tmp_index = INVALID_INDEX; 6688 emit->vposition.so_index = INVALID_INDEX; 6689 emit->vposition.out_index = INVALID_INDEX; 6690 6691 emit->fs.color_tmp_index = INVALID_INDEX; 6692 emit->fs.face_input_index = INVALID_INDEX; 6693 emit->fs.fragcoord_input_index = INVALID_INDEX; 6694 6695 emit->gs.prim_id_index = INVALID_INDEX; 6696 6697 emit->clip_dist_out_index = INVALID_INDEX; 6698 emit->clip_dist_tmp_index = INVALID_INDEX; 6699 emit->clip_dist_so_index = INVALID_INDEX; 6700 emit->clip_vertex_out_index = INVALID_INDEX; 6701 6702 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6703 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6704 } 6705 6706 if (unit == PIPE_SHADER_FRAGMENT) { 6707 if (key->fs.light_twoside) { 6708 tokens = transform_fs_twoside(tokens); 6709 } 6710 if (key->fs.pstipple) { 6711 const struct tgsi_token *new_tokens = 6712 transform_fs_pstipple(emit, tokens); 6713 if (tokens != shader->tokens) { 6714 /* free the two-sided shader tokens */ 6715 tgsi_free_tokens(tokens); 6716 } 
6717 tokens = new_tokens; 6718 } 6719 if (key->fs.aa_point) { 6720 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6721 } 6722 } 6723 6724 if (SVGA_DEBUG & DEBUG_TGSI) { 6725 debug_printf("#####################################\n"); 6726 debug_printf("### TGSI Shader %u\n", shader->id); 6727 tgsi_dump(tokens, 0); 6728 } 6729 6730 /** 6731 * Rescan the header if the token string is different from the one 6732 * included in the shader; otherwise, the header info is already up-to-date 6733 */ 6734 if (tokens != shader->tokens) { 6735 tgsi_scan_shader(tokens, &emit->info); 6736 } else { 6737 emit->info = shader->info; 6738 } 6739 6740 emit->num_outputs = emit->info.num_outputs; 6741 6742 if (unit == PIPE_SHADER_FRAGMENT) { 6743 /* Compute FS input remapping to match the output from VS/GS */ 6744 if (gs) { 6745 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6746 } else { 6747 assert(vs); 6748 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6749 } 6750 } else if (unit == PIPE_SHADER_GEOMETRY) { 6751 assert(vs); 6752 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6753 } 6754 6755 determine_clipping_mode(emit); 6756 6757 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6758 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6759 /* if there is stream output declarations associated 6760 * with this shader or the shader writes to ClipDistance 6761 * then reserve extra registers for the non-adjusted vertex position 6762 * and the ClipDistance shadow copy 6763 */ 6764 emit->vposition.so_index = emit->num_outputs++; 6765 6766 if (emit->clip_mode == CLIP_DISTANCE) { 6767 emit->clip_dist_so_index = emit->num_outputs++; 6768 if (emit->info.num_written_clipdistance > 4) 6769 emit->num_outputs++; 6770 } 6771 } 6772 } 6773 6774 /* 6775 * Do actual shader translation. 
6776 */ 6777 if (!emit_vgpu10_header(emit)) { 6778 debug_printf("svga: emit VGPU10 header failed\n"); 6779 goto cleanup; 6780 } 6781 6782 if (!emit_vgpu10_instructions(emit, tokens)) { 6783 debug_printf("svga: emit VGPU10 instructions failed\n"); 6784 goto cleanup; 6785 } 6786 6787 if (!emit_vgpu10_tail(emit)) { 6788 debug_printf("svga: emit VGPU10 tail failed\n"); 6789 goto cleanup; 6790 } 6791 6792 if (emit->register_overflow) { 6793 goto cleanup; 6794 } 6795 6796 /* 6797 * Create, initialize the 'variant' object. 6798 */ 6799 variant = svga_new_shader_variant(svga); 6800 if (!variant) 6801 goto cleanup; 6802 6803 variant->shader = shader; 6804 variant->nr_tokens = emit_get_num_tokens(emit); 6805 variant->tokens = (const unsigned *)emit->buf; 6806 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6807 memcpy(&variant->key, key, sizeof(*key)); 6808 variant->id = UTIL_BITMASK_INVALID_INDEX; 6809 6810 /* The extra constant starting offset starts with the number of 6811 * shader constants declared in the shader. 6812 */ 6813 variant->extra_const_start = emit->num_shader_consts[0]; 6814 if (key->gs.wide_point) { 6815 /** 6816 * The extra constant added in the transformed shader 6817 * for inverse viewport scale is to be supplied by the driver. 6818 * So the extra constant starting offset needs to be reduced by 1. 6819 */ 6820 assert(variant->extra_const_start > 0); 6821 variant->extra_const_start--; 6822 } 6823 6824 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6825 6826 /* If there was exactly one write to a fragment shader output register 6827 * and it came from a constant buffer, we know all fragments will have 6828 * the same color (except for blending). 6829 */ 6830 variant->constant_color_output = 6831 emit->constant_color_output && emit->num_output_writes == 1; 6832 6833 /** keep track in the variant if flat interpolation is used 6834 * for any of the varyings. 
6835 */ 6836 variant->uses_flat_interp = emit->uses_flat_interp; 6837 6838 if (tokens != shader->tokens) { 6839 tgsi_free_tokens(tokens); 6840 } 6841 6842cleanup: 6843 free_emitter(emit); 6844 6845 return variant; 6846} 6847