tgsi_ureg.c revision 38f6f23fcf37247fd709d1c612d08bfa9b124e69
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_context.h" 30#include "pipe/p_state.h" 31#include "tgsi/tgsi_ureg.h" 32#include "tgsi/tgsi_build.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_dump.h" 35#include "tgsi/tgsi_sanity.h" 36#include "util/u_debug.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39 40union tgsi_any_token { 41 struct tgsi_header header; 42 struct tgsi_processor processor; 43 struct tgsi_token token; 44 struct tgsi_property prop; 45 struct tgsi_property_data prop_data; 46 struct tgsi_declaration decl; 47 struct tgsi_declaration_range decl_range; 48 struct tgsi_declaration_dimension decl_dim; 49 struct tgsi_declaration_semantic decl_semantic; 50 struct tgsi_immediate imm; 51 union tgsi_immediate_data imm_data; 52 struct tgsi_instruction insn; 53 struct tgsi_instruction_predicate insn_predicate; 54 struct tgsi_instruction_label insn_label; 55 struct tgsi_instruction_texture insn_texture; 56 struct tgsi_src_register src; 57 struct tgsi_dimension dim; 58 struct tgsi_dst_register dst; 59 unsigned value; 60}; 61 62 63struct ureg_tokens { 64 union tgsi_any_token *tokens; 65 unsigned size; 66 unsigned order; 67 unsigned count; 68}; 69 70#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS 71#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS 72#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS 73#define UREG_MAX_CONSTANT_RANGE 32 74#define UREG_MAX_IMMEDIATE 32 75#define UREG_MAX_TEMP 256 76#define UREG_MAX_ADDR 2 77#define UREG_MAX_LOOP 1 78#define UREG_MAX_PRED 1 79 80struct const_decl { 81 struct { 82 unsigned first; 83 unsigned last; 84 } constant_range[UREG_MAX_CONSTANT_RANGE]; 85 unsigned nr_constant_ranges; 86}; 87 88#define DOMAIN_DECL 0 89#define DOMAIN_INSN 1 90 91struct ureg_program 92{ 93 unsigned processor; 94 struct pipe_context *pipe; 95 96 struct { 97 unsigned semantic_name; 98 unsigned semantic_index; 99 unsigned interp; 100 } fs_input[UREG_MAX_INPUT]; 101 unsigned nr_fs_inputs; 102 103 unsigned vs_inputs[UREG_MAX_INPUT/32]; 104 105 struct { 106 unsigned index; 107 } gs_input[UREG_MAX_INPUT]; 108 unsigned nr_gs_inputs; 109 110 struct { 111 unsigned index; 112 unsigned semantic_name; 113 unsigned semantic_index; 114 } system_value[UREG_MAX_SYSTEM_VALUE]; 115 unsigned nr_system_values; 116 117 struct { 118 unsigned semantic_name; 119 unsigned semantic_index; 120 } output[UREG_MAX_OUTPUT]; 121 unsigned nr_outputs; 122 123 struct { 124 union { 125 float f[4]; 126 unsigned u[4]; 127 int i[4]; 128 } value; 129 unsigned nr; 130 unsigned type; 131 } immediate[UREG_MAX_IMMEDIATE]; 132 unsigned nr_immediates; 133 134 struct ureg_src sampler[PIPE_MAX_SAMPLERS]; 135 unsigned nr_samplers; 136 137 unsigned temps_active[UREG_MAX_TEMP / 32]; 138 unsigned nr_temps; 139 140 struct const_decl const_decls; 141 struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS]; 142 143 unsigned property_gs_input_prim; 144 unsigned property_gs_output_prim; 145 unsigned property_gs_max_vertices; 146 unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ 147 unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ 148 149 unsigned nr_addrs; 150 unsigned nr_preds; 151 unsigned nr_loops; 152 unsigned nr_instructions; 153 154 struct ureg_tokens domain[2]; 155}; 156 157static union tgsi_any_token error_tokens[32]; 158 159static void tokens_error( struct ureg_tokens *tokens ) 160{ 161 if (tokens->tokens && tokens->tokens != error_tokens) 162 FREE(tokens->tokens); 163 164 tokens->tokens = error_tokens; 165 tokens->size = Elements(error_tokens); 166 tokens->count = 0; 167} 168 169 170static void tokens_expand( struct ureg_tokens *tokens, 171 unsigned count ) 172{ 173 unsigned old_size = tokens->size * sizeof(unsigned); 174 175 if (tokens->tokens == error_tokens) { 176 return; 177 } 178 179 while (tokens->count + count > tokens->size) { 180 tokens->size = (1 << ++tokens->order); 181 } 182 183 tokens->tokens = REALLOC(tokens->tokens, 184 old_size, 185 tokens->size * sizeof(unsigned)); 186 if (tokens->tokens == NULL) { 187 tokens_error(tokens); 188 } 189} 190 191static void set_bad( struct ureg_program *ureg ) 192{ 193 tokens_error(&ureg->domain[0]); 194} 195 196 197 198static union tgsi_any_token *get_tokens( struct ureg_program *ureg, 199 unsigned domain, 200 unsigned count ) 201{ 202 struct ureg_tokens *tokens = &ureg->domain[domain]; 203 union tgsi_any_token *result; 204 205 if (tokens->count + count > tokens->size) 206 tokens_expand(tokens, count); 207 208 result = &tokens->tokens[tokens->count]; 209 tokens->count += count; 210 return result; 211} 212 213 214static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, 215 unsigned domain, 216 unsigned nr ) 217{ 218 if (ureg->domain[domain].tokens == error_tokens) 219 return &error_tokens[0]; 220 221 return &ureg->domain[domain].tokens[nr]; 222} 223 224 225 226static INLINE struct ureg_dst 227ureg_dst_register( unsigned file, 228 unsigned index ) 229{ 230 struct ureg_dst dst; 231 232 dst.File = file; 233 dst.WriteMask = TGSI_WRITEMASK_XYZW; 234 dst.Indirect = 0; 235 dst.IndirectIndex = 0; 236 dst.IndirectSwizzle = 0; 237 dst.Saturate = 0; 238 dst.Predicate = 0; 239 dst.PredNegate = 0; 240 dst.PredSwizzleX = TGSI_SWIZZLE_X; 241 dst.PredSwizzleY = TGSI_SWIZZLE_Y; 242 dst.PredSwizzleZ = TGSI_SWIZZLE_Z; 243 dst.PredSwizzleW = TGSI_SWIZZLE_W; 244 dst.Index = index; 245 246 return dst; 247} 248 249 250void 251ureg_property_gs_input_prim(struct ureg_program *ureg, 252 unsigned input_prim) 253{ 254 ureg->property_gs_input_prim = input_prim; 255} 256 257void 258ureg_property_gs_output_prim(struct ureg_program *ureg, 259 unsigned output_prim) 260{ 261 ureg->property_gs_output_prim = output_prim; 262} 263 264void 265ureg_property_gs_max_vertices(struct ureg_program *ureg, 266 unsigned max_vertices) 267{ 268 ureg->property_gs_max_vertices = max_vertices; 269} 270 271void 272ureg_property_fs_coord_origin(struct ureg_program *ureg, 273 unsigned fs_coord_origin) 274{ 275 ureg->property_fs_coord_origin = fs_coord_origin; 276} 277 278void 279ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, 280 unsigned fs_coord_pixel_center) 281{ 282 ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; 283} 284 285 286 287struct ureg_src 288ureg_DECL_fs_input( struct ureg_program *ureg, 289 unsigned name, 290 unsigned index, 291 unsigned interp_mode ) 292{ 293 unsigned i; 294 295 for (i = 0; i < ureg->nr_fs_inputs; i++) { 296 if (ureg->fs_input[i].semantic_name == name && 297 ureg->fs_input[i].semantic_index == index) 298 goto out; 299 } 300 301 if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { 302 ureg->fs_input[i].semantic_name = name; 303 ureg->fs_input[i].semantic_index = index; 304 ureg->fs_input[i].interp = interp_mode; 305 ureg->nr_fs_inputs++; 306 } 307 else { 308 set_bad( ureg ); 309 } 310 311out: 312 return ureg_src_register( TGSI_FILE_INPUT, i ); 313} 314 315 316struct ureg_src 317ureg_DECL_vs_input( struct ureg_program *ureg, 318 unsigned index ) 319{ 320 assert(ureg->processor == TGSI_PROCESSOR_VERTEX); 321 322 ureg->vs_inputs[index/32] |= 1 << (index % 32); 323 return ureg_src_register( TGSI_FILE_INPUT, index ); 324} 325 326 327struct ureg_src 328ureg_DECL_gs_input(struct ureg_program *ureg, 329 unsigned index) 330{ 331 if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { 332 ureg->gs_input[ureg->nr_gs_inputs].index = index; 333 ureg->nr_gs_inputs++; 334 } else { 335 set_bad(ureg); 336 } 337 338 /* XXX: Add suport for true 2D input registers. */ 339 return ureg_src_register(TGSI_FILE_INPUT, index); 340} 341 342 343struct ureg_src 344ureg_DECL_system_value(struct ureg_program *ureg, 345 unsigned index, 346 unsigned semantic_name, 347 unsigned semantic_index) 348{ 349 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { 350 ureg->system_value[ureg->nr_system_values].index = index; 351 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; 352 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; 353 ureg->nr_system_values++; 354 } else { 355 set_bad(ureg); 356 } 357 358 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); 359} 360 361 362struct ureg_dst 363ureg_DECL_output( struct ureg_program *ureg, 364 unsigned name, 365 unsigned index ) 366{ 367 unsigned i; 368 369 for (i = 0; i < ureg->nr_outputs; i++) { 370 if (ureg->output[i].semantic_name == name && 371 ureg->output[i].semantic_index == index) 372 goto out; 373 } 374 375 if (ureg->nr_outputs < UREG_MAX_OUTPUT) { 376 ureg->output[i].semantic_name = name; 377 ureg->output[i].semantic_index = index; 378 ureg->nr_outputs++; 379 } 380 else { 381 set_bad( ureg ); 382 } 383 384out: 385 return ureg_dst_register( TGSI_FILE_OUTPUT, i ); 386} 387 388 389/* Returns a new constant register. Keep track of which have been 390 * referred to so that we can emit decls later. 391 * 392 * Constant operands declared with this function must be addressed 393 * with a two-dimensional index. 394 * 395 * There is nothing in this code to bind this constant to any tracked 396 * value or manage any constant_buffer contents -- that's the 397 * resposibility of the calling code. 398 */ 399void 400ureg_DECL_constant2D(struct ureg_program *ureg, 401 unsigned first, 402 unsigned last, 403 unsigned index2D) 404{ 405 struct const_decl *decl = &ureg->const_decls2D[index2D]; 406 407 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); 408 409 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 410 uint i = decl->nr_constant_ranges++; 411 412 decl->constant_range[i].first = first; 413 decl->constant_range[i].last = last; 414 } 415} 416 417 418/* A one-dimensional, depricated version of ureg_DECL_constant2D(). 419 * 420 * Constant operands declared with this function must be addressed 421 * with a one-dimensional index. 422 */ 423struct ureg_src 424ureg_DECL_constant(struct ureg_program *ureg, 425 unsigned index) 426{ 427 struct const_decl *decl = &ureg->const_decls; 428 unsigned minconst = index, maxconst = index; 429 unsigned i; 430 431 /* Inside existing range? 432 */ 433 for (i = 0; i < decl->nr_constant_ranges; i++) { 434 if (decl->constant_range[i].first <= index && 435 decl->constant_range[i].last >= index) { 436 goto out; 437 } 438 } 439 440 /* Extend existing range? 441 */ 442 for (i = 0; i < decl->nr_constant_ranges; i++) { 443 if (decl->constant_range[i].last == index - 1) { 444 decl->constant_range[i].last = index; 445 goto out; 446 } 447 448 if (decl->constant_range[i].first == index + 1) { 449 decl->constant_range[i].first = index; 450 goto out; 451 } 452 453 minconst = MIN2(minconst, decl->constant_range[i].first); 454 maxconst = MAX2(maxconst, decl->constant_range[i].last); 455 } 456 457 /* Create new range? 458 */ 459 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 460 i = decl->nr_constant_ranges++; 461 decl->constant_range[i].first = index; 462 decl->constant_range[i].last = index; 463 goto out; 464 } 465 466 /* Collapse all ranges down to one: 467 */ 468 i = 0; 469 decl->constant_range[0].first = minconst; 470 decl->constant_range[0].last = maxconst; 471 decl->nr_constant_ranges = 1; 472 473out: 474 assert(i < decl->nr_constant_ranges); 475 assert(decl->constant_range[i].first <= index); 476 assert(decl->constant_range[i].last >= index); 477 return ureg_src_register(TGSI_FILE_CONSTANT, index); 478} 479 480 481/* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP 482 * are legal, but will not be released. 483 */ 484struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) 485{ 486 unsigned i; 487 488 for (i = 0; i < UREG_MAX_TEMP; i += 32) { 489 int bit = ffs(~ureg->temps_active[i/32]); 490 if (bit != 0) { 491 i += bit - 1; 492 goto out; 493 } 494 } 495 496 /* No reusable temps, so allocate a new one: 497 */ 498 i = ureg->nr_temps++; 499 500out: 501 if (i < UREG_MAX_TEMP) 502 ureg->temps_active[i/32] |= 1 << (i % 32); 503 504 if (i >= ureg->nr_temps) 505 ureg->nr_temps = i + 1; 506 507 return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); 508} 509 510 511void ureg_release_temporary( struct ureg_program *ureg, 512 struct ureg_dst tmp ) 513{ 514 if(tmp.File == TGSI_FILE_TEMPORARY) 515 if (tmp.Index < UREG_MAX_TEMP) 516 ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32)); 517} 518 519 520/* Allocate a new address register. 521 */ 522struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) 523{ 524 if (ureg->nr_addrs < UREG_MAX_ADDR) 525 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); 526 527 assert( 0 ); 528 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); 529} 530 531/* Allocate a new loop register. 532 */ 533struct ureg_dst 534ureg_DECL_loop(struct ureg_program *ureg) 535{ 536 if (ureg->nr_loops < UREG_MAX_LOOP) { 537 return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); 538 } 539 540 assert(0); 541 return ureg_dst_register(TGSI_FILE_LOOP, 0); 542} 543 544/* Allocate a new predicate register. 545 */ 546struct ureg_dst 547ureg_DECL_predicate(struct ureg_program *ureg) 548{ 549 if (ureg->nr_preds < UREG_MAX_PRED) { 550 return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); 551 } 552 553 assert(0); 554 return ureg_dst_register(TGSI_FILE_PREDICATE, 0); 555} 556 557/* Allocate a new sampler. 558 */ 559struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, 560 unsigned nr ) 561{ 562 unsigned i; 563 564 for (i = 0; i < ureg->nr_samplers; i++) 565 if (ureg->sampler[i].Index == nr) 566 return ureg->sampler[i]; 567 568 if (i < PIPE_MAX_SAMPLERS) { 569 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); 570 ureg->nr_samplers++; 571 return ureg->sampler[i]; 572 } 573 574 assert( 0 ); 575 return ureg->sampler[0]; 576} 577 578 579static int 580match_or_expand_immediate( const unsigned *v, 581 unsigned nr, 582 unsigned *v2, 583 unsigned *pnr2, 584 unsigned *swizzle ) 585{ 586 unsigned nr2 = *pnr2; 587 unsigned i, j; 588 589 *swizzle = 0; 590 591 for (i = 0; i < nr; i++) { 592 boolean found = FALSE; 593 594 for (j = 0; j < nr2 && !found; j++) { 595 if (v[i] == v2[j]) { 596 *swizzle |= j << (i * 2); 597 found = TRUE; 598 } 599 } 600 601 if (!found) { 602 if (nr2 >= 4) { 603 return FALSE; 604 } 605 606 v2[nr2] = v[i]; 607 *swizzle |= nr2 << (i * 2); 608 nr2++; 609 } 610 } 611 612 /* Actually expand immediate only when fully succeeded. 613 */ 614 *pnr2 = nr2; 615 return TRUE; 616} 617 618 619static struct ureg_src 620decl_immediate( struct ureg_program *ureg, 621 const unsigned *v, 622 unsigned nr, 623 unsigned type ) 624{ 625 unsigned i, j; 626 unsigned swizzle = 0; 627 628 /* Could do a first pass where we examine all existing immediates 629 * without expanding. 630 */ 631 632 for (i = 0; i < ureg->nr_immediates; i++) { 633 if (ureg->immediate[i].type != type) { 634 continue; 635 } 636 if (match_or_expand_immediate(v, 637 nr, 638 ureg->immediate[i].value.u, 639 &ureg->immediate[i].nr, 640 &swizzle)) { 641 goto out; 642 } 643 } 644 645 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { 646 i = ureg->nr_immediates++; 647 ureg->immediate[i].type = type; 648 if (match_or_expand_immediate(v, 649 nr, 650 ureg->immediate[i].value.u, 651 &ureg->immediate[i].nr, 652 &swizzle)) { 653 goto out; 654 } 655 } 656 657 set_bad(ureg); 658 659out: 660 /* Make sure that all referenced elements are from this immediate. 661 * Has the effect of making size-one immediates into scalars. 662 */ 663 for (j = nr; j < 4; j++) { 664 swizzle |= (swizzle & 0x3) << (j * 2); 665 } 666 667 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), 668 (swizzle >> 0) & 0x3, 669 (swizzle >> 2) & 0x3, 670 (swizzle >> 4) & 0x3, 671 (swizzle >> 6) & 0x3); 672} 673 674 675struct ureg_src 676ureg_DECL_immediate( struct ureg_program *ureg, 677 const float *v, 678 unsigned nr ) 679{ 680 union { 681 float f[4]; 682 unsigned u[4]; 683 } fu; 684 unsigned int i; 685 686 for (i = 0; i < nr; i++) { 687 fu.f[i] = v[i]; 688 } 689 690 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); 691} 692 693 694struct ureg_src 695ureg_DECL_immediate_uint( struct ureg_program *ureg, 696 const unsigned *v, 697 unsigned nr ) 698{ 699 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); 700} 701 702 703struct ureg_src 704ureg_DECL_immediate_block_uint( struct ureg_program *ureg, 705 const unsigned *v, 706 unsigned nr ) 707{ 708 uint index; 709 uint i; 710 711 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { 712 set_bad(ureg); 713 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); 714 } 715 716 index = ureg->nr_immediates; 717 ureg->nr_immediates += (nr + 3) / 4; 718 719 for (i = index; i < ureg->nr_immediates; i++) { 720 ureg->immediate[i].type = TGSI_IMM_UINT32; 721 ureg->immediate[i].nr = nr > 4 ? 4 : nr; 722 memcpy(ureg->immediate[i].value.u, 723 &v[(i - index) * 4], 724 ureg->immediate[i].nr * sizeof(uint)); 725 nr -= 4; 726 } 727 728 return ureg_src_register(TGSI_FILE_IMMEDIATE, index); 729} 730 731 732struct ureg_src 733ureg_DECL_immediate_int( struct ureg_program *ureg, 734 const int *v, 735 unsigned nr ) 736{ 737 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); 738} 739 740 741void 742ureg_emit_src( struct ureg_program *ureg, 743 struct ureg_src src ) 744{ 745 unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0); 746 747 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 748 unsigned n = 0; 749 750 assert(src.File != TGSI_FILE_NULL); 751 assert(src.File != TGSI_FILE_OUTPUT); 752 assert(src.File < TGSI_FILE_COUNT); 753 754 out[n].value = 0; 755 out[n].src.File = src.File; 756 out[n].src.SwizzleX = src.SwizzleX; 757 out[n].src.SwizzleY = src.SwizzleY; 758 out[n].src.SwizzleZ = src.SwizzleZ; 759 out[n].src.SwizzleW = src.SwizzleW; 760 out[n].src.Index = src.Index; 761 out[n].src.Negate = src.Negate; 762 out[0].src.Absolute = src.Absolute; 763 n++; 764 765 if (src.Indirect) { 766 out[0].src.Indirect = 1; 767 out[n].value = 0; 768 out[n].src.File = src.IndirectFile; 769 out[n].src.SwizzleX = src.IndirectSwizzle; 770 out[n].src.SwizzleY = src.IndirectSwizzle; 771 out[n].src.SwizzleZ = src.IndirectSwizzle; 772 out[n].src.SwizzleW = src.IndirectSwizzle; 773 out[n].src.Index = src.IndirectIndex; 774 n++; 775 } 776 777 if (src.Dimension) { 778 out[0].src.Dimension = 1; 779 out[n].dim.Indirect = 0; 780 out[n].dim.Dimension = 0; 781 out[n].dim.Padding = 0; 782 out[n].dim.Index = src.DimensionIndex; 783 n++; 784 } 785 786 assert(n == size); 787} 788 789 790void 791ureg_emit_dst( struct ureg_program *ureg, 792 struct ureg_dst dst ) 793{ 794 unsigned size = (1 + 795 (dst.Indirect ? 1 : 0)); 796 797 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 798 unsigned n = 0; 799 800 assert(dst.File != TGSI_FILE_NULL); 801 assert(dst.File != TGSI_FILE_CONSTANT); 802 assert(dst.File != TGSI_FILE_INPUT); 803 assert(dst.File != TGSI_FILE_SAMPLER); 804 assert(dst.File != TGSI_FILE_IMMEDIATE); 805 assert(dst.File < TGSI_FILE_COUNT); 806 807 out[n].value = 0; 808 out[n].dst.File = dst.File; 809 out[n].dst.WriteMask = dst.WriteMask; 810 out[n].dst.Indirect = dst.Indirect; 811 out[n].dst.Index = dst.Index; 812 n++; 813 814 if (dst.Indirect) { 815 out[n].value = 0; 816 out[n].src.File = TGSI_FILE_ADDRESS; 817 out[n].src.SwizzleX = dst.IndirectSwizzle; 818 out[n].src.SwizzleY = dst.IndirectSwizzle; 819 out[n].src.SwizzleZ = dst.IndirectSwizzle; 820 out[n].src.SwizzleW = dst.IndirectSwizzle; 821 out[n].src.Index = dst.IndirectIndex; 822 n++; 823 } 824 825 assert(n == size); 826} 827 828 829static void validate( unsigned opcode, 830 unsigned nr_dst, 831 unsigned nr_src ) 832{ 833#ifdef DEBUG 834 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); 835 assert(info); 836 if(info) { 837 assert(nr_dst == info->num_dst); 838 assert(nr_src == info->num_src); 839 } 840#endif 841} 842 843struct ureg_emit_insn_result 844ureg_emit_insn(struct ureg_program *ureg, 845 unsigned opcode, 846 boolean saturate, 847 boolean predicate, 848 boolean pred_negate, 849 unsigned pred_swizzle_x, 850 unsigned pred_swizzle_y, 851 unsigned pred_swizzle_z, 852 unsigned pred_swizzle_w, 853 unsigned num_dst, 854 unsigned num_src ) 855{ 856 union tgsi_any_token *out; 857 uint count = predicate ? 2 : 1; 858 struct ureg_emit_insn_result result; 859 860 validate( opcode, num_dst, num_src ); 861 862 out = get_tokens( ureg, DOMAIN_INSN, count ); 863 out[0].insn = tgsi_default_instruction(); 864 out[0].insn.Opcode = opcode; 865 out[0].insn.Saturate = saturate; 866 out[0].insn.NumDstRegs = num_dst; 867 out[0].insn.NumSrcRegs = num_src; 868 869 result.insn_token = ureg->domain[DOMAIN_INSN].count - count; 870 result.extended_token = result.insn_token; 871 872 if (predicate) { 873 out[0].insn.Predicate = 1; 874 out[1].insn_predicate = tgsi_default_instruction_predicate(); 875 out[1].insn_predicate.Negate = pred_negate; 876 out[1].insn_predicate.SwizzleX = pred_swizzle_x; 877 out[1].insn_predicate.SwizzleY = pred_swizzle_y; 878 out[1].insn_predicate.SwizzleZ = pred_swizzle_z; 879 out[1].insn_predicate.SwizzleW = pred_swizzle_w; 880 } 881 882 ureg->nr_instructions++; 883 884 return result; 885} 886 887 888void 889ureg_emit_label(struct ureg_program *ureg, 890 unsigned extended_token, 891 unsigned *label_token ) 892{ 893 union tgsi_any_token *out, *insn; 894 895 if(!label_token) 896 return; 897 898 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 899 out[0].value = 0; 900 901 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 902 insn->insn.Label = 1; 903 904 *label_token = ureg->domain[DOMAIN_INSN].count - 1; 905} 906 907/* Will return a number which can be used in a label to point to the 908 * next instruction to be emitted. 909 */ 910unsigned 911ureg_get_instruction_number( struct ureg_program *ureg ) 912{ 913 return ureg->nr_instructions; 914} 915 916/* Patch a given label (expressed as a token number) to point to a 917 * given instruction (expressed as an instruction number). 918 */ 919void 920ureg_fixup_label(struct ureg_program *ureg, 921 unsigned label_token, 922 unsigned instruction_number ) 923{ 924 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); 925 926 out->insn_label.Label = instruction_number; 927} 928 929 930void 931ureg_emit_texture(struct ureg_program *ureg, 932 unsigned extended_token, 933 unsigned target ) 934{ 935 union tgsi_any_token *out, *insn; 936 937 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 938 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 939 940 insn->insn.Texture = 1; 941 942 out[0].value = 0; 943 out[0].insn_texture.Texture = target; 944} 945 946 947void 948ureg_fixup_insn_size(struct ureg_program *ureg, 949 unsigned insn ) 950{ 951 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); 952 953 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION); 954 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; 955} 956 957 958void 959ureg_insn(struct ureg_program *ureg, 960 unsigned opcode, 961 const struct ureg_dst *dst, 962 unsigned nr_dst, 963 const struct ureg_src *src, 964 unsigned nr_src ) 965{ 966 struct ureg_emit_insn_result insn; 967 unsigned i; 968 boolean saturate; 969 boolean predicate; 970 boolean negate = FALSE; 971 unsigned swizzle[4] = { 0 }; 972 973 saturate = nr_dst ? dst[0].Saturate : FALSE; 974 predicate = nr_dst ? dst[0].Predicate : FALSE; 975 if (predicate) { 976 negate = dst[0].PredNegate; 977 swizzle[0] = dst[0].PredSwizzleX; 978 swizzle[1] = dst[0].PredSwizzleY; 979 swizzle[2] = dst[0].PredSwizzleZ; 980 swizzle[3] = dst[0].PredSwizzleW; 981 } 982 983 insn = ureg_emit_insn(ureg, 984 opcode, 985 saturate, 986 predicate, 987 negate, 988 swizzle[0], 989 swizzle[1], 990 swizzle[2], 991 swizzle[3], 992 nr_dst, 993 nr_src); 994 995 for (i = 0; i < nr_dst; i++) 996 ureg_emit_dst( ureg, dst[i] ); 997 998 for (i = 0; i < nr_src; i++) 999 ureg_emit_src( ureg, src[i] ); 1000 1001 ureg_fixup_insn_size( ureg, insn.insn_token ); 1002} 1003 1004void 1005ureg_tex_insn(struct ureg_program *ureg, 1006 unsigned opcode, 1007 const struct ureg_dst *dst, 1008 unsigned nr_dst, 1009 unsigned target, 1010 const struct ureg_src *src, 1011 unsigned nr_src ) 1012{ 1013 struct ureg_emit_insn_result insn; 1014 unsigned i; 1015 boolean saturate; 1016 boolean predicate; 1017 boolean negate = FALSE; 1018 unsigned swizzle[4] = { 0 }; 1019 1020 saturate = nr_dst ? dst[0].Saturate : FALSE; 1021 predicate = nr_dst ? dst[0].Predicate : FALSE; 1022 if (predicate) { 1023 negate = dst[0].PredNegate; 1024 swizzle[0] = dst[0].PredSwizzleX; 1025 swizzle[1] = dst[0].PredSwizzleY; 1026 swizzle[2] = dst[0].PredSwizzleZ; 1027 swizzle[3] = dst[0].PredSwizzleW; 1028 } 1029 1030 insn = ureg_emit_insn(ureg, 1031 opcode, 1032 saturate, 1033 predicate, 1034 negate, 1035 swizzle[0], 1036 swizzle[1], 1037 swizzle[2], 1038 swizzle[3], 1039 nr_dst, 1040 nr_src); 1041 1042 ureg_emit_texture( ureg, insn.extended_token, target ); 1043 1044 for (i = 0; i < nr_dst; i++) 1045 ureg_emit_dst( ureg, dst[i] ); 1046 1047 for (i = 0; i < nr_src; i++) 1048 ureg_emit_src( ureg, src[i] ); 1049 1050 ureg_fixup_insn_size( ureg, insn.insn_token ); 1051} 1052 1053 1054void 1055ureg_label_insn(struct ureg_program *ureg, 1056 unsigned opcode, 1057 const struct ureg_src *src, 1058 unsigned nr_src, 1059 unsigned *label_token ) 1060{ 1061 struct ureg_emit_insn_result insn; 1062 unsigned i; 1063 1064 insn = ureg_emit_insn(ureg, 1065 opcode, 1066 FALSE, 1067 FALSE, 1068 FALSE, 1069 TGSI_SWIZZLE_X, 1070 TGSI_SWIZZLE_Y, 1071 TGSI_SWIZZLE_Z, 1072 TGSI_SWIZZLE_W, 1073 0, 1074 nr_src); 1075 1076 ureg_emit_label( ureg, insn.extended_token, label_token ); 1077 1078 for (i = 0; i < nr_src; i++) 1079 ureg_emit_src( ureg, src[i] ); 1080 1081 ureg_fixup_insn_size( ureg, insn.insn_token ); 1082} 1083 1084 1085 1086static void emit_decl( struct ureg_program *ureg, 1087 unsigned file, 1088 unsigned index, 1089 unsigned semantic_name, 1090 unsigned semantic_index, 1091 unsigned interp ) 1092{ 1093 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); 1094 1095 out[0].value = 0; 1096 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1097 out[0].decl.NrTokens = 3; 1098 out[0].decl.File = file; 1099 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ 1100 out[0].decl.Interpolate = interp; 1101 out[0].decl.Semantic = 1; 1102 1103 out[1].value = 0; 1104 out[1].decl_range.First = 1105 out[1].decl_range.Last = index; 1106 1107 out[2].value = 0; 1108 out[2].decl_semantic.Name = semantic_name; 1109 out[2].decl_semantic.Index = semantic_index; 1110 1111} 1112 1113 1114static void emit_decl_range( struct ureg_program *ureg, 1115 unsigned file, 1116 unsigned first, 1117 unsigned count ) 1118{ 1119 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1120 1121 out[0].value = 0; 1122 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1123 out[0].decl.NrTokens = 2; 1124 out[0].decl.File = file; 1125 out[0].decl.UsageMask = 0xf; 1126 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; 1127 out[0].decl.Semantic = 0; 1128 1129 out[1].value = 0; 1130 out[1].decl_range.First = first; 1131 out[1].decl_range.Last = first + count - 1; 1132} 1133 1134static void 1135emit_decl_range2D(struct ureg_program *ureg, 1136 unsigned file, 1137 unsigned first, 1138 unsigned last, 1139 unsigned index2D) 1140{ 1141 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1142 1143 out[0].value = 0; 1144 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1145 out[0].decl.NrTokens = 3; 1146 out[0].decl.File = file; 1147 out[0].decl.UsageMask = 0xf; 1148 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; 1149 out[0].decl.Dimension = 1; 1150 1151 out[1].value = 0; 1152 out[1].decl_range.First = first; 1153 out[1].decl_range.Last = last; 1154 1155 out[2].value = 0; 1156 out[2].decl_dim.Index2D = index2D; 1157} 1158 1159static void 1160emit_immediate( struct ureg_program *ureg, 1161 const unsigned *v, 1162 unsigned type ) 1163{ 1164 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); 1165 1166 out[0].value = 0; 1167 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; 1168 out[0].imm.NrTokens = 5; 1169 out[0].imm.DataType = type; 1170 out[0].imm.Padding = 0; 1171 1172 out[1].imm_data.Uint = v[0]; 1173 out[2].imm_data.Uint = v[1]; 1174 out[3].imm_data.Uint = v[2]; 1175 out[4].imm_data.Uint = v[3]; 1176} 1177 1178static void 1179emit_property(struct ureg_program *ureg, 1180 unsigned name, 1181 unsigned data) 1182{ 1183 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1184 1185 out[0].value = 0; 1186 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; 1187 out[0].prop.NrTokens = 2; 1188 out[0].prop.PropertyName = name; 1189 1190 out[1].prop_data.Data = data; 1191} 1192 1193 1194static void emit_decls( struct ureg_program *ureg ) 1195{ 1196 unsigned i; 1197 1198 if (ureg->property_gs_input_prim != ~0) { 1199 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1200 1201 emit_property(ureg, 1202 TGSI_PROPERTY_GS_INPUT_PRIM, 1203 ureg->property_gs_input_prim); 1204 } 1205 1206 if (ureg->property_gs_output_prim != ~0) { 1207 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1208 1209 emit_property(ureg, 1210 TGSI_PROPERTY_GS_OUTPUT_PRIM, 1211 ureg->property_gs_output_prim); 1212 } 1213 1214 if (ureg->property_gs_max_vertices != ~0) { 1215 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1216 1217 emit_property(ureg, 1218 TGSI_PROPERTY_GS_MAX_VERTICES, 1219 ureg->property_gs_max_vertices); 1220 } 1221 1222 if (ureg->property_fs_coord_origin) { 1223 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1224 1225 emit_property(ureg, 1226 TGSI_PROPERTY_FS_COORD_ORIGIN, 1227 ureg->property_fs_coord_origin); 1228 } 1229 1230 if (ureg->property_fs_coord_pixel_center) { 1231 assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); 1232 1233 emit_property(ureg, 1234 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 1235 ureg->property_fs_coord_pixel_center); 1236 } 1237 1238 if (ureg->processor == TGSI_PROCESSOR_VERTEX) { 1239 for (i = 0; i < UREG_MAX_INPUT; i++) { 1240 if (ureg->vs_inputs[i/32] & (1 << (i%32))) { 1241 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); 1242 } 1243 } 1244 } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { 1245 for (i = 0; i < ureg->nr_fs_inputs; i++) { 1246 emit_decl( ureg, 1247 TGSI_FILE_INPUT, 1248 i, 1249 ureg->fs_input[i].semantic_name, 1250 ureg->fs_input[i].semantic_index, 1251 ureg->fs_input[i].interp ); 1252 } 1253 } else { 1254 for (i = 0; i < ureg->nr_gs_inputs; i++) { 1255 emit_decl_range(ureg, 1256 TGSI_FILE_INPUT, 1257 ureg->gs_input[i].index, 1258 1); 1259 } 1260 } 1261 1262 for (i = 0; i < ureg->nr_system_values; i++) { 1263 emit_decl(ureg, 1264 TGSI_FILE_SYSTEM_VALUE, 1265 ureg->system_value[i].index, 1266 ureg->system_value[i].semantic_name, 1267 ureg->system_value[i].semantic_index, 1268 TGSI_INTERPOLATE_CONSTANT); 1269 } 1270 1271 for (i = 0; i < ureg->nr_outputs; i++) { 1272 emit_decl( ureg, 1273 TGSI_FILE_OUTPUT, 1274 i, 1275 ureg->output[i].semantic_name, 1276 ureg->output[i].semantic_index, 1277 TGSI_INTERPOLATE_CONSTANT ); 1278 } 1279 1280 for (i = 0; i < ureg->nr_samplers; i++) { 1281 emit_decl_range( ureg, 1282 TGSI_FILE_SAMPLER, 1283 ureg->sampler[i].Index, 1 ); 1284 } 1285 1286 if (ureg->const_decls.nr_constant_ranges) { 1287 for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { 1288 emit_decl_range(ureg, 1289 TGSI_FILE_CONSTANT, 1290 ureg->const_decls.constant_range[i].first, 1291 ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1); 1292 } 1293 } 1294 1295 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 1296 struct const_decl *decl = &ureg->const_decls2D[i]; 1297 1298 if (decl->nr_constant_ranges) { 1299 uint j; 1300 1301 for (j = 0; j < decl->nr_constant_ranges; j++) { 1302 emit_decl_range2D(ureg, 1303 TGSI_FILE_CONSTANT, 1304 decl->constant_range[j].first, 1305 decl->constant_range[j].last, 1306 i); 1307 } 1308 } 1309 } 1310 1311 if (ureg->nr_temps) { 1312 emit_decl_range( ureg, 1313 TGSI_FILE_TEMPORARY, 1314 0, ureg->nr_temps ); 1315 } 1316 1317 if (ureg->nr_addrs) { 1318 emit_decl_range( ureg, 1319 TGSI_FILE_ADDRESS, 1320 0, ureg->nr_addrs ); 1321 } 1322 1323 if (ureg->nr_loops) { 1324 emit_decl_range(ureg, 1325 TGSI_FILE_LOOP, 1326 0, 1327 ureg->nr_loops); 1328 } 1329 1330 if (ureg->nr_preds) { 1331 emit_decl_range(ureg, 1332 TGSI_FILE_PREDICATE, 1333 0, 1334 ureg->nr_preds); 1335 } 1336 1337 for (i = 0; i < ureg->nr_immediates; i++) { 1338 emit_immediate( ureg, 1339 ureg->immediate[i].value.u, 1340 ureg->immediate[i].type ); 1341 } 1342} 1343 1344/* Append the instruction tokens onto the declarations to build a 1345 * contiguous stream suitable to send to the driver. 1346 */ 1347static void copy_instructions( struct ureg_program *ureg ) 1348{ 1349 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; 1350 union tgsi_any_token *out = get_tokens( ureg, 1351 DOMAIN_DECL, 1352 nr_tokens ); 1353 1354 memcpy(out, 1355 ureg->domain[DOMAIN_INSN].tokens, 1356 nr_tokens * sizeof out[0] ); 1357} 1358 1359 1360static void 1361fixup_header_size(struct ureg_program *ureg) 1362{ 1363 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); 1364 1365 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; 1366} 1367 1368 1369static void 1370emit_header( struct ureg_program *ureg ) 1371{ 1372 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1373 1374 out[0].header.HeaderSize = 2; 1375 out[0].header.BodySize = 0; 1376 1377 out[1].processor.Processor = ureg->processor; 1378 out[1].processor.Padding = 0; 1379} 1380 1381 1382const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) 1383{ 1384 const struct tgsi_token *tokens; 1385 1386 emit_header( ureg ); 1387 emit_decls( ureg ); 1388 copy_instructions( ureg ); 1389 fixup_header_size( ureg ); 1390 1391 if (ureg->domain[0].tokens == error_tokens || 1392 ureg->domain[1].tokens == error_tokens) { 1393 debug_printf("%s: error in generated shader\n", __FUNCTION__); 1394 assert(0); 1395 return NULL; 1396 } 1397 1398 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1399 1400 if (0) { 1401 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, 1402 ureg->domain[DOMAIN_DECL].count); 1403 tgsi_dump( tokens, 0 ); 1404 } 1405 1406#if DEBUG 1407 if (tokens && !tgsi_sanity_check(tokens)) { 1408 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); 1409 tgsi_dump(tokens, 0); 1410 assert(0); 1411 } 1412#endif 1413 1414 1415 return tokens; 1416} 1417 1418 1419void *ureg_create_shader( struct ureg_program *ureg, 1420 struct pipe_context *pipe ) 1421{ 1422 struct pipe_shader_state state; 1423 1424 state.tokens = ureg_finalize(ureg); 1425 if(!state.tokens) 1426 return NULL; 1427 1428 if (ureg->processor == TGSI_PROCESSOR_VERTEX) 1429 return pipe->create_vs_state( pipe, &state ); 1430 else 1431 return pipe->create_fs_state( pipe, &state ); 1432} 1433 1434 1435const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, 1436 unsigned *nr_tokens ) 1437{ 1438 const struct tgsi_token *tokens; 1439 1440 ureg_finalize(ureg); 1441 1442 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1443 1444 if (nr_tokens) 1445 *nr_tokens = ureg->domain[DOMAIN_DECL].size; 1446 1447 ureg->domain[DOMAIN_DECL].tokens = 0; 1448 ureg->domain[DOMAIN_DECL].size = 0; 1449 ureg->domain[DOMAIN_DECL].order = 0; 1450 ureg->domain[DOMAIN_DECL].count = 0; 1451 1452 return tokens; 1453} 1454 1455 1456struct ureg_program *ureg_create( unsigned processor ) 1457{ 1458 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); 1459 if (ureg == NULL) 1460 return NULL; 1461 1462 ureg->processor = processor; 1463 ureg->property_gs_input_prim = ~0; 1464 ureg->property_gs_output_prim = ~0; 1465 ureg->property_gs_max_vertices = ~0; 1466 return ureg; 1467} 1468 1469 1470void ureg_destroy( struct ureg_program *ureg ) 1471{ 1472 unsigned i; 1473 1474 for (i = 0; i < Elements(ureg->domain); i++) { 1475 if (ureg->domain[i].tokens && 1476 ureg->domain[i].tokens != error_tokens) 1477 FREE(ureg->domain[i].tokens); 1478 } 1479 1480 FREE(ureg); 1481} 1482