/* tgsi_ureg.c revision 7c5f255201f42303188137f56ea8acc030444f0e */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_context.h" 30#include "pipe/p_state.h" 31#include "tgsi/tgsi_ureg.h" 32#include "tgsi/tgsi_build.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_dump.h" 35#include "tgsi/tgsi_sanity.h" 36#include "util/u_memory.h" 37#include "util/u_math.h" 38 39union tgsi_any_token { 40 struct tgsi_header header; 41 struct tgsi_processor processor; 42 struct tgsi_token token; 43 struct tgsi_property prop; 44 struct tgsi_property_data prop_data; 45 struct tgsi_declaration decl; 46 struct tgsi_declaration_range decl_range; 47 struct tgsi_declaration_dimension decl_dim; 48 struct tgsi_declaration_semantic decl_semantic; 49 struct tgsi_immediate imm; 50 union tgsi_immediate_data imm_data; 51 struct tgsi_instruction insn; 52 struct tgsi_instruction_predicate insn_predicate; 53 struct tgsi_instruction_label insn_label; 54 struct tgsi_instruction_texture insn_texture; 55 struct tgsi_src_register src; 56 struct tgsi_dimension dim; 57 struct tgsi_dst_register dst; 58 unsigned value; 59}; 60 61 62struct ureg_tokens { 63 union tgsi_any_token *tokens; 64 unsigned size; 65 unsigned order; 66 unsigned count; 67}; 68 69#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS 70#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS 71#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS 72#define UREG_MAX_CONSTANT_RANGE 32 73#define UREG_MAX_IMMEDIATE 32 74#define UREG_MAX_TEMP 256 75#define UREG_MAX_ADDR 2 76#define UREG_MAX_LOOP 1 77#define UREG_MAX_PRED 1 78 79struct const_decl { 80 struct { 81 unsigned first; 82 unsigned last; 83 } constant_range[UREG_MAX_CONSTANT_RANGE]; 84 unsigned nr_constant_ranges; 85}; 86 87#define DOMAIN_DECL 0 88#define DOMAIN_INSN 1 89 90struct ureg_program 91{ 92 unsigned processor; 93 struct pipe_context *pipe; 94 95 struct { 96 unsigned semantic_name; 97 unsigned semantic_index; 98 unsigned interp; 99 } fs_input[UREG_MAX_INPUT]; 100 unsigned nr_fs_inputs; 101 102 unsigned 
vs_inputs[UREG_MAX_INPUT/32]; 103 104 struct { 105 unsigned index; 106 } gs_input[UREG_MAX_INPUT]; 107 unsigned nr_gs_inputs; 108 109 struct { 110 unsigned index; 111 unsigned semantic_name; 112 unsigned semantic_index; 113 } system_value[UREG_MAX_SYSTEM_VALUE]; 114 unsigned nr_system_values; 115 116 struct { 117 unsigned semantic_name; 118 unsigned semantic_index; 119 } output[UREG_MAX_OUTPUT]; 120 unsigned nr_outputs; 121 122 struct { 123 union { 124 float f[4]; 125 unsigned u[4]; 126 int i[4]; 127 } value; 128 unsigned nr; 129 unsigned type; 130 } immediate[UREG_MAX_IMMEDIATE]; 131 unsigned nr_immediates; 132 133 struct ureg_src sampler[PIPE_MAX_SAMPLERS]; 134 unsigned nr_samplers; 135 136 unsigned temps_active[UREG_MAX_TEMP / 32]; 137 unsigned nr_temps; 138 139 struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS]; 140 141 unsigned property_gs_input_prim; 142 143 unsigned nr_addrs; 144 unsigned nr_preds; 145 unsigned nr_loops; 146 unsigned nr_instructions; 147 148 struct ureg_tokens domain[2]; 149}; 150 151static union tgsi_any_token error_tokens[32]; 152 153static void tokens_error( struct ureg_tokens *tokens ) 154{ 155 if (tokens->tokens && tokens->tokens != error_tokens) 156 FREE(tokens->tokens); 157 158 tokens->tokens = error_tokens; 159 tokens->size = Elements(error_tokens); 160 tokens->count = 0; 161} 162 163 164static void tokens_expand( struct ureg_tokens *tokens, 165 unsigned count ) 166{ 167 unsigned old_size = tokens->size * sizeof(unsigned); 168 169 if (tokens->tokens == error_tokens) { 170 return; 171 } 172 173 while (tokens->count + count > tokens->size) { 174 tokens->size = (1 << ++tokens->order); 175 } 176 177 tokens->tokens = REALLOC(tokens->tokens, 178 old_size, 179 tokens->size * sizeof(unsigned)); 180 if (tokens->tokens == NULL) { 181 tokens_error(tokens); 182 } 183} 184 185static void set_bad( struct ureg_program *ureg ) 186{ 187 tokens_error(&ureg->domain[0]); 188} 189 190 191 192static union tgsi_any_token *get_tokens( struct 
ureg_program *ureg, 193 unsigned domain, 194 unsigned count ) 195{ 196 struct ureg_tokens *tokens = &ureg->domain[domain]; 197 union tgsi_any_token *result; 198 199 if (tokens->count + count > tokens->size) 200 tokens_expand(tokens, count); 201 202 result = &tokens->tokens[tokens->count]; 203 tokens->count += count; 204 return result; 205} 206 207 208static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, 209 unsigned domain, 210 unsigned nr ) 211{ 212 if (ureg->domain[domain].tokens == error_tokens) 213 return &error_tokens[0]; 214 215 return &ureg->domain[domain].tokens[nr]; 216} 217 218 219 220static INLINE struct ureg_dst 221ureg_dst_register( unsigned file, 222 unsigned index ) 223{ 224 struct ureg_dst dst; 225 226 dst.File = file; 227 dst.WriteMask = TGSI_WRITEMASK_XYZW; 228 dst.Indirect = 0; 229 dst.IndirectIndex = 0; 230 dst.IndirectSwizzle = 0; 231 dst.Saturate = 0; 232 dst.Predicate = 0; 233 dst.PredNegate = 0; 234 dst.PredSwizzleX = TGSI_SWIZZLE_X; 235 dst.PredSwizzleY = TGSI_SWIZZLE_Y; 236 dst.PredSwizzleZ = TGSI_SWIZZLE_Z; 237 dst.PredSwizzleW = TGSI_SWIZZLE_W; 238 dst.Index = index; 239 240 return dst; 241} 242 243 244void 245ureg_property_gs_input_prim(struct ureg_program *ureg, 246 unsigned gs_input_prim) 247{ 248 ureg->property_gs_input_prim = gs_input_prim; 249} 250 251 252 253struct ureg_src 254ureg_DECL_fs_input( struct ureg_program *ureg, 255 unsigned name, 256 unsigned index, 257 unsigned interp_mode ) 258{ 259 unsigned i; 260 261 for (i = 0; i < ureg->nr_fs_inputs; i++) { 262 if (ureg->fs_input[i].semantic_name == name && 263 ureg->fs_input[i].semantic_index == index) 264 goto out; 265 } 266 267 if (ureg->nr_fs_inputs < UREG_MAX_INPUT) { 268 ureg->fs_input[i].semantic_name = name; 269 ureg->fs_input[i].semantic_index = index; 270 ureg->fs_input[i].interp = interp_mode; 271 ureg->nr_fs_inputs++; 272 } 273 else { 274 set_bad( ureg ); 275 } 276 277out: 278 return ureg_src_register( TGSI_FILE_INPUT, i ); 279} 280 281 282struct 
ureg_src 283ureg_DECL_vs_input( struct ureg_program *ureg, 284 unsigned index ) 285{ 286 assert(ureg->processor == TGSI_PROCESSOR_VERTEX); 287 288 ureg->vs_inputs[index/32] |= 1 << (index % 32); 289 return ureg_src_register( TGSI_FILE_INPUT, index ); 290} 291 292 293struct ureg_src 294ureg_DECL_gs_input(struct ureg_program *ureg, 295 unsigned index) 296{ 297 if (ureg->nr_gs_inputs < UREG_MAX_INPUT) { 298 ureg->gs_input[ureg->nr_gs_inputs].index = index; 299 ureg->nr_gs_inputs++; 300 } else { 301 set_bad(ureg); 302 } 303 304 /* XXX: Add suport for true 2D input registers. */ 305 return ureg_src_register(TGSI_FILE_INPUT, index); 306} 307 308 309struct ureg_src 310ureg_DECL_system_value(struct ureg_program *ureg, 311 unsigned index, 312 unsigned semantic_name, 313 unsigned semantic_index) 314{ 315 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) { 316 ureg->system_value[ureg->nr_system_values].index = index; 317 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name; 318 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index; 319 ureg->nr_system_values++; 320 } else { 321 set_bad(ureg); 322 } 323 324 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index); 325} 326 327 328struct ureg_dst 329ureg_DECL_output( struct ureg_program *ureg, 330 unsigned name, 331 unsigned index ) 332{ 333 unsigned i; 334 335 for (i = 0; i < ureg->nr_outputs; i++) { 336 if (ureg->output[i].semantic_name == name && 337 ureg->output[i].semantic_index == index) 338 goto out; 339 } 340 341 if (ureg->nr_outputs < UREG_MAX_OUTPUT) { 342 ureg->output[i].semantic_name = name; 343 ureg->output[i].semantic_index = index; 344 ureg->nr_outputs++; 345 } 346 else { 347 set_bad( ureg ); 348 } 349 350out: 351 return ureg_dst_register( TGSI_FILE_OUTPUT, i ); 352} 353 354 355/* Returns a new constant register. Keep track of which have been 356 * referred to so that we can emit decls later. 
357 * 358 * There is nothing in this code to bind this constant to any tracked 359 * value or manage any constant_buffer contents -- that's the 360 * resposibility of the calling code. 361 */ 362void 363ureg_DECL_constant2D(struct ureg_program *ureg, 364 unsigned first, 365 unsigned last, 366 unsigned index2D) 367{ 368 struct const_decl *decl = &ureg->const_decls[index2D]; 369 370 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS); 371 372 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 373 uint i = decl->nr_constant_ranges++; 374 375 decl->constant_range[i].first = first; 376 decl->constant_range[i].last = last; 377 } 378} 379 380 381struct ureg_src 382ureg_DECL_constant(struct ureg_program *ureg, 383 unsigned index) 384{ 385 struct const_decl *decl = &ureg->const_decls[0]; 386 unsigned minconst = index, maxconst = index; 387 unsigned i; 388 389 /* Inside existing range? 390 */ 391 for (i = 0; i < decl->nr_constant_ranges; i++) { 392 if (decl->constant_range[i].first <= index && 393 decl->constant_range[i].last >= index) { 394 goto out; 395 } 396 } 397 398 /* Extend existing range? 399 */ 400 for (i = 0; i < decl->nr_constant_ranges; i++) { 401 if (decl->constant_range[i].last == index - 1) { 402 decl->constant_range[i].last = index; 403 goto out; 404 } 405 406 if (decl->constant_range[i].first == index + 1) { 407 decl->constant_range[i].first = index; 408 goto out; 409 } 410 411 minconst = MIN2(minconst, decl->constant_range[i].first); 412 maxconst = MAX2(maxconst, decl->constant_range[i].last); 413 } 414 415 /* Create new range? 
416 */ 417 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) { 418 i = decl->nr_constant_ranges++; 419 decl->constant_range[i].first = index; 420 decl->constant_range[i].last = index; 421 goto out; 422 } 423 424 /* Collapse all ranges down to one: 425 */ 426 i = 0; 427 decl->constant_range[0].first = minconst; 428 decl->constant_range[0].last = maxconst; 429 decl->nr_constant_ranges = 1; 430 431out: 432 assert(i < decl->nr_constant_ranges); 433 assert(decl->constant_range[i].first <= index); 434 assert(decl->constant_range[i].last >= index); 435 return ureg_src_register(TGSI_FILE_CONSTANT, index); 436} 437 438 439/* Allocate a new temporary. Temporaries greater than UREG_MAX_TEMP 440 * are legal, but will not be released. 441 */ 442struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) 443{ 444 unsigned i; 445 446 for (i = 0; i < UREG_MAX_TEMP; i += 32) { 447 int bit = ffs(~ureg->temps_active[i/32]); 448 if (bit != 0) { 449 i += bit - 1; 450 goto out; 451 } 452 } 453 454 /* No reusable temps, so allocate a new one: 455 */ 456 i = ureg->nr_temps++; 457 458out: 459 if (i < UREG_MAX_TEMP) 460 ureg->temps_active[i/32] |= 1 << (i % 32); 461 462 if (i >= ureg->nr_temps) 463 ureg->nr_temps = i + 1; 464 465 return ureg_dst_register( TGSI_FILE_TEMPORARY, i ); 466} 467 468 469void ureg_release_temporary( struct ureg_program *ureg, 470 struct ureg_dst tmp ) 471{ 472 if(tmp.File == TGSI_FILE_TEMPORARY) 473 if (tmp.Index < UREG_MAX_TEMP) 474 ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32)); 475} 476 477 478/* Allocate a new address register. 479 */ 480struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) 481{ 482 if (ureg->nr_addrs < UREG_MAX_ADDR) 483 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ ); 484 485 assert( 0 ); 486 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); 487} 488 489/* Allocate a new loop register. 
490 */ 491struct ureg_dst 492ureg_DECL_loop(struct ureg_program *ureg) 493{ 494 if (ureg->nr_loops < UREG_MAX_LOOP) { 495 return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); 496 } 497 498 assert(0); 499 return ureg_dst_register(TGSI_FILE_LOOP, 0); 500} 501 502/* Allocate a new predicate register. 503 */ 504struct ureg_dst 505ureg_DECL_predicate(struct ureg_program *ureg) 506{ 507 if (ureg->nr_preds < UREG_MAX_PRED) { 508 return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++); 509 } 510 511 assert(0); 512 return ureg_dst_register(TGSI_FILE_PREDICATE, 0); 513} 514 515/* Allocate a new sampler. 516 */ 517struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, 518 unsigned nr ) 519{ 520 unsigned i; 521 522 for (i = 0; i < ureg->nr_samplers; i++) 523 if (ureg->sampler[i].Index == nr) 524 return ureg->sampler[i]; 525 526 if (i < PIPE_MAX_SAMPLERS) { 527 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr ); 528 ureg->nr_samplers++; 529 return ureg->sampler[i]; 530 } 531 532 assert( 0 ); 533 return ureg->sampler[0]; 534} 535 536 537static int 538match_or_expand_immediate( const unsigned *v, 539 unsigned nr, 540 unsigned *v2, 541 unsigned *pnr2, 542 unsigned *swizzle ) 543{ 544 unsigned nr2 = *pnr2; 545 unsigned i, j; 546 547 *swizzle = 0; 548 549 for (i = 0; i < nr; i++) { 550 boolean found = FALSE; 551 552 for (j = 0; j < nr2 && !found; j++) { 553 if (v[i] == v2[j]) { 554 *swizzle |= j << (i * 2); 555 found = TRUE; 556 } 557 } 558 559 if (!found) { 560 if (nr2 >= 4) { 561 return FALSE; 562 } 563 564 v2[nr2] = v[i]; 565 *swizzle |= nr2 << (i * 2); 566 nr2++; 567 } 568 } 569 570 /* Actually expand immediate only when fully succeeded. 
571 */ 572 *pnr2 = nr2; 573 return TRUE; 574} 575 576 577static struct ureg_src 578decl_immediate( struct ureg_program *ureg, 579 const unsigned *v, 580 unsigned nr, 581 unsigned type ) 582{ 583 unsigned i, j; 584 unsigned swizzle = 0; 585 586 /* Could do a first pass where we examine all existing immediates 587 * without expanding. 588 */ 589 590 for (i = 0; i < ureg->nr_immediates; i++) { 591 if (ureg->immediate[i].type != type) { 592 continue; 593 } 594 if (match_or_expand_immediate(v, 595 nr, 596 ureg->immediate[i].value.u, 597 &ureg->immediate[i].nr, 598 &swizzle)) { 599 goto out; 600 } 601 } 602 603 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { 604 i = ureg->nr_immediates++; 605 ureg->immediate[i].type = type; 606 if (match_or_expand_immediate(v, 607 nr, 608 ureg->immediate[i].value.u, 609 &ureg->immediate[i].nr, 610 &swizzle)) { 611 goto out; 612 } 613 } 614 615 set_bad(ureg); 616 617out: 618 /* Make sure that all referenced elements are from this immediate. 619 * Has the effect of making size-one immediates into scalars. 
620 */ 621 for (j = nr; j < 4; j++) { 622 swizzle |= (swizzle & 0x3) << (j * 2); 623 } 624 625 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), 626 (swizzle >> 0) & 0x3, 627 (swizzle >> 2) & 0x3, 628 (swizzle >> 4) & 0x3, 629 (swizzle >> 6) & 0x3); 630} 631 632 633struct ureg_src 634ureg_DECL_immediate( struct ureg_program *ureg, 635 const float *v, 636 unsigned nr ) 637{ 638 union { 639 float f[4]; 640 unsigned u[4]; 641 } fu; 642 unsigned int i; 643 644 for (i = 0; i < nr; i++) { 645 fu.f[i] = v[i]; 646 } 647 648 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); 649} 650 651 652struct ureg_src 653ureg_DECL_immediate_uint( struct ureg_program *ureg, 654 const unsigned *v, 655 unsigned nr ) 656{ 657 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); 658} 659 660 661struct ureg_src 662ureg_DECL_immediate_block_uint( struct ureg_program *ureg, 663 const unsigned *v, 664 unsigned nr ) 665{ 666 uint index; 667 uint i; 668 669 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) { 670 set_bad(ureg); 671 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0); 672 } 673 674 index = ureg->nr_immediates; 675 ureg->nr_immediates += (nr + 3) / 4; 676 677 for (i = index; i < ureg->nr_immediates; i++) { 678 ureg->immediate[i].type = TGSI_IMM_UINT32; 679 ureg->immediate[i].nr = nr > 4 ? 4 : nr; 680 memcpy(ureg->immediate[i].value.u, 681 &v[(i - index) * 4], 682 ureg->immediate[i].nr * sizeof(uint)); 683 nr -= 4; 684 } 685 686 return ureg_src_register(TGSI_FILE_IMMEDIATE, index); 687} 688 689 690struct ureg_src 691ureg_DECL_immediate_int( struct ureg_program *ureg, 692 const int *v, 693 unsigned nr ) 694{ 695 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); 696} 697 698 699void 700ureg_emit_src( struct ureg_program *ureg, 701 struct ureg_src src ) 702{ 703 unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 
1 : 0); 704 705 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 706 unsigned n = 0; 707 708 assert(src.File != TGSI_FILE_NULL); 709 assert(src.File != TGSI_FILE_OUTPUT); 710 assert(src.File < TGSI_FILE_COUNT); 711 712 out[n].value = 0; 713 out[n].src.File = src.File; 714 out[n].src.SwizzleX = src.SwizzleX; 715 out[n].src.SwizzleY = src.SwizzleY; 716 out[n].src.SwizzleZ = src.SwizzleZ; 717 out[n].src.SwizzleW = src.SwizzleW; 718 out[n].src.Index = src.Index; 719 out[n].src.Negate = src.Negate; 720 out[0].src.Absolute = src.Absolute; 721 n++; 722 723 if (src.Indirect) { 724 out[0].src.Indirect = 1; 725 out[n].value = 0; 726 out[n].src.File = src.IndirectFile; 727 out[n].src.SwizzleX = src.IndirectSwizzle; 728 out[n].src.SwizzleY = src.IndirectSwizzle; 729 out[n].src.SwizzleZ = src.IndirectSwizzle; 730 out[n].src.SwizzleW = src.IndirectSwizzle; 731 out[n].src.Index = src.IndirectIndex; 732 n++; 733 } 734 735 if (src.Dimension) { 736 out[0].src.Dimension = 1; 737 out[n].dim.Indirect = 0; 738 out[n].dim.Dimension = 0; 739 out[n].dim.Padding = 0; 740 out[n].dim.Index = src.DimensionIndex; 741 n++; 742 } 743 744 assert(n == size); 745} 746 747 748void 749ureg_emit_dst( struct ureg_program *ureg, 750 struct ureg_dst dst ) 751{ 752 unsigned size = (1 + 753 (dst.Indirect ? 
1 : 0)); 754 755 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 756 unsigned n = 0; 757 758 assert(dst.File != TGSI_FILE_NULL); 759 assert(dst.File != TGSI_FILE_CONSTANT); 760 assert(dst.File != TGSI_FILE_INPUT); 761 assert(dst.File != TGSI_FILE_SAMPLER); 762 assert(dst.File != TGSI_FILE_IMMEDIATE); 763 assert(dst.File < TGSI_FILE_COUNT); 764 765 out[n].value = 0; 766 out[n].dst.File = dst.File; 767 out[n].dst.WriteMask = dst.WriteMask; 768 out[n].dst.Indirect = dst.Indirect; 769 out[n].dst.Index = dst.Index; 770 n++; 771 772 if (dst.Indirect) { 773 out[n].value = 0; 774 out[n].src.File = TGSI_FILE_ADDRESS; 775 out[n].src.SwizzleX = dst.IndirectSwizzle; 776 out[n].src.SwizzleY = dst.IndirectSwizzle; 777 out[n].src.SwizzleZ = dst.IndirectSwizzle; 778 out[n].src.SwizzleW = dst.IndirectSwizzle; 779 out[n].src.Index = dst.IndirectIndex; 780 n++; 781 } 782 783 assert(n == size); 784} 785 786 787static void validate( unsigned opcode, 788 unsigned nr_dst, 789 unsigned nr_src ) 790{ 791#ifdef DEBUG 792 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode ); 793 assert(info); 794 if(info) { 795 assert(nr_dst == info->num_dst); 796 assert(nr_src == info->num_src); 797 } 798#endif 799} 800 801struct ureg_emit_insn_result 802ureg_emit_insn(struct ureg_program *ureg, 803 unsigned opcode, 804 boolean saturate, 805 boolean predicate, 806 boolean pred_negate, 807 unsigned pred_swizzle_x, 808 unsigned pred_swizzle_y, 809 unsigned pred_swizzle_z, 810 unsigned pred_swizzle_w, 811 unsigned num_dst, 812 unsigned num_src ) 813{ 814 union tgsi_any_token *out; 815 uint count = predicate ? 
2 : 1; 816 struct ureg_emit_insn_result result; 817 818 validate( opcode, num_dst, num_src ); 819 820 out = get_tokens( ureg, DOMAIN_INSN, count ); 821 out[0].insn = tgsi_default_instruction(); 822 out[0].insn.Opcode = opcode; 823 out[0].insn.Saturate = saturate; 824 out[0].insn.NumDstRegs = num_dst; 825 out[0].insn.NumSrcRegs = num_src; 826 827 result.insn_token = ureg->domain[DOMAIN_INSN].count - count; 828 result.extended_token = result.insn_token; 829 830 if (predicate) { 831 out[0].insn.Predicate = 1; 832 out[1].insn_predicate = tgsi_default_instruction_predicate(); 833 out[1].insn_predicate.Negate = pred_negate; 834 out[1].insn_predicate.SwizzleX = pred_swizzle_x; 835 out[1].insn_predicate.SwizzleY = pred_swizzle_y; 836 out[1].insn_predicate.SwizzleZ = pred_swizzle_z; 837 out[1].insn_predicate.SwizzleW = pred_swizzle_w; 838 } 839 840 ureg->nr_instructions++; 841 842 return result; 843} 844 845 846void 847ureg_emit_label(struct ureg_program *ureg, 848 unsigned extended_token, 849 unsigned *label_token ) 850{ 851 union tgsi_any_token *out, *insn; 852 853 if(!label_token) 854 return; 855 856 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 857 out[0].value = 0; 858 859 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 860 insn->insn.Label = 1; 861 862 *label_token = ureg->domain[DOMAIN_INSN].count - 1; 863} 864 865/* Will return a number which can be used in a label to point to the 866 * next instruction to be emitted. 867 */ 868unsigned 869ureg_get_instruction_number( struct ureg_program *ureg ) 870{ 871 return ureg->nr_instructions; 872} 873 874/* Patch a given label (expressed as a token number) to point to a 875 * given instruction (expressed as an instruction number). 
876 */ 877void 878ureg_fixup_label(struct ureg_program *ureg, 879 unsigned label_token, 880 unsigned instruction_number ) 881{ 882 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); 883 884 out->insn_label.Label = instruction_number; 885} 886 887 888void 889ureg_emit_texture(struct ureg_program *ureg, 890 unsigned extended_token, 891 unsigned target ) 892{ 893 union tgsi_any_token *out, *insn; 894 895 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 896 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); 897 898 insn->insn.Texture = 1; 899 900 out[0].value = 0; 901 out[0].insn_texture.Texture = target; 902} 903 904 905void 906ureg_fixup_insn_size(struct ureg_program *ureg, 907 unsigned insn ) 908{ 909 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); 910 911 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION); 912 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; 913} 914 915 916void 917ureg_insn(struct ureg_program *ureg, 918 unsigned opcode, 919 const struct ureg_dst *dst, 920 unsigned nr_dst, 921 const struct ureg_src *src, 922 unsigned nr_src ) 923{ 924 struct ureg_emit_insn_result insn; 925 unsigned i; 926 boolean saturate; 927 boolean predicate; 928 boolean negate = FALSE; 929 unsigned swizzle[4] = { 0 }; 930 931 saturate = nr_dst ? dst[0].Saturate : FALSE; 932 predicate = nr_dst ? 
dst[0].Predicate : FALSE; 933 if (predicate) { 934 negate = dst[0].PredNegate; 935 swizzle[0] = dst[0].PredSwizzleX; 936 swizzle[1] = dst[0].PredSwizzleY; 937 swizzle[2] = dst[0].PredSwizzleZ; 938 swizzle[3] = dst[0].PredSwizzleW; 939 } 940 941 insn = ureg_emit_insn(ureg, 942 opcode, 943 saturate, 944 predicate, 945 negate, 946 swizzle[0], 947 swizzle[1], 948 swizzle[2], 949 swizzle[3], 950 nr_dst, 951 nr_src); 952 953 for (i = 0; i < nr_dst; i++) 954 ureg_emit_dst( ureg, dst[i] ); 955 956 for (i = 0; i < nr_src; i++) 957 ureg_emit_src( ureg, src[i] ); 958 959 ureg_fixup_insn_size( ureg, insn.insn_token ); 960} 961 962void 963ureg_tex_insn(struct ureg_program *ureg, 964 unsigned opcode, 965 const struct ureg_dst *dst, 966 unsigned nr_dst, 967 unsigned target, 968 const struct ureg_src *src, 969 unsigned nr_src ) 970{ 971 struct ureg_emit_insn_result insn; 972 unsigned i; 973 boolean saturate; 974 boolean predicate; 975 boolean negate = FALSE; 976 unsigned swizzle[4] = { 0 }; 977 978 saturate = nr_dst ? dst[0].Saturate : FALSE; 979 predicate = nr_dst ? 
dst[0].Predicate : FALSE; 980 if (predicate) { 981 negate = dst[0].PredNegate; 982 swizzle[0] = dst[0].PredSwizzleX; 983 swizzle[1] = dst[0].PredSwizzleY; 984 swizzle[2] = dst[0].PredSwizzleZ; 985 swizzle[3] = dst[0].PredSwizzleW; 986 } 987 988 insn = ureg_emit_insn(ureg, 989 opcode, 990 saturate, 991 predicate, 992 negate, 993 swizzle[0], 994 swizzle[1], 995 swizzle[2], 996 swizzle[3], 997 nr_dst, 998 nr_src); 999 1000 ureg_emit_texture( ureg, insn.extended_token, target ); 1001 1002 for (i = 0; i < nr_dst; i++) 1003 ureg_emit_dst( ureg, dst[i] ); 1004 1005 for (i = 0; i < nr_src; i++) 1006 ureg_emit_src( ureg, src[i] ); 1007 1008 ureg_fixup_insn_size( ureg, insn.insn_token ); 1009} 1010 1011 1012void 1013ureg_label_insn(struct ureg_program *ureg, 1014 unsigned opcode, 1015 const struct ureg_src *src, 1016 unsigned nr_src, 1017 unsigned *label_token ) 1018{ 1019 struct ureg_emit_insn_result insn; 1020 unsigned i; 1021 1022 insn = ureg_emit_insn(ureg, 1023 opcode, 1024 FALSE, 1025 FALSE, 1026 FALSE, 1027 TGSI_SWIZZLE_X, 1028 TGSI_SWIZZLE_Y, 1029 TGSI_SWIZZLE_Z, 1030 TGSI_SWIZZLE_W, 1031 0, 1032 nr_src); 1033 1034 ureg_emit_label( ureg, insn.extended_token, label_token ); 1035 1036 for (i = 0; i < nr_src; i++) 1037 ureg_emit_src( ureg, src[i] ); 1038 1039 ureg_fixup_insn_size( ureg, insn.insn_token ); 1040} 1041 1042 1043 1044static void emit_decl( struct ureg_program *ureg, 1045 unsigned file, 1046 unsigned index, 1047 unsigned semantic_name, 1048 unsigned semantic_index, 1049 unsigned interp ) 1050{ 1051 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); 1052 1053 out[0].value = 0; 1054 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1055 out[0].decl.NrTokens = 3; 1056 out[0].decl.File = file; 1057 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! 
*/ 1058 out[0].decl.Interpolate = interp; 1059 out[0].decl.Semantic = 1; 1060 1061 out[1].value = 0; 1062 out[1].decl_range.First = 1063 out[1].decl_range.Last = index; 1064 1065 out[2].value = 0; 1066 out[2].decl_semantic.Name = semantic_name; 1067 out[2].decl_semantic.Index = semantic_index; 1068 1069} 1070 1071 1072static void emit_decl_range( struct ureg_program *ureg, 1073 unsigned file, 1074 unsigned first, 1075 unsigned count ) 1076{ 1077 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1078 1079 out[0].value = 0; 1080 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1081 out[0].decl.NrTokens = 2; 1082 out[0].decl.File = file; 1083 out[0].decl.UsageMask = 0xf; 1084 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; 1085 out[0].decl.Semantic = 0; 1086 1087 out[1].value = 0; 1088 out[1].decl_range.First = first; 1089 out[1].decl_range.Last = first + count - 1; 1090} 1091 1092static void 1093emit_decl_range2D(struct ureg_program *ureg, 1094 unsigned file, 1095 unsigned first, 1096 unsigned last, 1097 unsigned index2D) 1098{ 1099 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); 1100 1101 out[0].value = 0; 1102 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 1103 out[0].decl.NrTokens = 3; 1104 out[0].decl.File = file; 1105 out[0].decl.UsageMask = 0xf; 1106 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; 1107 out[0].decl.Dimension = 1; 1108 1109 out[1].value = 0; 1110 out[1].decl_range.First = first; 1111 out[1].decl_range.Last = last; 1112 1113 out[2].value = 0; 1114 out[2].decl_dim.Index2D = index2D; 1115} 1116 1117static void 1118emit_immediate( struct ureg_program *ureg, 1119 const unsigned *v, 1120 unsigned type ) 1121{ 1122 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); 1123 1124 out[0].value = 0; 1125 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; 1126 out[0].imm.NrTokens = 5; 1127 out[0].imm.DataType = type; 1128 out[0].imm.Padding = 0; 1129 1130 out[1].imm_data.Uint = v[0]; 1131 out[2].imm_data.Uint = v[1]; 
1132 out[3].imm_data.Uint = v[2]; 1133 out[4].imm_data.Uint = v[3]; 1134} 1135 1136static void 1137emit_property(struct ureg_program *ureg, 1138 unsigned name, 1139 unsigned data) 1140{ 1141 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2); 1142 1143 out[0].value = 0; 1144 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY; 1145 out[0].prop.NrTokens = 2; 1146 out[0].prop.PropertyName = name; 1147 1148 out[1].prop_data.Data = data; 1149} 1150 1151 1152static void emit_decls( struct ureg_program *ureg ) 1153{ 1154 unsigned i; 1155 1156 if (ureg->property_gs_input_prim != ~0) { 1157 assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); 1158 1159 emit_property(ureg, 1160 TGSI_PROPERTY_GS_INPUT_PRIM, 1161 ureg->property_gs_input_prim); 1162 } 1163 1164 if (ureg->processor == TGSI_PROCESSOR_VERTEX) { 1165 for (i = 0; i < UREG_MAX_INPUT; i++) { 1166 if (ureg->vs_inputs[i/32] & (1 << (i%32))) { 1167 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 ); 1168 } 1169 } 1170 } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) { 1171 for (i = 0; i < ureg->nr_fs_inputs; i++) { 1172 emit_decl( ureg, 1173 TGSI_FILE_INPUT, 1174 i, 1175 ureg->fs_input[i].semantic_name, 1176 ureg->fs_input[i].semantic_index, 1177 ureg->fs_input[i].interp ); 1178 } 1179 } else { 1180 for (i = 0; i < ureg->nr_gs_inputs; i++) { 1181 emit_decl_range(ureg, 1182 TGSI_FILE_INPUT, 1183 ureg->gs_input[i].index, 1184 1); 1185 } 1186 } 1187 1188 for (i = 0; i < ureg->nr_system_values; i++) { 1189 emit_decl(ureg, 1190 TGSI_FILE_SYSTEM_VALUE, 1191 ureg->system_value[i].index, 1192 ureg->system_value[i].semantic_name, 1193 ureg->system_value[i].semantic_index, 1194 TGSI_INTERPOLATE_CONSTANT); 1195 } 1196 1197 for (i = 0; i < ureg->nr_outputs; i++) { 1198 emit_decl( ureg, 1199 TGSI_FILE_OUTPUT, 1200 i, 1201 ureg->output[i].semantic_name, 1202 ureg->output[i].semantic_index, 1203 TGSI_INTERPOLATE_CONSTANT ); 1204 } 1205 1206 for (i = 0; i < ureg->nr_samplers; i++) { 1207 emit_decl_range( ureg, 1208 
TGSI_FILE_SAMPLER, 1209 ureg->sampler[i].Index, 1 ); 1210 } 1211 1212 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { 1213 struct const_decl *decl = &ureg->const_decls[i]; 1214 1215 if (decl->nr_constant_ranges) { 1216 uint j; 1217 1218 for (j = 0; j < decl->nr_constant_ranges; j++) { 1219 emit_decl_range2D(ureg, 1220 TGSI_FILE_CONSTANT, 1221 decl->constant_range[j].first, 1222 decl->constant_range[j].last, 1223 i); 1224 } 1225 } 1226 } 1227 1228 if (ureg->nr_temps) { 1229 emit_decl_range( ureg, 1230 TGSI_FILE_TEMPORARY, 1231 0, ureg->nr_temps ); 1232 } 1233 1234 if (ureg->nr_addrs) { 1235 emit_decl_range( ureg, 1236 TGSI_FILE_ADDRESS, 1237 0, ureg->nr_addrs ); 1238 } 1239 1240 if (ureg->nr_loops) { 1241 emit_decl_range(ureg, 1242 TGSI_FILE_LOOP, 1243 0, 1244 ureg->nr_loops); 1245 } 1246 1247 if (ureg->nr_preds) { 1248 emit_decl_range(ureg, 1249 TGSI_FILE_PREDICATE, 1250 0, 1251 ureg->nr_preds); 1252 } 1253 1254 for (i = 0; i < ureg->nr_immediates; i++) { 1255 emit_immediate( ureg, 1256 ureg->immediate[i].value.u, 1257 ureg->immediate[i].type ); 1258 } 1259} 1260 1261/* Append the instruction tokens onto the declarations to build a 1262 * contiguous stream suitable to send to the driver. 
1263 */ 1264static void copy_instructions( struct ureg_program *ureg ) 1265{ 1266 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; 1267 union tgsi_any_token *out = get_tokens( ureg, 1268 DOMAIN_DECL, 1269 nr_tokens ); 1270 1271 memcpy(out, 1272 ureg->domain[DOMAIN_INSN].tokens, 1273 nr_tokens * sizeof out[0] ); 1274} 1275 1276 1277static void 1278fixup_header_size(struct ureg_program *ureg) 1279{ 1280 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 ); 1281 1282 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2; 1283} 1284 1285 1286static void 1287emit_header( struct ureg_program *ureg ) 1288{ 1289 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 1290 1291 out[0].header.HeaderSize = 2; 1292 out[0].header.BodySize = 0; 1293 1294 out[1].processor.Processor = ureg->processor; 1295 out[1].processor.Padding = 0; 1296} 1297 1298 1299const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) 1300{ 1301 const struct tgsi_token *tokens; 1302 1303 emit_header( ureg ); 1304 emit_decls( ureg ); 1305 copy_instructions( ureg ); 1306 fixup_header_size( ureg ); 1307 1308 if (ureg->domain[0].tokens == error_tokens || 1309 ureg->domain[1].tokens == error_tokens) { 1310 debug_printf("%s: error in generated shader\n", __FUNCTION__); 1311 assert(0); 1312 return NULL; 1313 } 1314 1315 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1316 1317 if (0) { 1318 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, 1319 ureg->domain[DOMAIN_DECL].count); 1320 tgsi_dump( tokens, 0 ); 1321 } 1322 1323#if DEBUG 1324 if (tokens && !tgsi_sanity_check(tokens)) { 1325 debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n"); 1326 tgsi_dump(tokens, 0); 1327 assert(0); 1328 } 1329#endif 1330 1331 1332 return tokens; 1333} 1334 1335 1336void *ureg_create_shader( struct ureg_program *ureg, 1337 struct pipe_context *pipe ) 1338{ 1339 struct pipe_shader_state state; 1340 1341 state.tokens = ureg_finalize(ureg); 1342 
if(!state.tokens) 1343 return NULL; 1344 1345 if (ureg->processor == TGSI_PROCESSOR_VERTEX) 1346 return pipe->create_vs_state( pipe, &state ); 1347 else 1348 return pipe->create_fs_state( pipe, &state ); 1349} 1350 1351 1352const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, 1353 unsigned *nr_tokens ) 1354{ 1355 const struct tgsi_token *tokens; 1356 1357 ureg_finalize(ureg); 1358 1359 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; 1360 1361 if (nr_tokens) 1362 *nr_tokens = ureg->domain[DOMAIN_DECL].size; 1363 1364 ureg->domain[DOMAIN_DECL].tokens = 0; 1365 ureg->domain[DOMAIN_DECL].size = 0; 1366 ureg->domain[DOMAIN_DECL].order = 0; 1367 ureg->domain[DOMAIN_DECL].count = 0; 1368 1369 return tokens; 1370} 1371 1372 1373struct ureg_program *ureg_create( unsigned processor ) 1374{ 1375 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); 1376 if (ureg == NULL) 1377 return NULL; 1378 1379 ureg->processor = processor; 1380 ureg->property_gs_input_prim = ~0; 1381 return ureg; 1382} 1383 1384 1385void ureg_destroy( struct ureg_program *ureg ) 1386{ 1387 unsigned i; 1388 1389 for (i = 0; i < Elements(ureg->domain); i++) { 1390 if (ureg->domain[i].tokens && 1391 ureg->domain[i].tokens != error_tokens) 1392 FREE(ureg->domain[i].tokens); 1393 } 1394 1395 FREE(ureg); 1396} 1397