tgsi_ureg.c revision 749e52049dee6717023309f6446efb2c89ed720c
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_context.h" 30#include "pipe/p_state.h" 31#include "tgsi/tgsi_ureg.h" 32#include "tgsi/tgsi_dump.h" 33#include "util/u_memory.h" 34 35union tgsi_any_token { 36 struct tgsi_version version; 37 struct tgsi_header header; 38 struct tgsi_processor processor; 39 struct tgsi_token token; 40 struct tgsi_declaration decl; 41 struct tgsi_declaration_range decl_range; 42 struct tgsi_declaration_semantic decl_semantic; 43 struct tgsi_immediate imm; 44 union tgsi_immediate_data imm_data; 45 struct tgsi_instruction insn; 46 struct tgsi_instruction_ext_nv insn_ext_nv; 47 struct tgsi_instruction_ext_label insn_ext_label; 48 struct tgsi_instruction_ext_texture insn_ext_texture; 49 struct tgsi_instruction_ext_predicate insn_ext_predicate; 50 struct tgsi_src_register src; 51 struct tgsi_src_register_ext_swz src_ext_swz; 52 struct tgsi_src_register_ext_mod src_ext_mod; 53 struct tgsi_dimension dim; 54 struct tgsi_dst_register dst; 55 struct tgsi_dst_register_ext_concode dst_ext_code; 56 struct tgsi_dst_register_ext_modulate dst_ext_mod; 57 struct tgsi_dst_register_ext_predicate dst_ext_pred; 58 unsigned value; 59}; 60 61 62struct ureg_tokens { 63 union tgsi_any_token *tokens; 64 unsigned size; 65 unsigned order; 66 unsigned count; 67}; 68 69#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS 70#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS 71#define UREG_MAX_IMMEDIATE 32 72 73#define DOMAIN_DECL 0 74#define DOMAIN_INSN 1 75 76struct ureg_program 77{ 78 unsigned processor; 79 struct pipe_context *pipe; 80 81 struct { 82 unsigned semantic_name; 83 unsigned semantic_index; 84 unsigned interp; 85 } input[UREG_MAX_INPUT]; 86 unsigned nr_inputs; 87 88 struct { 89 unsigned semantic_name; 90 unsigned semantic_index; 91 } output[UREG_MAX_OUTPUT]; 92 unsigned nr_outputs; 93 94 struct { 95 float v[4]; 96 unsigned nr; 97 } immediate[UREG_MAX_OUTPUT]; 98 unsigned nr_immediates; 99 100 101 unsigned nr_constants; 102 unsigned nr_temps; 103 unsigned nr_samplers; 104 105 struct ureg_tokens domain[2]; 106}; 107 108static union tgsi_any_token error_tokens[32]; 109 110static void tokens_error( struct ureg_tokens *tokens ) 111{ 112 tokens->tokens = error_tokens; 113 tokens->size = Elements(error_tokens); 114 tokens->count = 0; 115} 116 117 118static void tokens_expand( struct ureg_tokens *tokens, 119 unsigned count ) 120{ 121 unsigned old_size = tokens->size * sizeof(unsigned); 122 123 if (tokens->tokens == error_tokens) 124 goto fail; 125 126 while (tokens->count + count > tokens->size) { 127 tokens->size = (1 << ++tokens->order); 128 } 129 130 tokens->tokens = REALLOC(tokens->tokens, 131 old_size, 132 tokens->size * sizeof(unsigned)); 133 if (tokens->tokens == NULL) 134 goto fail; 135 136 return; 137 138fail: 139 tokens_error(tokens); 140} 141 142static void set_bad( struct ureg_program *ureg ) 143{ 144 tokens_error(&ureg->domain[0]); 145} 146 147 148 149static union tgsi_any_token *get_tokens( struct ureg_program *ureg, 150 unsigned domain, 151 unsigned count ) 152{ 153 struct ureg_tokens *tokens = &ureg->domain[domain]; 154 union tgsi_any_token *result; 155 156 if (tokens->count + count > tokens->size) 157 tokens_expand(tokens, count); 158 159 result = &tokens->tokens[tokens->count]; 160 tokens->count += count; 161 return result; 162} 163 164 165static union tgsi_any_token *retrieve_token( struct ureg_program *ureg, 166 unsigned domain, 167 unsigned nr ) 168{ 169 if (ureg->domain[domain].tokens == error_tokens) 170 return &error_tokens[0]; 171 172 return &ureg->domain[domain].tokens[nr]; 173} 174 175 176 177static INLINE struct ureg_dst 178ureg_dst_register( unsigned file, 179 unsigned index ) 180{ 181 struct ureg_dst dst; 182 183 dst.File = file; 184 dst.WriteMask = TGSI_WRITEMASK_XYZW; 185 dst.Indirect = 0; 186 dst.Saturate = 0; 187 dst.Index = index; 188 dst.Pad1 = 0; 189 dst.Pad2 = 0; 190 191 return dst; 192} 193 194static INLINE struct ureg_src 195ureg_src_register( unsigned file, 196 unsigned index ) 197{ 198 struct ureg_src src; 199 200 src.File = file; 201 src.SwizzleX = TGSI_SWIZZLE_X; 202 src.SwizzleY = TGSI_SWIZZLE_Y; 203 src.SwizzleZ = TGSI_SWIZZLE_Z; 204 src.SwizzleW = TGSI_SWIZZLE_W; 205 src.Pad = 0; 206 src.Indirect = 0; 207 src.Absolute = 0; 208 src.Index = index; 209 src.Negate = 0; 210 211 return src; 212} 213 214 215 216 217static struct ureg_src 218ureg_DECL_input( struct ureg_program *ureg, 219 unsigned name, 220 unsigned index, 221 unsigned interp_mode ) 222{ 223 unsigned i; 224 225 for (i = 0; i < ureg->nr_inputs; i++) { 226 if (ureg->input[i].semantic_name == name && 227 ureg->input[i].semantic_index == index) 228 goto out; 229 } 230 231 if (ureg->nr_inputs < UREG_MAX_INPUT) { 232 ureg->input[i].semantic_name = name; 233 ureg->input[i].semantic_index = index; 234 ureg->input[i].interp = interp_mode; 235 ureg->nr_inputs++; 236 } 237 else { 238 set_bad( ureg ); 239 } 240 241out: 242 return ureg_src_register( TGSI_FILE_INPUT, i ); 243} 244 245 246 247struct ureg_src 248ureg_DECL_fs_input( struct ureg_program *ureg, 249 unsigned name, 250 unsigned index, 251 unsigned interp ) 252{ 253 return ureg_DECL_input( ureg, name, index, interp ); 254} 255 256 257struct ureg_src 258ureg_DECL_vs_input( struct ureg_program *ureg, 259 unsigned name, 260 unsigned index ) 261{ 262 return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT ); 263} 264 265 266struct ureg_dst 267ureg_DECL_output( struct ureg_program *ureg, 268 unsigned name, 269 unsigned index ) 270{ 271 unsigned i; 272 273 for (i = 0; i < ureg->nr_outputs; i++) { 274 if (ureg->output[i].semantic_name == name && 275 ureg->output[i].semantic_index == index) 276 goto out; 277 } 278 279 if (ureg->nr_outputs < UREG_MAX_OUTPUT) { 280 ureg->output[i].semantic_name = name; 281 ureg->output[i].semantic_index = index; 282 ureg->nr_outputs++; 283 } 284 else { 285 set_bad( ureg ); 286 } 287 288out: 289 return ureg_dst_register( TGSI_FILE_OUTPUT, i ); 290} 291 292 293/* Returns a new constant register. Keep track of which have been 294 * referred to so that we can emit decls later. 295 * 296 * There is nothing in this code to bind this constant to any tracked 297 * value or manage any constant_buffer contents -- that's the 298 * resposibility of the calling code. 299 */ 300struct ureg_src ureg_DECL_constant(struct ureg_program *ureg ) 301{ 302 return ureg_src_register( TGSI_FILE_TEMPORARY, ureg->nr_constants++ ); 303} 304 305 306/* Allocate a new temporary. No way to release temporaries in this code. 307 */ 308struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) 309{ 310 return ureg_dst_register( TGSI_FILE_TEMPORARY, ureg->nr_temps++ ); 311} 312 313 314/* Allocate a new sampler. 315 */ 316struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg ) 317{ 318 return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ ); 319} 320 321 322 323 324static int match_or_expand_immediate( const float *v, 325 unsigned nr, 326 float *v2, 327 unsigned *nr2, 328 unsigned *swizzle ) 329{ 330 unsigned i, j; 331 332 for (i = 0; i < nr; i++) { 333 boolean found = FALSE; 334 335 for (j = 0; j < *nr2 && !found; j++) { 336 if (v[i] == v2[j]) { 337 *swizzle |= j << (i * 2); 338 found = TRUE; 339 } 340 } 341 342 if (!found) { 343 if (*nr2 >= 4) 344 return FALSE; 345 346 v2[*nr2] = v[i]; 347 *swizzle |= *nr2 << (i * 2); 348 (*nr2)++; 349 } 350 } 351 352 return TRUE; 353} 354 355 356 357 358struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, 359 const float *v, 360 unsigned nr ) 361{ 362 unsigned i; 363 unsigned swizzle; 364 365 /* Could do a first pass where we examine all existing immediates 366 * without expanding. 367 */ 368 369 for (i = 0; i < ureg->nr_immediates; i++) { 370 if (match_or_expand_immediate( v, 371 nr, 372 ureg->immediate[i].v, 373 &ureg->immediate[i].nr, 374 &swizzle )) 375 goto out; 376 } 377 378 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { 379 i = ureg->nr_immediates++; 380 if (match_or_expand_immediate( v, 381 nr, 382 ureg->immediate[i].v, 383 &ureg->immediate[i].nr, 384 &swizzle )) 385 goto out; 386 } 387 388 set_bad( ureg ); 389 390out: 391 return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), 392 (swizzle >> 0) & 0x3, 393 (swizzle >> 2) & 0x3, 394 (swizzle >> 4) & 0x3, 395 (swizzle >> 6) & 0x3); 396} 397 398 399void 400ureg_emit_src( struct ureg_program *ureg, 401 struct ureg_src src ) 402{ 403 unsigned size = (1 + 404 (src.Absolute ? 1 : 0) + 405 (src.Indirect ? 1 : 0)); 406 407 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 408 unsigned n = 0; 409 410 out[n].value = 0; 411 out[n].src.File = src.File; 412 out[n].src.SwizzleX = src.SwizzleX; 413 out[n].src.SwizzleY = src.SwizzleY; 414 out[n].src.SwizzleZ = src.SwizzleZ; 415 out[n].src.SwizzleW = src.SwizzleW; 416 out[n].src.Indirect = src.Indirect; 417 out[n].src.Index = src.Index; 418 n++; 419 420 if (src.Absolute) { 421 out[n].value = 0; 422 out[n].src_ext_mod.Absolute = 1; 423 n++; 424 } 425 426 if (src.Indirect) { 427 out[n].value = 0; 428 out[n].src.File = TGSI_FILE_ADDRESS; 429 out[n].src.SwizzleX = TGSI_SWIZZLE_X; 430 out[n].src.SwizzleY = TGSI_SWIZZLE_X; 431 out[n].src.SwizzleZ = TGSI_SWIZZLE_X; 432 out[n].src.SwizzleW = TGSI_SWIZZLE_X; 433 out[n].src.Indirect = 0; 434 out[n].src.Index = 0; 435 n++; 436 } 437 438 assert(n == size); 439} 440 441 442void 443ureg_emit_dst( struct ureg_program *ureg, 444 struct ureg_dst dst ) 445{ 446 unsigned size = (1 + 447 (dst.Indirect ? 1 : 0)); 448 449 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); 450 unsigned n = 0; 451 452 out[n].value = 0; 453 out[n].dst.File = dst.File; 454 out[n].dst.WriteMask = dst.WriteMask; 455 out[n].dst.Indirect = dst.Indirect; 456 out[n].dst.Index = dst.Index; 457 n++; 458 459 if (dst.Indirect) { 460 out[n].value = 0; 461 out[n].src.File = TGSI_FILE_ADDRESS; 462 out[n].src.SwizzleX = TGSI_SWIZZLE_X; 463 out[n].src.SwizzleY = TGSI_SWIZZLE_X; 464 out[n].src.SwizzleZ = TGSI_SWIZZLE_X; 465 out[n].src.SwizzleW = TGSI_SWIZZLE_X; 466 out[n].src.Indirect = 0; 467 out[n].src.Index = 0; 468 n++; 469 } 470 471 assert(n == size); 472} 473 474 475 476unsigned 477ureg_emit_insn(struct ureg_program *ureg, 478 unsigned opcode, 479 boolean saturate, 480 unsigned num_dst, 481 unsigned num_src ) 482{ 483 union tgsi_any_token *out; 484 485 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 486 out[0].value = 0; 487 out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; 488 out[0].insn.NrTokens = 0; 489 out[0].insn.Opcode = opcode; 490 out[0].insn.Saturate = saturate; 491 out[0].insn.NrTokens = 0; 492 out[0].insn.NumDstRegs = num_dst; 493 out[0].insn.NumSrcRegs = num_src; 494 out[0].insn.Padding = 0; 495 out[0].insn.Extended = 0; 496 497 return ureg->domain[DOMAIN_INSN].count - 1; 498} 499 500 501void 502ureg_emit_label(struct ureg_program *ureg, 503 unsigned insn_token, 504 unsigned *label_token ) 505{ 506 union tgsi_any_token *out, *insn; 507 508 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 509 insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); 510 511 insn->insn.Extended = 1; 512 513 out[0].value = 0; 514 out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; 515} 516 517 518void 519ureg_emit_texture(struct ureg_program *ureg, 520 unsigned insn_token, 521 unsigned target ) 522{ 523 union tgsi_any_token *out, *insn; 524 525 out = get_tokens( ureg, DOMAIN_INSN, 1 ); 526 insn = retrieve_token( ureg, DOMAIN_INSN, insn_token ); 527 528 insn->insn.Extended = 1; 529 530 out[0].value = 0; 531 out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; 532 out[0].insn_ext_texture.Texture = target; 533} 534 535 536void 537ureg_fixup_insn_size(struct ureg_program *ureg, 538 unsigned insn ) 539{ 540 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn ); 541 542 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1; 543} 544 545 546 547 548 549static void emit_decl( struct ureg_program *ureg, 550 unsigned file, 551 unsigned index, 552 unsigned semantic_name, 553 unsigned semantic_index, 554 unsigned interp ) 555{ 556 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); 557 558 out[0].value = 0; 559 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 560 out[0].decl.NrTokens = 3; 561 out[0].decl.File = file; 562 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */ 563 out[0].decl.Interpolate = interp; 564 out[0].decl.Semantic = 1; 565 566 out[1].value = 0; 567 out[1].decl_range.First = 568 out[1].decl_range.Last = index; 569 570 out[2].value = 0; 571 out[2].decl_semantic.SemanticName = semantic_name; 572 out[2].decl_semantic.SemanticIndex = semantic_index; 573 574} 575 576 577static void emit_decl_range( struct ureg_program *ureg, 578 unsigned file, 579 unsigned first, 580 unsigned count ) 581{ 582 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 ); 583 584 out[0].value = 0; 585 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; 586 out[0].decl.NrTokens = 2; 587 out[0].decl.File = file; 588 out[0].decl.UsageMask = 0xf; 589 out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; 590 out[0].decl.Semantic = 0; 591 592 out[1].value = 0; 593 out[1].decl_range.First = first; 594 out[1].decl_range.Last = first + count - 1; 595} 596 597static void emit_immediate( struct ureg_program *ureg, 598 const float *v ) 599{ 600 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); 601 602 out[0].value = 0; 603 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; 604 out[0].imm.NrTokens = 5; 605 out[0].imm.DataType = TGSI_IMM_FLOAT32; 606 out[0].imm.Padding = 0; 607 out[0].imm.Extended = 0; 608 609 out[1].imm_data.Float = v[0]; 610 out[2].imm_data.Float = v[1]; 611 out[3].imm_data.Float = v[2]; 612 out[4].imm_data.Float = v[3]; 613} 614 615 616 617 618static void emit_decls( struct ureg_program *ureg ) 619{ 620 unsigned i; 621 622 for (i = 0; i < ureg->nr_inputs; i++) { 623 emit_decl( ureg, 624 TGSI_FILE_INPUT, 625 i, 626 ureg->input[i].semantic_name, 627 ureg->input[i].semantic_index, 628 ureg->input[i].interp ); 629 } 630 631 for (i = 0; i < ureg->nr_outputs; i++) { 632 emit_decl( ureg, 633 TGSI_FILE_OUTPUT, 634 i, 635 ureg->output[i].semantic_name, 636 ureg->output[i].semantic_index, 637 TGSI_INTERPOLATE_CONSTANT ); 638 } 639 640 if (ureg->nr_samplers) { 641 emit_decl_range( ureg, 642 TGSI_FILE_SAMPLER, 643 0, ureg->nr_samplers ); 644 } 645 646 if (ureg->nr_constants) { 647 emit_decl_range( ureg, 648 TGSI_FILE_CONSTANT, 649 0, ureg->nr_constants ); 650 } 651 652 if (ureg->nr_temps) { 653 emit_decl_range( ureg, 654 TGSI_FILE_TEMPORARY, 655 0, ureg->nr_temps ); 656 } 657 658 for (i = 0; i < ureg->nr_immediates; i++) { 659 emit_immediate( ureg, 660 ureg->immediate[i].v ); 661 } 662} 663 664/* Append the instruction tokens onto the declarations to build a 665 * contiguous stream suitable to send to the driver. 666 */ 667static void copy_instructions( struct ureg_program *ureg ) 668{ 669 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count; 670 union tgsi_any_token *out = get_tokens( ureg, 671 DOMAIN_DECL, 672 nr_tokens ); 673 674 memcpy(out, 675 ureg->domain[DOMAIN_INSN].tokens, 676 nr_tokens * sizeof out[0] ); 677} 678 679 680static void 681fixup_header_size(struct ureg_program *ureg, 682 unsigned insn ) 683{ 684 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 ); 685 686 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 3; 687} 688 689 690static void 691emit_header( struct ureg_program *ureg ) 692{ 693 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); 694 695 out[0].version.MajorVersion = 1; 696 out[0].version.MinorVersion = 1; 697 out[0].version.Padding = 0; 698 699 out[1].header.HeaderSize = 2; 700 out[1].header.BodySize = 0; 701 702 out[2].processor.Processor = ureg->processor; 703 out[2].processor.Padding = 0; 704} 705 706 707void *ureg_create_shader( struct ureg_program *ureg ) 708{ 709 struct pipe_shader_state state; 710 unsigned insn; 711 712 emit_header( ureg ); 713 emit_decls( ureg ); 714 copy_instructions( ureg ); 715 fixup_header_size( ureg, insn ); 716 717 if (ureg->domain[0].tokens == error_tokens || 718 ureg->domain[1].tokens == error_tokens) { 719 debug_printf("%s: error in generated shader\n", __FUNCTION__); 720 assert(0); 721 return NULL; 722 } 723 724 state.tokens = (const struct tgsi_token *)ureg->domain[DOMAIN_DECL].tokens; 725 726 if (1) { 727 debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, 728 ureg->domain[DOMAIN_DECL].count); 729 tgsi_dump( state.tokens, 0 ); 730 } 731 732 if (ureg->processor == TGSI_PROCESSOR_VERTEX) 733 return ureg->pipe->create_vs_state( ureg->pipe, &state ); 734 else 735 return ureg->pipe->create_fs_state( ureg->pipe, &state ); 736} 737 738 739 740 741struct ureg_program *ureg_create( struct pipe_context *pipe, 742 unsigned processor ) 743{ 744 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program ); 745 if (ureg == NULL) 746 return NULL; 747 748 ureg->pipe = pipe; 749 ureg->processor = processor; 750 return ureg; 751} 752 753 754void ureg_destroy( struct ureg_program *ureg ) 755{ 756 unsigned i; 757 758 for (i = 0; i < Elements(ureg->domain); i++) { 759 if (ureg->domain[i].tokens && 760 ureg->domain[i].tokens != error_tokens) 761 FREE(ureg->domain[i].tokens); 762 } 763 764 FREE(ureg); 765} 766