tgsi_ureg.c revision 38f6f23fcf37247fd709d1c612d08bfa9b124e69
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_context.h"
30#include "pipe/p_state.h"
31#include "tgsi/tgsi_ureg.h"
32#include "tgsi/tgsi_build.h"
33#include "tgsi/tgsi_info.h"
34#include "tgsi/tgsi_dump.h"
35#include "tgsi/tgsi_sanity.h"
36#include "util/u_debug.h"
37#include "util/u_memory.h"
38#include "util/u_math.h"
39
/* Overlay of every TGSI token layout written by this file.
 *
 * The token buffers are arrays of this union; each emit function picks
 * the member matching the token it is writing.  The plain `value`
 * member is what the emit code assigns 0 to in order to clear a token
 * before filling in its individual fields.
 */
union tgsi_any_token {
   struct tgsi_header header;
   struct tgsi_processor processor;
   struct tgsi_token token;
   struct tgsi_property prop;
   struct tgsi_property_data prop_data;
   struct tgsi_declaration decl;
   struct tgsi_declaration_range decl_range;
   struct tgsi_declaration_dimension decl_dim;
   struct tgsi_declaration_semantic decl_semantic;
   struct tgsi_immediate imm;
   union  tgsi_immediate_data imm_data;
   struct tgsi_instruction insn;
   struct tgsi_instruction_predicate insn_predicate;
   struct tgsi_instruction_label insn_label;
   struct tgsi_instruction_texture insn_texture;
   struct tgsi_src_register src;
   struct tgsi_dimension dim;
   struct tgsi_dst_register dst;
   unsigned value;   /* raw view, used to zero a whole token */
};
61
62
/* A growable buffer of tokens.
 *
 *   tokens - storage, or the shared error_tokens scratch array once an
 *            error has been recorded
 *   size   - capacity of `tokens`, in tokens
 *   order  - log2 of `size` while the buffer is heap allocated
 *   count  - number of tokens handed out so far
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;
   unsigned size, order, count;
};
69
/* Capacity limits for the fixed-size bookkeeping kept in
 * struct ureg_program.  The first five size the declaration arrays;
 * UREG_MAX_TEMP sizes the temps_active bitmask; the last three bound
 * the address / loop / predicate register counters.
 */
#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
#define UREG_MAX_LOOP 1
#define UREG_MAX_PRED 1
79
80struct const_decl {
81   struct {
82      unsigned first;
83      unsigned last;
84   } constant_range[UREG_MAX_CONSTANT_RANGE];
85   unsigned nr_constant_ranges;
86};
87
/* Indices into ureg_program::domain[].  Declaration tokens and
 * instruction tokens are accumulated in separate buffers.
 */
#define DOMAIN_DECL 0
#define DOMAIN_INSN 1
90
91struct ureg_program
92{
93   unsigned processor;
94   struct pipe_context *pipe;
95
96   struct {
97      unsigned semantic_name;
98      unsigned semantic_index;
99      unsigned interp;
100   } fs_input[UREG_MAX_INPUT];
101   unsigned nr_fs_inputs;
102
103   unsigned vs_inputs[UREG_MAX_INPUT/32];
104
105   struct {
106      unsigned index;
107   } gs_input[UREG_MAX_INPUT];
108   unsigned nr_gs_inputs;
109
110   struct {
111      unsigned index;
112      unsigned semantic_name;
113      unsigned semantic_index;
114   } system_value[UREG_MAX_SYSTEM_VALUE];
115   unsigned nr_system_values;
116
117   struct {
118      unsigned semantic_name;
119      unsigned semantic_index;
120   } output[UREG_MAX_OUTPUT];
121   unsigned nr_outputs;
122
123   struct {
124      union {
125         float f[4];
126         unsigned u[4];
127         int i[4];
128      } value;
129      unsigned nr;
130      unsigned type;
131   } immediate[UREG_MAX_IMMEDIATE];
132   unsigned nr_immediates;
133
134   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
135   unsigned nr_samplers;
136
137   unsigned temps_active[UREG_MAX_TEMP / 32];
138   unsigned nr_temps;
139
140   struct const_decl const_decls;
141   struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
142
143   unsigned property_gs_input_prim;
144   unsigned property_gs_output_prim;
145   unsigned property_gs_max_vertices;
146   unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */
147   unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */
148
149   unsigned nr_addrs;
150   unsigned nr_preds;
151   unsigned nr_loops;
152   unsigned nr_instructions;
153
154   struct ureg_tokens domain[2];
155};
156
157static union tgsi_any_token error_tokens[32];
158
159static void tokens_error( struct ureg_tokens *tokens )
160{
161   if (tokens->tokens && tokens->tokens != error_tokens)
162      FREE(tokens->tokens);
163
164   tokens->tokens = error_tokens;
165   tokens->size = Elements(error_tokens);
166   tokens->count = 0;
167}
168
169
170static void tokens_expand( struct ureg_tokens *tokens,
171                           unsigned count )
172{
173   unsigned old_size = tokens->size * sizeof(unsigned);
174
175   if (tokens->tokens == error_tokens) {
176      return;
177   }
178
179   while (tokens->count + count > tokens->size) {
180      tokens->size = (1 << ++tokens->order);
181   }
182
183   tokens->tokens = REALLOC(tokens->tokens,
184                            old_size,
185                            tokens->size * sizeof(unsigned));
186   if (tokens->tokens == NULL) {
187      tokens_error(tokens);
188   }
189}
190
191static void set_bad( struct ureg_program *ureg )
192{
193   tokens_error(&ureg->domain[0]);
194}
195
196
197
198static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
199                                         unsigned domain,
200                                         unsigned count )
201{
202   struct ureg_tokens *tokens = &ureg->domain[domain];
203   union tgsi_any_token *result;
204
205   if (tokens->count + count > tokens->size)
206      tokens_expand(tokens, count);
207
208   result = &tokens->tokens[tokens->count];
209   tokens->count += count;
210   return result;
211}
212
213
214static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
215                                            unsigned domain,
216                                            unsigned nr )
217{
218   if (ureg->domain[domain].tokens == error_tokens)
219      return &error_tokens[0];
220
221   return &ureg->domain[domain].tokens[nr];
222}
223
224
225
226static INLINE struct ureg_dst
227ureg_dst_register( unsigned file,
228                   unsigned index )
229{
230   struct ureg_dst dst;
231
232   dst.File      = file;
233   dst.WriteMask = TGSI_WRITEMASK_XYZW;
234   dst.Indirect  = 0;
235   dst.IndirectIndex = 0;
236   dst.IndirectSwizzle = 0;
237   dst.Saturate  = 0;
238   dst.Predicate = 0;
239   dst.PredNegate = 0;
240   dst.PredSwizzleX = TGSI_SWIZZLE_X;
241   dst.PredSwizzleY = TGSI_SWIZZLE_Y;
242   dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
243   dst.PredSwizzleW = TGSI_SWIZZLE_W;
244   dst.Index     = index;
245
246   return dst;
247}
248
249
250void
251ureg_property_gs_input_prim(struct ureg_program *ureg,
252                            unsigned input_prim)
253{
254   ureg->property_gs_input_prim = input_prim;
255}
256
257void
258ureg_property_gs_output_prim(struct ureg_program *ureg,
259                             unsigned output_prim)
260{
261   ureg->property_gs_output_prim = output_prim;
262}
263
264void
265ureg_property_gs_max_vertices(struct ureg_program *ureg,
266                              unsigned max_vertices)
267{
268   ureg->property_gs_max_vertices = max_vertices;
269}
270
271void
272ureg_property_fs_coord_origin(struct ureg_program *ureg,
273                            unsigned fs_coord_origin)
274{
275   ureg->property_fs_coord_origin = fs_coord_origin;
276}
277
278void
279ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
280                            unsigned fs_coord_pixel_center)
281{
282   ureg->property_fs_coord_pixel_center = fs_coord_pixel_center;
283}
284
285
286
287struct ureg_src
288ureg_DECL_fs_input( struct ureg_program *ureg,
289                    unsigned name,
290                    unsigned index,
291                    unsigned interp_mode )
292{
293   unsigned i;
294
295   for (i = 0; i < ureg->nr_fs_inputs; i++) {
296      if (ureg->fs_input[i].semantic_name == name &&
297          ureg->fs_input[i].semantic_index == index)
298         goto out;
299   }
300
301   if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
302      ureg->fs_input[i].semantic_name = name;
303      ureg->fs_input[i].semantic_index = index;
304      ureg->fs_input[i].interp = interp_mode;
305      ureg->nr_fs_inputs++;
306   }
307   else {
308      set_bad( ureg );
309   }
310
311out:
312   return ureg_src_register( TGSI_FILE_INPUT, i );
313}
314
315
316struct ureg_src
317ureg_DECL_vs_input( struct ureg_program *ureg,
318                    unsigned index )
319{
320   assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
321
322   ureg->vs_inputs[index/32] |= 1 << (index % 32);
323   return ureg_src_register( TGSI_FILE_INPUT, index );
324}
325
326
327struct ureg_src
328ureg_DECL_gs_input(struct ureg_program *ureg,
329                   unsigned index)
330{
331   if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
332      ureg->gs_input[ureg->nr_gs_inputs].index = index;
333      ureg->nr_gs_inputs++;
334   } else {
335      set_bad(ureg);
336   }
337
338   /* XXX: Add suport for true 2D input registers. */
339   return ureg_src_register(TGSI_FILE_INPUT, index);
340}
341
342
343struct ureg_src
344ureg_DECL_system_value(struct ureg_program *ureg,
345                       unsigned index,
346                       unsigned semantic_name,
347                       unsigned semantic_index)
348{
349   if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
350      ureg->system_value[ureg->nr_system_values].index = index;
351      ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
352      ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
353      ureg->nr_system_values++;
354   } else {
355      set_bad(ureg);
356   }
357
358   return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
359}
360
361
362struct ureg_dst
363ureg_DECL_output( struct ureg_program *ureg,
364                  unsigned name,
365                  unsigned index )
366{
367   unsigned i;
368
369   for (i = 0; i < ureg->nr_outputs; i++) {
370      if (ureg->output[i].semantic_name == name &&
371          ureg->output[i].semantic_index == index)
372         goto out;
373   }
374
375   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
376      ureg->output[i].semantic_name = name;
377      ureg->output[i].semantic_index = index;
378      ureg->nr_outputs++;
379   }
380   else {
381      set_bad( ureg );
382   }
383
384out:
385   return ureg_dst_register( TGSI_FILE_OUTPUT, i );
386}
387
388
389/* Returns a new constant register.  Keep track of which have been
390 * referred to so that we can emit decls later.
391 *
392 * Constant operands declared with this function must be addressed
393 * with a two-dimensional index.
394 *
395 * There is nothing in this code to bind this constant to any tracked
396 * value or manage any constant_buffer contents -- that's the
397 * resposibility of the calling code.
398 */
399void
400ureg_DECL_constant2D(struct ureg_program *ureg,
401                     unsigned first,
402                     unsigned last,
403                     unsigned index2D)
404{
405   struct const_decl *decl = &ureg->const_decls2D[index2D];
406
407   assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
408
409   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
410      uint i = decl->nr_constant_ranges++;
411
412      decl->constant_range[i].first = first;
413      decl->constant_range[i].last = last;
414   }
415}
416
417
418/* A one-dimensional, depricated version of ureg_DECL_constant2D().
419 *
420 * Constant operands declared with this function must be addressed
421 * with a one-dimensional index.
422 */
423struct ureg_src
424ureg_DECL_constant(struct ureg_program *ureg,
425                   unsigned index)
426{
427   struct const_decl *decl = &ureg->const_decls;
428   unsigned minconst = index, maxconst = index;
429   unsigned i;
430
431   /* Inside existing range?
432    */
433   for (i = 0; i < decl->nr_constant_ranges; i++) {
434      if (decl->constant_range[i].first <= index &&
435          decl->constant_range[i].last >= index) {
436         goto out;
437      }
438   }
439
440   /* Extend existing range?
441    */
442   for (i = 0; i < decl->nr_constant_ranges; i++) {
443      if (decl->constant_range[i].last == index - 1) {
444         decl->constant_range[i].last = index;
445         goto out;
446      }
447
448      if (decl->constant_range[i].first == index + 1) {
449         decl->constant_range[i].first = index;
450         goto out;
451      }
452
453      minconst = MIN2(minconst, decl->constant_range[i].first);
454      maxconst = MAX2(maxconst, decl->constant_range[i].last);
455   }
456
457   /* Create new range?
458    */
459   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
460      i = decl->nr_constant_ranges++;
461      decl->constant_range[i].first = index;
462      decl->constant_range[i].last = index;
463      goto out;
464   }
465
466   /* Collapse all ranges down to one:
467    */
468   i = 0;
469   decl->constant_range[0].first = minconst;
470   decl->constant_range[0].last = maxconst;
471   decl->nr_constant_ranges = 1;
472
473out:
474   assert(i < decl->nr_constant_ranges);
475   assert(decl->constant_range[i].first <= index);
476   assert(decl->constant_range[i].last >= index);
477   return ureg_src_register(TGSI_FILE_CONSTANT, index);
478}
479
480
481/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
482 * are legal, but will not be released.
483 */
484struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
485{
486   unsigned i;
487
488   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
489      int bit = ffs(~ureg->temps_active[i/32]);
490      if (bit != 0) {
491         i += bit - 1;
492         goto out;
493      }
494   }
495
496   /* No reusable temps, so allocate a new one:
497    */
498   i = ureg->nr_temps++;
499
500out:
501   if (i < UREG_MAX_TEMP)
502      ureg->temps_active[i/32] |= 1 << (i % 32);
503
504   if (i >= ureg->nr_temps)
505      ureg->nr_temps = i + 1;
506
507   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
508}
509
510
511void ureg_release_temporary( struct ureg_program *ureg,
512                             struct ureg_dst tmp )
513{
514   if(tmp.File == TGSI_FILE_TEMPORARY)
515      if (tmp.Index < UREG_MAX_TEMP)
516         ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
517}
518
519
520/* Allocate a new address register.
521 */
522struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
523{
524   if (ureg->nr_addrs < UREG_MAX_ADDR)
525      return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
526
527   assert( 0 );
528   return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
529}
530
531/* Allocate a new loop register.
532 */
533struct ureg_dst
534ureg_DECL_loop(struct ureg_program *ureg)
535{
536   if (ureg->nr_loops < UREG_MAX_LOOP) {
537      return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
538   }
539
540   assert(0);
541   return ureg_dst_register(TGSI_FILE_LOOP, 0);
542}
543
544/* Allocate a new predicate register.
545 */
546struct ureg_dst
547ureg_DECL_predicate(struct ureg_program *ureg)
548{
549   if (ureg->nr_preds < UREG_MAX_PRED) {
550      return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++);
551   }
552
553   assert(0);
554   return ureg_dst_register(TGSI_FILE_PREDICATE, 0);
555}
556
557/* Allocate a new sampler.
558 */
559struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
560                                   unsigned nr )
561{
562   unsigned i;
563
564   for (i = 0; i < ureg->nr_samplers; i++)
565      if (ureg->sampler[i].Index == nr)
566         return ureg->sampler[i];
567
568   if (i < PIPE_MAX_SAMPLERS) {
569      ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
570      ureg->nr_samplers++;
571      return ureg->sampler[i];
572   }
573
574   assert( 0 );
575   return ureg->sampler[0];
576}
577
578
/* Try to express the `nr` values in `v` as a swizzle of the immediate
 * whose components are v2[0 .. *pnr2-1], appending values that are not
 * present yet while there is room (at most four components).
 *
 * On success returns TRUE, stores the new component count in *pnr2
 * and leaves in *swizzle two bits per input value giving the component
 * it maps to.  On failure returns FALSE and leaves *pnr2 alone, so the
 * immediate is only actually expanded when everything fitted.
 */
static int
match_or_expand_immediate( const unsigned *v,
                           unsigned nr,
                           unsigned *v2,
                           unsigned *pnr2,
                           unsigned *swizzle )
{
   unsigned used = *pnr2;
   unsigned chan;

   *swizzle = 0;

   for (chan = 0; chan < nr; chan++) {
      unsigned slot = 0;

      /* First existing component holding this value, if any. */
      while (slot < used && v2[slot] != v[chan])
         slot++;

      if (slot == used) {
         /* Not present: append, unless the immediate is full. */
         if (used >= 4)
            return FALSE;

         v2[used] = v[chan];
         used++;
      }

      *swizzle |= slot << (chan * 2);
   }

   *pnr2 = used;
   return TRUE;
}
617
618
619static struct ureg_src
620decl_immediate( struct ureg_program *ureg,
621                const unsigned *v,
622                unsigned nr,
623                unsigned type )
624{
625   unsigned i, j;
626   unsigned swizzle = 0;
627
628   /* Could do a first pass where we examine all existing immediates
629    * without expanding.
630    */
631
632   for (i = 0; i < ureg->nr_immediates; i++) {
633      if (ureg->immediate[i].type != type) {
634         continue;
635      }
636      if (match_or_expand_immediate(v,
637                                    nr,
638                                    ureg->immediate[i].value.u,
639                                    &ureg->immediate[i].nr,
640                                    &swizzle)) {
641         goto out;
642      }
643   }
644
645   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
646      i = ureg->nr_immediates++;
647      ureg->immediate[i].type = type;
648      if (match_or_expand_immediate(v,
649                                    nr,
650                                    ureg->immediate[i].value.u,
651                                    &ureg->immediate[i].nr,
652                                    &swizzle)) {
653         goto out;
654      }
655   }
656
657   set_bad(ureg);
658
659out:
660   /* Make sure that all referenced elements are from this immediate.
661    * Has the effect of making size-one immediates into scalars.
662    */
663   for (j = nr; j < 4; j++) {
664      swizzle |= (swizzle & 0x3) << (j * 2);
665   }
666
667   return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
668                       (swizzle >> 0) & 0x3,
669                       (swizzle >> 2) & 0x3,
670                       (swizzle >> 4) & 0x3,
671                       (swizzle >> 6) & 0x3);
672}
673
674
675struct ureg_src
676ureg_DECL_immediate( struct ureg_program *ureg,
677                     const float *v,
678                     unsigned nr )
679{
680   union {
681      float f[4];
682      unsigned u[4];
683   } fu;
684   unsigned int i;
685
686   for (i = 0; i < nr; i++) {
687      fu.f[i] = v[i];
688   }
689
690   return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
691}
692
693
694struct ureg_src
695ureg_DECL_immediate_uint( struct ureg_program *ureg,
696                          const unsigned *v,
697                          unsigned nr )
698{
699   return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
700}
701
702
703struct ureg_src
704ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
705                                const unsigned *v,
706                                unsigned nr )
707{
708   uint index;
709   uint i;
710
711   if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
712      set_bad(ureg);
713      return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
714   }
715
716   index = ureg->nr_immediates;
717   ureg->nr_immediates += (nr + 3) / 4;
718
719   for (i = index; i < ureg->nr_immediates; i++) {
720      ureg->immediate[i].type = TGSI_IMM_UINT32;
721      ureg->immediate[i].nr = nr > 4 ? 4 : nr;
722      memcpy(ureg->immediate[i].value.u,
723             &v[(i - index) * 4],
724             ureg->immediate[i].nr * sizeof(uint));
725      nr -= 4;
726   }
727
728   return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
729}
730
731
732struct ureg_src
733ureg_DECL_immediate_int( struct ureg_program *ureg,
734                         const int *v,
735                         unsigned nr )
736{
737   return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
738}
739
740
741void
742ureg_emit_src( struct ureg_program *ureg,
743               struct ureg_src src )
744{
745   unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0);
746
747   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
748   unsigned n = 0;
749
750   assert(src.File != TGSI_FILE_NULL);
751   assert(src.File != TGSI_FILE_OUTPUT);
752   assert(src.File < TGSI_FILE_COUNT);
753
754   out[n].value = 0;
755   out[n].src.File = src.File;
756   out[n].src.SwizzleX = src.SwizzleX;
757   out[n].src.SwizzleY = src.SwizzleY;
758   out[n].src.SwizzleZ = src.SwizzleZ;
759   out[n].src.SwizzleW = src.SwizzleW;
760   out[n].src.Index = src.Index;
761   out[n].src.Negate = src.Negate;
762   out[0].src.Absolute = src.Absolute;
763   n++;
764
765   if (src.Indirect) {
766      out[0].src.Indirect = 1;
767      out[n].value = 0;
768      out[n].src.File = src.IndirectFile;
769      out[n].src.SwizzleX = src.IndirectSwizzle;
770      out[n].src.SwizzleY = src.IndirectSwizzle;
771      out[n].src.SwizzleZ = src.IndirectSwizzle;
772      out[n].src.SwizzleW = src.IndirectSwizzle;
773      out[n].src.Index = src.IndirectIndex;
774      n++;
775   }
776
777   if (src.Dimension) {
778      out[0].src.Dimension = 1;
779      out[n].dim.Indirect = 0;
780      out[n].dim.Dimension = 0;
781      out[n].dim.Padding = 0;
782      out[n].dim.Index = src.DimensionIndex;
783      n++;
784   }
785
786   assert(n == size);
787}
788
789
790void
791ureg_emit_dst( struct ureg_program *ureg,
792               struct ureg_dst dst )
793{
794   unsigned size = (1 +
795                    (dst.Indirect ? 1 : 0));
796
797   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
798   unsigned n = 0;
799
800   assert(dst.File != TGSI_FILE_NULL);
801   assert(dst.File != TGSI_FILE_CONSTANT);
802   assert(dst.File != TGSI_FILE_INPUT);
803   assert(dst.File != TGSI_FILE_SAMPLER);
804   assert(dst.File != TGSI_FILE_IMMEDIATE);
805   assert(dst.File < TGSI_FILE_COUNT);
806
807   out[n].value = 0;
808   out[n].dst.File = dst.File;
809   out[n].dst.WriteMask = dst.WriteMask;
810   out[n].dst.Indirect = dst.Indirect;
811   out[n].dst.Index = dst.Index;
812   n++;
813
814   if (dst.Indirect) {
815      out[n].value = 0;
816      out[n].src.File = TGSI_FILE_ADDRESS;
817      out[n].src.SwizzleX = dst.IndirectSwizzle;
818      out[n].src.SwizzleY = dst.IndirectSwizzle;
819      out[n].src.SwizzleZ = dst.IndirectSwizzle;
820      out[n].src.SwizzleW = dst.IndirectSwizzle;
821      out[n].src.Index = dst.IndirectIndex;
822      n++;
823   }
824
825   assert(n == size);
826}
827
828
/* Debug-build sanity check: the operand counts passed for an opcode
 * must match the opcode info table.  Compiles to nothing without
 * DEBUG.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );

   assert(info);
   if (!info)
      return;

   assert(nr_dst == info->num_dst);
   assert(nr_src == info->num_src);
#endif
}
842
843struct ureg_emit_insn_result
844ureg_emit_insn(struct ureg_program *ureg,
845               unsigned opcode,
846               boolean saturate,
847               boolean predicate,
848               boolean pred_negate,
849               unsigned pred_swizzle_x,
850               unsigned pred_swizzle_y,
851               unsigned pred_swizzle_z,
852               unsigned pred_swizzle_w,
853               unsigned num_dst,
854               unsigned num_src )
855{
856   union tgsi_any_token *out;
857   uint count = predicate ? 2 : 1;
858   struct ureg_emit_insn_result result;
859
860   validate( opcode, num_dst, num_src );
861
862   out = get_tokens( ureg, DOMAIN_INSN, count );
863   out[0].insn = tgsi_default_instruction();
864   out[0].insn.Opcode = opcode;
865   out[0].insn.Saturate = saturate;
866   out[0].insn.NumDstRegs = num_dst;
867   out[0].insn.NumSrcRegs = num_src;
868
869   result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
870   result.extended_token = result.insn_token;
871
872   if (predicate) {
873      out[0].insn.Predicate = 1;
874      out[1].insn_predicate = tgsi_default_instruction_predicate();
875      out[1].insn_predicate.Negate = pred_negate;
876      out[1].insn_predicate.SwizzleX = pred_swizzle_x;
877      out[1].insn_predicate.SwizzleY = pred_swizzle_y;
878      out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
879      out[1].insn_predicate.SwizzleW = pred_swizzle_w;
880   }
881
882   ureg->nr_instructions++;
883
884   return result;
885}
886
887
888void
889ureg_emit_label(struct ureg_program *ureg,
890                unsigned extended_token,
891                unsigned *label_token )
892{
893   union tgsi_any_token *out, *insn;
894
895   if(!label_token)
896      return;
897
898   out = get_tokens( ureg, DOMAIN_INSN, 1 );
899   out[0].value = 0;
900
901   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
902   insn->insn.Label = 1;
903
904   *label_token = ureg->domain[DOMAIN_INSN].count - 1;
905}
906
907/* Will return a number which can be used in a label to point to the
908 * next instruction to be emitted.
909 */
910unsigned
911ureg_get_instruction_number( struct ureg_program *ureg )
912{
913   return ureg->nr_instructions;
914}
915
916/* Patch a given label (expressed as a token number) to point to a
917 * given instruction (expressed as an instruction number).
918 */
919void
920ureg_fixup_label(struct ureg_program *ureg,
921                 unsigned label_token,
922                 unsigned instruction_number )
923{
924   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
925
926   out->insn_label.Label = instruction_number;
927}
928
929
930void
931ureg_emit_texture(struct ureg_program *ureg,
932                  unsigned extended_token,
933                  unsigned target )
934{
935   union tgsi_any_token *out, *insn;
936
937   out = get_tokens( ureg, DOMAIN_INSN, 1 );
938   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
939
940   insn->insn.Texture = 1;
941
942   out[0].value = 0;
943   out[0].insn_texture.Texture = target;
944}
945
946
947void
948ureg_fixup_insn_size(struct ureg_program *ureg,
949                     unsigned insn )
950{
951   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
952
953   assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
954   out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
955}
956
957
958void
959ureg_insn(struct ureg_program *ureg,
960          unsigned opcode,
961          const struct ureg_dst *dst,
962          unsigned nr_dst,
963          const struct ureg_src *src,
964          unsigned nr_src )
965{
966   struct ureg_emit_insn_result insn;
967   unsigned i;
968   boolean saturate;
969   boolean predicate;
970   boolean negate = FALSE;
971   unsigned swizzle[4] = { 0 };
972
973   saturate = nr_dst ? dst[0].Saturate : FALSE;
974   predicate = nr_dst ? dst[0].Predicate : FALSE;
975   if (predicate) {
976      negate = dst[0].PredNegate;
977      swizzle[0] = dst[0].PredSwizzleX;
978      swizzle[1] = dst[0].PredSwizzleY;
979      swizzle[2] = dst[0].PredSwizzleZ;
980      swizzle[3] = dst[0].PredSwizzleW;
981   }
982
983   insn = ureg_emit_insn(ureg,
984                         opcode,
985                         saturate,
986                         predicate,
987                         negate,
988                         swizzle[0],
989                         swizzle[1],
990                         swizzle[2],
991                         swizzle[3],
992                         nr_dst,
993                         nr_src);
994
995   for (i = 0; i < nr_dst; i++)
996      ureg_emit_dst( ureg, dst[i] );
997
998   for (i = 0; i < nr_src; i++)
999      ureg_emit_src( ureg, src[i] );
1000
1001   ureg_fixup_insn_size( ureg, insn.insn_token );
1002}
1003
1004void
1005ureg_tex_insn(struct ureg_program *ureg,
1006              unsigned opcode,
1007              const struct ureg_dst *dst,
1008              unsigned nr_dst,
1009              unsigned target,
1010              const struct ureg_src *src,
1011              unsigned nr_src )
1012{
1013   struct ureg_emit_insn_result insn;
1014   unsigned i;
1015   boolean saturate;
1016   boolean predicate;
1017   boolean negate = FALSE;
1018   unsigned swizzle[4] = { 0 };
1019
1020   saturate = nr_dst ? dst[0].Saturate : FALSE;
1021   predicate = nr_dst ? dst[0].Predicate : FALSE;
1022   if (predicate) {
1023      negate = dst[0].PredNegate;
1024      swizzle[0] = dst[0].PredSwizzleX;
1025      swizzle[1] = dst[0].PredSwizzleY;
1026      swizzle[2] = dst[0].PredSwizzleZ;
1027      swizzle[3] = dst[0].PredSwizzleW;
1028   }
1029
1030   insn = ureg_emit_insn(ureg,
1031                         opcode,
1032                         saturate,
1033                         predicate,
1034                         negate,
1035                         swizzle[0],
1036                         swizzle[1],
1037                         swizzle[2],
1038                         swizzle[3],
1039                         nr_dst,
1040                         nr_src);
1041
1042   ureg_emit_texture( ureg, insn.extended_token, target );
1043
1044   for (i = 0; i < nr_dst; i++)
1045      ureg_emit_dst( ureg, dst[i] );
1046
1047   for (i = 0; i < nr_src; i++)
1048      ureg_emit_src( ureg, src[i] );
1049
1050   ureg_fixup_insn_size( ureg, insn.insn_token );
1051}
1052
1053
1054void
1055ureg_label_insn(struct ureg_program *ureg,
1056                unsigned opcode,
1057                const struct ureg_src *src,
1058                unsigned nr_src,
1059                unsigned *label_token )
1060{
1061   struct ureg_emit_insn_result insn;
1062   unsigned i;
1063
1064   insn = ureg_emit_insn(ureg,
1065                         opcode,
1066                         FALSE,
1067                         FALSE,
1068                         FALSE,
1069                         TGSI_SWIZZLE_X,
1070                         TGSI_SWIZZLE_Y,
1071                         TGSI_SWIZZLE_Z,
1072                         TGSI_SWIZZLE_W,
1073                         0,
1074                         nr_src);
1075
1076   ureg_emit_label( ureg, insn.extended_token, label_token );
1077
1078   for (i = 0; i < nr_src; i++)
1079      ureg_emit_src( ureg, src[i] );
1080
1081   ureg_fixup_insn_size( ureg, insn.insn_token );
1082}
1083
1084
1085
1086static void emit_decl( struct ureg_program *ureg,
1087                       unsigned file,
1088                       unsigned index,
1089                       unsigned semantic_name,
1090                       unsigned semantic_index,
1091                       unsigned interp )
1092{
1093   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
1094
1095   out[0].value = 0;
1096   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1097   out[0].decl.NrTokens = 3;
1098   out[0].decl.File = file;
1099   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
1100   out[0].decl.Interpolate = interp;
1101   out[0].decl.Semantic = 1;
1102
1103   out[1].value = 0;
1104   out[1].decl_range.First =
1105      out[1].decl_range.Last = index;
1106
1107   out[2].value = 0;
1108   out[2].decl_semantic.Name = semantic_name;
1109   out[2].decl_semantic.Index = semantic_index;
1110
1111}
1112
1113
1114static void emit_decl_range( struct ureg_program *ureg,
1115                             unsigned file,
1116                             unsigned first,
1117                             unsigned count )
1118{
1119   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1120
1121   out[0].value = 0;
1122   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1123   out[0].decl.NrTokens = 2;
1124   out[0].decl.File = file;
1125   out[0].decl.UsageMask = 0xf;
1126   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
1127   out[0].decl.Semantic = 0;
1128
1129   out[1].value = 0;
1130   out[1].decl_range.First = first;
1131   out[1].decl_range.Last = first + count - 1;
1132}
1133
1134static void
1135emit_decl_range2D(struct ureg_program *ureg,
1136                  unsigned file,
1137                  unsigned first,
1138                  unsigned last,
1139                  unsigned index2D)
1140{
1141   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1142
1143   out[0].value = 0;
1144   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1145   out[0].decl.NrTokens = 3;
1146   out[0].decl.File = file;
1147   out[0].decl.UsageMask = 0xf;
1148   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
1149   out[0].decl.Dimension = 1;
1150
1151   out[1].value = 0;
1152   out[1].decl_range.First = first;
1153   out[1].decl_range.Last = last;
1154
1155   out[2].value = 0;
1156   out[2].decl_dim.Index2D = index2D;
1157}
1158
1159static void
1160emit_immediate( struct ureg_program *ureg,
1161                const unsigned *v,
1162                unsigned type )
1163{
1164   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1165
1166   out[0].value = 0;
1167   out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1168   out[0].imm.NrTokens = 5;
1169   out[0].imm.DataType = type;
1170   out[0].imm.Padding = 0;
1171
1172   out[1].imm_data.Uint = v[0];
1173   out[2].imm_data.Uint = v[1];
1174   out[3].imm_data.Uint = v[2];
1175   out[4].imm_data.Uint = v[3];
1176}
1177
1178static void
1179emit_property(struct ureg_program *ureg,
1180              unsigned name,
1181              unsigned data)
1182{
1183   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1184
1185   out[0].value = 0;
1186   out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
1187   out[0].prop.NrTokens = 2;
1188   out[0].prop.PropertyName = name;
1189
1190   out[1].prop_data.Data = data;
1191}
1192
1193
1194static void emit_decls( struct ureg_program *ureg )
1195{
1196   unsigned i;
1197
1198   if (ureg->property_gs_input_prim != ~0) {
1199      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1200
1201      emit_property(ureg,
1202                    TGSI_PROPERTY_GS_INPUT_PRIM,
1203                    ureg->property_gs_input_prim);
1204   }
1205
1206   if (ureg->property_gs_output_prim != ~0) {
1207      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1208
1209      emit_property(ureg,
1210                    TGSI_PROPERTY_GS_OUTPUT_PRIM,
1211                    ureg->property_gs_output_prim);
1212   }
1213
1214   if (ureg->property_gs_max_vertices != ~0) {
1215      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
1216
1217      emit_property(ureg,
1218                    TGSI_PROPERTY_GS_MAX_VERTICES,
1219                    ureg->property_gs_max_vertices);
1220   }
1221
1222   if (ureg->property_fs_coord_origin) {
1223      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1224
1225      emit_property(ureg,
1226                    TGSI_PROPERTY_FS_COORD_ORIGIN,
1227                    ureg->property_fs_coord_origin);
1228   }
1229
1230   if (ureg->property_fs_coord_pixel_center) {
1231      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
1232
1233      emit_property(ureg,
1234                    TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
1235                    ureg->property_fs_coord_pixel_center);
1236   }
1237
1238   if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
1239      for (i = 0; i < UREG_MAX_INPUT; i++) {
1240         if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
1241            emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
1242         }
1243      }
1244   } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
1245      for (i = 0; i < ureg->nr_fs_inputs; i++) {
1246         emit_decl( ureg,
1247                    TGSI_FILE_INPUT,
1248                    i,
1249                    ureg->fs_input[i].semantic_name,
1250                    ureg->fs_input[i].semantic_index,
1251                    ureg->fs_input[i].interp );
1252      }
1253   } else {
1254      for (i = 0; i < ureg->nr_gs_inputs; i++) {
1255         emit_decl_range(ureg,
1256                         TGSI_FILE_INPUT,
1257                         ureg->gs_input[i].index,
1258                         1);
1259      }
1260   }
1261
1262   for (i = 0; i < ureg->nr_system_values; i++) {
1263      emit_decl(ureg,
1264                TGSI_FILE_SYSTEM_VALUE,
1265                ureg->system_value[i].index,
1266                ureg->system_value[i].semantic_name,
1267                ureg->system_value[i].semantic_index,
1268                TGSI_INTERPOLATE_CONSTANT);
1269   }
1270
1271   for (i = 0; i < ureg->nr_outputs; i++) {
1272      emit_decl( ureg,
1273                 TGSI_FILE_OUTPUT,
1274                 i,
1275                 ureg->output[i].semantic_name,
1276                 ureg->output[i].semantic_index,
1277                 TGSI_INTERPOLATE_CONSTANT );
1278   }
1279
1280   for (i = 0; i < ureg->nr_samplers; i++) {
1281      emit_decl_range( ureg,
1282                       TGSI_FILE_SAMPLER,
1283                       ureg->sampler[i].Index, 1 );
1284   }
1285
1286   if (ureg->const_decls.nr_constant_ranges) {
1287      for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
1288         emit_decl_range(ureg,
1289                         TGSI_FILE_CONSTANT,
1290                         ureg->const_decls.constant_range[i].first,
1291                         ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1);
1292      }
1293   }
1294
1295   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
1296      struct const_decl *decl = &ureg->const_decls2D[i];
1297
1298      if (decl->nr_constant_ranges) {
1299         uint j;
1300
1301         for (j = 0; j < decl->nr_constant_ranges; j++) {
1302            emit_decl_range2D(ureg,
1303                              TGSI_FILE_CONSTANT,
1304                              decl->constant_range[j].first,
1305                              decl->constant_range[j].last,
1306                              i);
1307         }
1308      }
1309   }
1310
1311   if (ureg->nr_temps) {
1312      emit_decl_range( ureg,
1313                       TGSI_FILE_TEMPORARY,
1314                       0, ureg->nr_temps );
1315   }
1316
1317   if (ureg->nr_addrs) {
1318      emit_decl_range( ureg,
1319                       TGSI_FILE_ADDRESS,
1320                       0, ureg->nr_addrs );
1321   }
1322
1323   if (ureg->nr_loops) {
1324      emit_decl_range(ureg,
1325                      TGSI_FILE_LOOP,
1326                      0,
1327                      ureg->nr_loops);
1328   }
1329
1330   if (ureg->nr_preds) {
1331      emit_decl_range(ureg,
1332                      TGSI_FILE_PREDICATE,
1333                      0,
1334                      ureg->nr_preds);
1335   }
1336
1337   for (i = 0; i < ureg->nr_immediates; i++) {
1338      emit_immediate( ureg,
1339                      ureg->immediate[i].value.u,
1340                      ureg->immediate[i].type );
1341   }
1342}
1343
1344/* Append the instruction tokens onto the declarations to build a
1345 * contiguous stream suitable to send to the driver.
1346 */
1347static void copy_instructions( struct ureg_program *ureg )
1348{
1349   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
1350   union tgsi_any_token *out = get_tokens( ureg,
1351                                           DOMAIN_DECL,
1352                                           nr_tokens );
1353
1354   memcpy(out,
1355          ureg->domain[DOMAIN_INSN].tokens,
1356          nr_tokens * sizeof out[0] );
1357}
1358
1359
1360static void
1361fixup_header_size(struct ureg_program *ureg)
1362{
1363   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
1364
1365   out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
1366}
1367
1368
1369static void
1370emit_header( struct ureg_program *ureg )
1371{
1372   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1373
1374   out[0].header.HeaderSize = 2;
1375   out[0].header.BodySize = 0;
1376
1377   out[1].processor.Processor = ureg->processor;
1378   out[1].processor.Padding = 0;
1379}
1380
1381
1382const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
1383{
1384   const struct tgsi_token *tokens;
1385
1386   emit_header( ureg );
1387   emit_decls( ureg );
1388   copy_instructions( ureg );
1389   fixup_header_size( ureg );
1390
1391   if (ureg->domain[0].tokens == error_tokens ||
1392       ureg->domain[1].tokens == error_tokens) {
1393      debug_printf("%s: error in generated shader\n", __FUNCTION__);
1394      assert(0);
1395      return NULL;
1396   }
1397
1398   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1399
1400   if (0) {
1401      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
1402                   ureg->domain[DOMAIN_DECL].count);
1403      tgsi_dump( tokens, 0 );
1404   }
1405
1406#if DEBUG
1407   if (tokens && !tgsi_sanity_check(tokens)) {
1408      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
1409      tgsi_dump(tokens, 0);
1410      assert(0);
1411   }
1412#endif
1413
1414
1415   return tokens;
1416}
1417
1418
1419void *ureg_create_shader( struct ureg_program *ureg,
1420                          struct pipe_context *pipe )
1421{
1422   struct pipe_shader_state state;
1423
1424   state.tokens = ureg_finalize(ureg);
1425   if(!state.tokens)
1426      return NULL;
1427
1428   if (ureg->processor == TGSI_PROCESSOR_VERTEX)
1429      return pipe->create_vs_state( pipe, &state );
1430   else
1431      return pipe->create_fs_state( pipe, &state );
1432}
1433
1434
1435const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
1436                                          unsigned *nr_tokens )
1437{
1438   const struct tgsi_token *tokens;
1439
1440   ureg_finalize(ureg);
1441
1442   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1443
1444   if (nr_tokens)
1445      *nr_tokens = ureg->domain[DOMAIN_DECL].size;
1446
1447   ureg->domain[DOMAIN_DECL].tokens = 0;
1448   ureg->domain[DOMAIN_DECL].size = 0;
1449   ureg->domain[DOMAIN_DECL].order = 0;
1450   ureg->domain[DOMAIN_DECL].count = 0;
1451
1452   return tokens;
1453}
1454
1455
1456struct ureg_program *ureg_create( unsigned processor )
1457{
1458   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
1459   if (ureg == NULL)
1460      return NULL;
1461
1462   ureg->processor = processor;
1463   ureg->property_gs_input_prim = ~0;
1464   ureg->property_gs_output_prim = ~0;
1465   ureg->property_gs_max_vertices = ~0;
1466   return ureg;
1467}
1468
1469
1470void ureg_destroy( struct ureg_program *ureg )
1471{
1472   unsigned i;
1473
1474   for (i = 0; i < Elements(ureg->domain); i++) {
1475      if (ureg->domain[i].tokens &&
1476          ureg->domain[i].tokens != error_tokens)
1477         FREE(ureg->domain[i].tokens);
1478   }
1479
1480   FREE(ureg);
1481}
1482