tgsi_ureg.c revision c14be63c5647e4406a0a4d80570a4def593b551b
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_context.h"
30#include "pipe/p_state.h"
31#include "tgsi/tgsi_ureg.h"
32#include "tgsi/tgsi_build.h"
33#include "tgsi/tgsi_info.h"
34#include "tgsi/tgsi_dump.h"
35#include "tgsi/tgsi_sanity.h"
36#include "util/u_memory.h"
37#include "util/u_math.h"
38
39union tgsi_any_token {
40   struct tgsi_header header;
41   struct tgsi_processor processor;
42   struct tgsi_token token;
43   struct tgsi_declaration decl;
44   struct tgsi_declaration_range decl_range;
45   struct tgsi_declaration_semantic decl_semantic;
46   struct tgsi_immediate imm;
47   union  tgsi_immediate_data imm_data;
48   struct tgsi_instruction insn;
49   struct tgsi_instruction_predicate insn_predicate;
50   struct tgsi_instruction_label insn_label;
51   struct tgsi_instruction_texture insn_texture;
52   struct tgsi_src_register src;
53   struct tgsi_dimension dim;
54   struct tgsi_dst_register dst;
55   unsigned value;
56};
57
58
/**
 * Growable array of tokens for one domain of the program.
 */
struct ureg_tokens {
   /* Token storage; points at the static error_tokens array once an
    * error has been recorded for this domain.
    */
   union tgsi_any_token *tokens;

   /* Capacity of `tokens`, counted in tokens. */
   unsigned size;

   /* Exponent used by tokens_expand(): capacity is (1 << order). */
   unsigned order;

   /* Number of tokens handed out by get_tokens() so far. */
   unsigned count;
};
65
/* Capacities of the fixed-size bookkeeping arrays and counters kept in
 * struct ureg_program.
 */
#define UREG_MAX_INPUT           PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT          PIPE_MAX_ATTRIBS
#define UREG_MAX_CONSTANT_RANGE  32
#define UREG_MAX_IMMEDIATE       32
#define UREG_MAX_TEMP            256   /* temporaries tracked in temps_active[] */
#define UREG_MAX_ADDR            2
#define UREG_MAX_LOOP            1
#define UREG_MAX_PRED            1

/* Indices into ureg_program::domain[]. */
#define DOMAIN_DECL 0   /* header, declarations, immediates; final stream */
#define DOMAIN_INSN 1   /* instruction tokens */
77
78struct ureg_program
79{
80   unsigned processor;
81   struct pipe_context *pipe;
82
83   struct {
84      unsigned semantic_name;
85      unsigned semantic_index;
86      unsigned interp;
87   } fs_input[UREG_MAX_INPUT];
88   unsigned nr_fs_inputs;
89
90   unsigned vs_inputs[UREG_MAX_INPUT/32];
91
92   struct {
93      unsigned index;
94   } gs_input[UREG_MAX_INPUT];
95   unsigned nr_gs_inputs;
96
97   struct {
98      unsigned semantic_name;
99      unsigned semantic_index;
100   } output[UREG_MAX_OUTPUT];
101   unsigned nr_outputs;
102
103   struct {
104      float v[4];
105      unsigned nr;
106   } immediate[UREG_MAX_IMMEDIATE];
107   unsigned nr_immediates;
108
109   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
110   unsigned nr_samplers;
111
112   unsigned temps_active[UREG_MAX_TEMP / 32];
113   unsigned nr_temps;
114
115   struct {
116      unsigned first;
117      unsigned last;
118   } constant_range[UREG_MAX_CONSTANT_RANGE];
119   unsigned nr_constant_ranges;
120
121   unsigned nr_addrs;
122   unsigned nr_preds;
123   unsigned nr_loops;
124   unsigned nr_instructions;
125
126   struct ureg_tokens domain[2];
127};
128
129static union tgsi_any_token error_tokens[32];
130
131static void tokens_error( struct ureg_tokens *tokens )
132{
133   if (tokens->tokens && tokens->tokens != error_tokens)
134      FREE(tokens->tokens);
135
136   tokens->tokens = error_tokens;
137   tokens->size = Elements(error_tokens);
138   tokens->count = 0;
139}
140
141
142static void tokens_expand( struct ureg_tokens *tokens,
143                           unsigned count )
144{
145   unsigned old_size = tokens->size * sizeof(unsigned);
146
147   if (tokens->tokens == error_tokens) {
148      return;
149   }
150
151   while (tokens->count + count > tokens->size) {
152      tokens->size = (1 << ++tokens->order);
153   }
154
155   tokens->tokens = REALLOC(tokens->tokens,
156                            old_size,
157                            tokens->size * sizeof(unsigned));
158   if (tokens->tokens == NULL) {
159      tokens_error(tokens);
160   }
161}
162
163static void set_bad( struct ureg_program *ureg )
164{
165   tokens_error(&ureg->domain[0]);
166}
167
168
169
170static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
171                                         unsigned domain,
172                                         unsigned count )
173{
174   struct ureg_tokens *tokens = &ureg->domain[domain];
175   union tgsi_any_token *result;
176
177   if (tokens->count + count > tokens->size)
178      tokens_expand(tokens, count);
179
180   result = &tokens->tokens[tokens->count];
181   tokens->count += count;
182   return result;
183}
184
185
186static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
187                                            unsigned domain,
188                                            unsigned nr )
189{
190   if (ureg->domain[domain].tokens == error_tokens)
191      return &error_tokens[0];
192
193   return &ureg->domain[domain].tokens[nr];
194}
195
196
197
198static INLINE struct ureg_dst
199ureg_dst_register( unsigned file,
200                   unsigned index )
201{
202   struct ureg_dst dst;
203
204   dst.File      = file;
205   dst.WriteMask = TGSI_WRITEMASK_XYZW;
206   dst.Indirect  = 0;
207   dst.IndirectIndex = 0;
208   dst.IndirectSwizzle = 0;
209   dst.Saturate  = 0;
210   dst.Predicate = 0;
211   dst.PredNegate = 0;
212   dst.PredSwizzleX = TGSI_SWIZZLE_X;
213   dst.PredSwizzleY = TGSI_SWIZZLE_Y;
214   dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
215   dst.PredSwizzleW = TGSI_SWIZZLE_W;
216   dst.Index     = index;
217
218   return dst;
219}
220
221static INLINE struct ureg_src
222ureg_src_register( unsigned file,
223                   unsigned index )
224{
225   struct ureg_src src;
226
227   src.File     = file;
228   src.SwizzleX = TGSI_SWIZZLE_X;
229   src.SwizzleY = TGSI_SWIZZLE_Y;
230   src.SwizzleZ = TGSI_SWIZZLE_Z;
231   src.SwizzleW = TGSI_SWIZZLE_W;
232   src.Pad      = 0;
233   src.Indirect = 0;
234   src.IndirectIndex = 0;
235   src.IndirectSwizzle = 0;
236   src.Absolute = 0;
237   src.Index    = index;
238   src.Negate   = 0;
239
240   return src;
241}
242
243
244
245
246struct ureg_src
247ureg_DECL_fs_input( struct ureg_program *ureg,
248                    unsigned name,
249                    unsigned index,
250                    unsigned interp_mode )
251{
252   unsigned i;
253
254   for (i = 0; i < ureg->nr_fs_inputs; i++) {
255      if (ureg->fs_input[i].semantic_name == name &&
256          ureg->fs_input[i].semantic_index == index)
257         goto out;
258   }
259
260   if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
261      ureg->fs_input[i].semantic_name = name;
262      ureg->fs_input[i].semantic_index = index;
263      ureg->fs_input[i].interp = interp_mode;
264      ureg->nr_fs_inputs++;
265   }
266   else {
267      set_bad( ureg );
268   }
269
270out:
271   return ureg_src_register( TGSI_FILE_INPUT, i );
272}
273
274
275struct ureg_src
276ureg_DECL_vs_input( struct ureg_program *ureg,
277                    unsigned index )
278{
279   assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
280
281   ureg->vs_inputs[index/32] |= 1 << (index % 32);
282   return ureg_src_register( TGSI_FILE_INPUT, index );
283}
284
285
286struct ureg_src
287ureg_DECL_gs_input(struct ureg_program *ureg,
288                   unsigned index)
289{
290   if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
291      ureg->gs_input[ureg->nr_gs_inputs].index = index;
292      ureg->nr_gs_inputs++;
293   } else {
294      set_bad(ureg);
295   }
296
297   /* XXX: Add suport for true 2D input registers. */
298   return ureg_src_register(TGSI_FILE_INPUT, index);
299}
300
301
302struct ureg_dst
303ureg_DECL_output( struct ureg_program *ureg,
304                  unsigned name,
305                  unsigned index )
306{
307   unsigned i;
308
309   for (i = 0; i < ureg->nr_outputs; i++) {
310      if (ureg->output[i].semantic_name == name &&
311          ureg->output[i].semantic_index == index)
312         goto out;
313   }
314
315   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
316      ureg->output[i].semantic_name = name;
317      ureg->output[i].semantic_index = index;
318      ureg->nr_outputs++;
319   }
320   else {
321      set_bad( ureg );
322   }
323
324out:
325   return ureg_dst_register( TGSI_FILE_OUTPUT, i );
326}
327
328
329/* Returns a new constant register.  Keep track of which have been
330 * referred to so that we can emit decls later.
331 *
332 * There is nothing in this code to bind this constant to any tracked
333 * value or manage any constant_buffer contents -- that's the
334 * resposibility of the calling code.
335 */
336struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
337                                   unsigned index )
338{
339   unsigned minconst = index, maxconst = index;
340   unsigned i;
341
342   /* Inside existing range?
343    */
344   for (i = 0; i < ureg->nr_constant_ranges; i++) {
345      if (ureg->constant_range[i].first <= index &&
346          ureg->constant_range[i].last >= index)
347         goto out;
348   }
349
350   /* Extend existing range?
351    */
352   for (i = 0; i < ureg->nr_constant_ranges; i++) {
353      if (ureg->constant_range[i].last == index - 1) {
354         ureg->constant_range[i].last = index;
355         goto out;
356      }
357
358      if (ureg->constant_range[i].first == index + 1) {
359         ureg->constant_range[i].first = index;
360         goto out;
361      }
362
363      minconst = MIN2(minconst, ureg->constant_range[i].first);
364      maxconst = MAX2(maxconst, ureg->constant_range[i].last);
365   }
366
367   /* Create new range?
368    */
369   if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
370      i = ureg->nr_constant_ranges++;
371      ureg->constant_range[i].first = index;
372      ureg->constant_range[i].last = index;
373      goto out;
374   }
375
376   /* Collapse all ranges down to one:
377    */
378   i = 0;
379   ureg->constant_range[0].first = minconst;
380   ureg->constant_range[0].last = maxconst;
381   ureg->nr_constant_ranges = 1;
382
383out:
384   assert(i < ureg->nr_constant_ranges);
385   assert(ureg->constant_range[i].first <= index);
386   assert(ureg->constant_range[i].last >= index);
387   return ureg_src_register( TGSI_FILE_CONSTANT, index );
388}
389
390
391/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
392 * are legal, but will not be released.
393 */
394struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
395{
396   unsigned i;
397
398   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
399      int bit = ffs(~ureg->temps_active[i/32]);
400      if (bit != 0) {
401         i += bit - 1;
402         goto out;
403      }
404   }
405
406   /* No reusable temps, so allocate a new one:
407    */
408   i = ureg->nr_temps++;
409
410out:
411   if (i < UREG_MAX_TEMP)
412      ureg->temps_active[i/32] |= 1 << (i % 32);
413
414   if (i >= ureg->nr_temps)
415      ureg->nr_temps = i + 1;
416
417   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
418}
419
420
421void ureg_release_temporary( struct ureg_program *ureg,
422                             struct ureg_dst tmp )
423{
424   if(tmp.File == TGSI_FILE_TEMPORARY)
425      if (tmp.Index < UREG_MAX_TEMP)
426         ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
427}
428
429
430/* Allocate a new address register.
431 */
432struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
433{
434   if (ureg->nr_addrs < UREG_MAX_ADDR)
435      return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
436
437   assert( 0 );
438   return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
439}
440
441/* Allocate a new loop register.
442 */
443struct ureg_dst
444ureg_DECL_loop(struct ureg_program *ureg)
445{
446   if (ureg->nr_loops < UREG_MAX_LOOP) {
447      return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
448   }
449
450   assert(0);
451   return ureg_dst_register(TGSI_FILE_LOOP, 0);
452}
453
454/* Allocate a new predicate register.
455 */
456struct ureg_dst
457ureg_DECL_predicate(struct ureg_program *ureg)
458{
459   if (ureg->nr_preds < UREG_MAX_PRED) {
460      return ureg_dst_register(TGSI_FILE_PREDICATE, ureg->nr_preds++);
461   }
462
463   assert(0);
464   return ureg_dst_register(TGSI_FILE_PREDICATE, 0);
465}
466
467/* Allocate a new sampler.
468 */
469struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
470                                   unsigned nr )
471{
472   unsigned i;
473
474   for (i = 0; i < ureg->nr_samplers; i++)
475      if (ureg->sampler[i].Index == nr)
476         return ureg->sampler[i];
477
478   if (i < PIPE_MAX_SAMPLERS) {
479      ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
480      ureg->nr_samplers++;
481      return ureg->sampler[i];
482   }
483
484   assert( 0 );
485   return ureg->sampler[0];
486}
487
488
489
490
/**
 * Try to express the `nr` values in `v` as a swizzle of the immediate
 * (v2, *nr2), appending values that are missing while there is room.
 *
 * \param v        values wanted
 * \param nr       number of values in `v`
 * \param v2       components of an existing immediate (room for 4)
 * \param nr2      in/out: number of components of `v2` in use
 * \param swizzle  out: two bits per wanted value, giving the component
 *                 of `v2` that holds it
 * \return non-zero on success, zero if the values do not fit
 *
 * The component count is only committed on success.  Previously a
 * failed attempt could leave *nr2 enlarged by values nobody
 * references, using up space in the immediate for no reason.
 *
 * Values are compared by bit pattern rather than with ==, so -0.0 is
 * not silently replaced by an existing +0.0 and identical NaN patterns
 * can share a component.
 */
static int match_or_expand_immediate( const float *v,
                                      unsigned nr,
                                      float *v2,
                                      unsigned *nr2,
                                      unsigned *swizzle )
{
   unsigned used = *nr2;
   unsigned i, j;

   *swizzle = 0;

   for (i = 0; i < nr; i++) {
      union { float f; unsigned u; } want, have;
      int found = 0;

      want.f = v[i];

      for (j = 0; j < used && !found; j++) {
         have.f = v2[j];
         if (want.u == have.u) {
            *swizzle |= j << (i * 2);
            found = 1;
         }
      }

      if (!found) {
         if (used >= 4)
            return 0;

         /* Components at or beyond *nr2 are unused, so writing here
          * is harmless even if a later value fails to fit.
          */
         v2[used] = v[i];
         *swizzle |= used << (i * 2);
         used++;
      }
   }

   /* Everything fit: make the expansion visible to the caller. */
   *nr2 = used;
   return 1;
}
523
524
525
526
527struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
528                                     const float *v,
529                                     unsigned nr )
530{
531   unsigned i, j;
532   unsigned swizzle;
533
534   /* Could do a first pass where we examine all existing immediates
535    * without expanding.
536    */
537
538   for (i = 0; i < ureg->nr_immediates; i++) {
539      if (match_or_expand_immediate( v,
540                                     nr,
541                                     ureg->immediate[i].v,
542                                     &ureg->immediate[i].nr,
543                                     &swizzle ))
544         goto out;
545   }
546
547   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
548      i = ureg->nr_immediates++;
549      if (match_or_expand_immediate( v,
550                                     nr,
551                                     ureg->immediate[i].v,
552                                     &ureg->immediate[i].nr,
553                                     &swizzle ))
554         goto out;
555   }
556
557   set_bad( ureg );
558
559out:
560   /* Make sure that all referenced elements are from this immediate.
561    * Has the effect of making size-one immediates into scalars.
562    */
563   for (j = nr; j < 4; j++)
564      swizzle |= (swizzle & 0x3) << (j * 2);
565
566   return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ),
567                        (swizzle >> 0) & 0x3,
568                        (swizzle >> 2) & 0x3,
569                        (swizzle >> 4) & 0x3,
570                        (swizzle >> 6) & 0x3);
571}
572
573
574void
575ureg_emit_src( struct ureg_program *ureg,
576               struct ureg_src src )
577{
578   unsigned size = 1 + (src.Indirect ? 1 : 0);
579
580   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
581   unsigned n = 0;
582
583   assert(src.File != TGSI_FILE_NULL);
584   assert(src.File != TGSI_FILE_OUTPUT);
585   assert(src.File < TGSI_FILE_COUNT);
586
587   out[n].value = 0;
588   out[n].src.File = src.File;
589   out[n].src.SwizzleX = src.SwizzleX;
590   out[n].src.SwizzleY = src.SwizzleY;
591   out[n].src.SwizzleZ = src.SwizzleZ;
592   out[n].src.SwizzleW = src.SwizzleW;
593   out[n].src.Index = src.Index;
594   out[n].src.Negate = src.Negate;
595   out[0].src.Absolute = src.Absolute;
596   n++;
597
598   if (src.Indirect) {
599      out[0].src.Indirect = 1;
600      out[n].value = 0;
601      out[n].src.File = TGSI_FILE_ADDRESS;
602      out[n].src.SwizzleX = src.IndirectSwizzle;
603      out[n].src.SwizzleY = src.IndirectSwizzle;
604      out[n].src.SwizzleZ = src.IndirectSwizzle;
605      out[n].src.SwizzleW = src.IndirectSwizzle;
606      out[n].src.Index = src.IndirectIndex;
607      n++;
608   }
609
610   assert(n == size);
611}
612
613
614void
615ureg_emit_dst( struct ureg_program *ureg,
616               struct ureg_dst dst )
617{
618   unsigned size = (1 +
619                    (dst.Indirect ? 1 : 0));
620
621   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
622   unsigned n = 0;
623
624   assert(dst.File != TGSI_FILE_NULL);
625   assert(dst.File != TGSI_FILE_CONSTANT);
626   assert(dst.File != TGSI_FILE_INPUT);
627   assert(dst.File != TGSI_FILE_SAMPLER);
628   assert(dst.File != TGSI_FILE_IMMEDIATE);
629   assert(dst.File < TGSI_FILE_COUNT);
630
631   out[n].value = 0;
632   out[n].dst.File = dst.File;
633   out[n].dst.WriteMask = dst.WriteMask;
634   out[n].dst.Indirect = dst.Indirect;
635   out[n].dst.Index = dst.Index;
636   n++;
637
638   if (dst.Indirect) {
639      out[n].value = 0;
640      out[n].src.File = TGSI_FILE_ADDRESS;
641      out[n].src.SwizzleX = dst.IndirectSwizzle;
642      out[n].src.SwizzleY = dst.IndirectSwizzle;
643      out[n].src.SwizzleZ = dst.IndirectSwizzle;
644      out[n].src.SwizzleW = dst.IndirectSwizzle;
645      out[n].src.Index = dst.IndirectIndex;
646      n++;
647   }
648
649   assert(n == size);
650}
651
652
/**
 * Debug-build check that an opcode is known and is being emitted with
 * the operand counts listed in its opcode info.  Compiles to nothing
 * unless DEBUG is defined.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );

   assert(info);
   if (info == NULL)
      return;

   assert(nr_dst == info->num_dst);
   assert(nr_src == info->num_src);
#endif
}
666
667struct ureg_emit_insn_result
668ureg_emit_insn(struct ureg_program *ureg,
669               unsigned opcode,
670               boolean saturate,
671               boolean predicate,
672               boolean pred_negate,
673               unsigned pred_swizzle_x,
674               unsigned pred_swizzle_y,
675               unsigned pred_swizzle_z,
676               unsigned pred_swizzle_w,
677               unsigned num_dst,
678               unsigned num_src )
679{
680   union tgsi_any_token *out;
681   uint count = predicate ? 2 : 1;
682   struct ureg_emit_insn_result result;
683
684   validate( opcode, num_dst, num_src );
685
686   out = get_tokens( ureg, DOMAIN_INSN, count );
687   out[0].insn = tgsi_default_instruction();
688   out[0].insn.Opcode = opcode;
689   out[0].insn.Saturate = saturate;
690   out[0].insn.NumDstRegs = num_dst;
691   out[0].insn.NumSrcRegs = num_src;
692
693   result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
694   result.extended_token = result.insn_token;
695
696   if (predicate) {
697      out[0].insn.Predicate = 1;
698      out[1].insn_predicate = tgsi_default_instruction_predicate();
699      out[1].insn_predicate.Negate = pred_negate;
700      out[1].insn_predicate.SwizzleX = pred_swizzle_x;
701      out[1].insn_predicate.SwizzleY = pred_swizzle_y;
702      out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
703      out[1].insn_predicate.SwizzleW = pred_swizzle_w;
704   }
705
706   ureg->nr_instructions++;
707
708   return result;
709}
710
711
712void
713ureg_emit_label(struct ureg_program *ureg,
714                unsigned extended_token,
715                unsigned *label_token )
716{
717   union tgsi_any_token *out, *insn;
718
719   if(!label_token)
720      return;
721
722   out = get_tokens( ureg, DOMAIN_INSN, 1 );
723   out[0].value = 0;
724
725   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
726   insn->insn.Label = 1;
727
728   *label_token = ureg->domain[DOMAIN_INSN].count - 1;
729}
730
731/* Will return a number which can be used in a label to point to the
732 * next instruction to be emitted.
733 */
734unsigned
735ureg_get_instruction_number( struct ureg_program *ureg )
736{
737   return ureg->nr_instructions;
738}
739
740/* Patch a given label (expressed as a token number) to point to a
741 * given instruction (expressed as an instruction number).
742 */
743void
744ureg_fixup_label(struct ureg_program *ureg,
745                 unsigned label_token,
746                 unsigned instruction_number )
747{
748   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
749
750   out->insn_label.Label = instruction_number;
751}
752
753
754void
755ureg_emit_texture(struct ureg_program *ureg,
756                  unsigned extended_token,
757                  unsigned target )
758{
759   union tgsi_any_token *out, *insn;
760
761   out = get_tokens( ureg, DOMAIN_INSN, 1 );
762   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
763
764   insn->insn.Texture = 1;
765
766   out[0].value = 0;
767   out[0].insn_texture.Texture = target;
768}
769
770
771void
772ureg_fixup_insn_size(struct ureg_program *ureg,
773                     unsigned insn )
774{
775   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
776
777   assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
778   out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
779}
780
781
782void
783ureg_insn(struct ureg_program *ureg,
784          unsigned opcode,
785          const struct ureg_dst *dst,
786          unsigned nr_dst,
787          const struct ureg_src *src,
788          unsigned nr_src )
789{
790   struct ureg_emit_insn_result insn;
791   unsigned i;
792   boolean saturate;
793   boolean predicate;
794   boolean negate;
795   unsigned swizzle[4];
796
797   saturate = nr_dst ? dst[0].Saturate : FALSE;
798   predicate = nr_dst ? dst[0].Predicate : FALSE;
799   if (predicate) {
800      negate = dst[0].PredNegate;
801      swizzle[0] = dst[0].PredSwizzleX;
802      swizzle[1] = dst[0].PredSwizzleY;
803      swizzle[2] = dst[0].PredSwizzleZ;
804      swizzle[3] = dst[0].PredSwizzleW;
805   }
806
807   insn = ureg_emit_insn(ureg,
808                         opcode,
809                         saturate,
810                         predicate,
811                         negate,
812                         swizzle[0],
813                         swizzle[1],
814                         swizzle[2],
815                         swizzle[3],
816                         nr_dst,
817                         nr_src);
818
819   for (i = 0; i < nr_dst; i++)
820      ureg_emit_dst( ureg, dst[i] );
821
822   for (i = 0; i < nr_src; i++)
823      ureg_emit_src( ureg, src[i] );
824
825   ureg_fixup_insn_size( ureg, insn.insn_token );
826}
827
828void
829ureg_tex_insn(struct ureg_program *ureg,
830              unsigned opcode,
831              const struct ureg_dst *dst,
832              unsigned nr_dst,
833              unsigned target,
834              const struct ureg_src *src,
835              unsigned nr_src )
836{
837   struct ureg_emit_insn_result insn;
838   unsigned i;
839   boolean saturate;
840   boolean predicate;
841   boolean negate;
842   unsigned swizzle[4];
843
844   saturate = nr_dst ? dst[0].Saturate : FALSE;
845   predicate = nr_dst ? dst[0].Predicate : FALSE;
846   if (predicate) {
847      negate = dst[0].PredNegate;
848      swizzle[0] = dst[0].PredSwizzleX;
849      swizzle[1] = dst[0].PredSwizzleY;
850      swizzle[2] = dst[0].PredSwizzleZ;
851      swizzle[3] = dst[0].PredSwizzleW;
852   }
853
854   insn = ureg_emit_insn(ureg,
855                         opcode,
856                         saturate,
857                         predicate,
858                         negate,
859                         swizzle[0],
860                         swizzle[1],
861                         swizzle[2],
862                         swizzle[3],
863                         nr_dst,
864                         nr_src);
865
866   ureg_emit_texture( ureg, insn.extended_token, target );
867
868   for (i = 0; i < nr_dst; i++)
869      ureg_emit_dst( ureg, dst[i] );
870
871   for (i = 0; i < nr_src; i++)
872      ureg_emit_src( ureg, src[i] );
873
874   ureg_fixup_insn_size( ureg, insn.insn_token );
875}
876
877
878void
879ureg_label_insn(struct ureg_program *ureg,
880                unsigned opcode,
881                const struct ureg_src *src,
882                unsigned nr_src,
883                unsigned *label_token )
884{
885   struct ureg_emit_insn_result insn;
886   unsigned i;
887
888   insn = ureg_emit_insn(ureg,
889                         opcode,
890                         FALSE,
891                         FALSE,
892                         FALSE,
893                         TGSI_SWIZZLE_X,
894                         TGSI_SWIZZLE_Y,
895                         TGSI_SWIZZLE_Z,
896                         TGSI_SWIZZLE_W,
897                         0,
898                         nr_src);
899
900   ureg_emit_label( ureg, insn.extended_token, label_token );
901
902   for (i = 0; i < nr_src; i++)
903      ureg_emit_src( ureg, src[i] );
904
905   ureg_fixup_insn_size( ureg, insn.insn_token );
906}
907
908
909
910static void emit_decl( struct ureg_program *ureg,
911                       unsigned file,
912                       unsigned index,
913                       unsigned semantic_name,
914                       unsigned semantic_index,
915                       unsigned interp )
916{
917   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
918
919   out[0].value = 0;
920   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
921   out[0].decl.NrTokens = 3;
922   out[0].decl.File = file;
923   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
924   out[0].decl.Interpolate = interp;
925   out[0].decl.Semantic = 1;
926
927   out[1].value = 0;
928   out[1].decl_range.First =
929      out[1].decl_range.Last = index;
930
931   out[2].value = 0;
932   out[2].decl_semantic.Name = semantic_name;
933   out[2].decl_semantic.Index = semantic_index;
934
935}
936
937
938static void emit_decl_range( struct ureg_program *ureg,
939                             unsigned file,
940                             unsigned first,
941                             unsigned count )
942{
943   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
944
945   out[0].value = 0;
946   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
947   out[0].decl.NrTokens = 2;
948   out[0].decl.File = file;
949   out[0].decl.UsageMask = 0xf;
950   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
951   out[0].decl.Semantic = 0;
952
953   out[1].value = 0;
954   out[1].decl_range.First = first;
955   out[1].decl_range.Last = first + count - 1;
956}
957
958static void emit_immediate( struct ureg_program *ureg,
959                            const float *v )
960{
961   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
962
963   out[0].value = 0;
964   out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
965   out[0].imm.NrTokens = 5;
966   out[0].imm.DataType = TGSI_IMM_FLOAT32;
967   out[0].imm.Padding = 0;
968
969   out[1].imm_data.Float = v[0];
970   out[2].imm_data.Float = v[1];
971   out[3].imm_data.Float = v[2];
972   out[4].imm_data.Float = v[3];
973}
974
975
976
977
978static void emit_decls( struct ureg_program *ureg )
979{
980   unsigned i;
981
982   if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
983      for (i = 0; i < UREG_MAX_INPUT; i++) {
984         if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
985            emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
986         }
987      }
988   } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
989      for (i = 0; i < ureg->nr_fs_inputs; i++) {
990         emit_decl( ureg,
991                    TGSI_FILE_INPUT,
992                    i,
993                    ureg->fs_input[i].semantic_name,
994                    ureg->fs_input[i].semantic_index,
995                    ureg->fs_input[i].interp );
996      }
997   } else {
998      for (i = 0; i < ureg->nr_gs_inputs; i++) {
999         emit_decl_range(ureg,
1000                         TGSI_FILE_INPUT,
1001                         ureg->gs_input[i].index,
1002                         1);
1003      }
1004   }
1005
1006   for (i = 0; i < ureg->nr_outputs; i++) {
1007      emit_decl( ureg,
1008                 TGSI_FILE_OUTPUT,
1009                 i,
1010                 ureg->output[i].semantic_name,
1011                 ureg->output[i].semantic_index,
1012                 TGSI_INTERPOLATE_CONSTANT );
1013   }
1014
1015   for (i = 0; i < ureg->nr_samplers; i++) {
1016      emit_decl_range( ureg,
1017                       TGSI_FILE_SAMPLER,
1018                       ureg->sampler[i].Index, 1 );
1019   }
1020
1021   if (ureg->nr_constant_ranges) {
1022      for (i = 0; i < ureg->nr_constant_ranges; i++)
1023         emit_decl_range( ureg,
1024                          TGSI_FILE_CONSTANT,
1025                          ureg->constant_range[i].first,
1026                          (ureg->constant_range[i].last + 1 -
1027                           ureg->constant_range[i].first) );
1028   }
1029
1030   if (ureg->nr_temps) {
1031      emit_decl_range( ureg,
1032                       TGSI_FILE_TEMPORARY,
1033                       0, ureg->nr_temps );
1034   }
1035
1036   if (ureg->nr_addrs) {
1037      emit_decl_range( ureg,
1038                       TGSI_FILE_ADDRESS,
1039                       0, ureg->nr_addrs );
1040   }
1041
1042   if (ureg->nr_loops) {
1043      emit_decl_range(ureg,
1044                      TGSI_FILE_LOOP,
1045                      0,
1046                      ureg->nr_loops);
1047   }
1048
1049   if (ureg->nr_preds) {
1050      emit_decl_range(ureg,
1051                      TGSI_FILE_PREDICATE,
1052                      0,
1053                      ureg->nr_preds);
1054   }
1055
1056   for (i = 0; i < ureg->nr_immediates; i++) {
1057      emit_immediate( ureg,
1058                      ureg->immediate[i].v );
1059   }
1060}
1061
1062/* Append the instruction tokens onto the declarations to build a
1063 * contiguous stream suitable to send to the driver.
1064 */
1065static void copy_instructions( struct ureg_program *ureg )
1066{
1067   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
1068   union tgsi_any_token *out = get_tokens( ureg,
1069                                           DOMAIN_DECL,
1070                                           nr_tokens );
1071
1072   memcpy(out,
1073          ureg->domain[DOMAIN_INSN].tokens,
1074          nr_tokens * sizeof out[0] );
1075}
1076
1077
1078static void
1079fixup_header_size(struct ureg_program *ureg)
1080{
1081   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
1082
1083   out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
1084}
1085
1086
1087static void
1088emit_header( struct ureg_program *ureg )
1089{
1090   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1091
1092   out[0].header.HeaderSize = 2;
1093   out[0].header.BodySize = 0;
1094
1095   out[1].processor.Processor = ureg->processor;
1096   out[1].processor.Padding = 0;
1097}
1098
1099
1100const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
1101{
1102   const struct tgsi_token *tokens;
1103
1104   emit_header( ureg );
1105   emit_decls( ureg );
1106   copy_instructions( ureg );
1107   fixup_header_size( ureg );
1108
1109   if (ureg->domain[0].tokens == error_tokens ||
1110       ureg->domain[1].tokens == error_tokens) {
1111      debug_printf("%s: error in generated shader\n", __FUNCTION__);
1112      assert(0);
1113      return NULL;
1114   }
1115
1116   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1117
1118   if (0) {
1119      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
1120                   ureg->domain[DOMAIN_DECL].count);
1121      tgsi_dump( tokens, 0 );
1122   }
1123
1124#if DEBUG
1125   if (tokens && !tgsi_sanity_check(tokens)) {
1126      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
1127      tgsi_dump(tokens, 0);
1128      assert(0);
1129   }
1130#endif
1131
1132
1133   return tokens;
1134}
1135
1136
1137void *ureg_create_shader( struct ureg_program *ureg,
1138                          struct pipe_context *pipe )
1139{
1140   struct pipe_shader_state state;
1141
1142   state.tokens = ureg_finalize(ureg);
1143   if(!state.tokens)
1144      return NULL;
1145
1146   if (ureg->processor == TGSI_PROCESSOR_VERTEX)
1147      return pipe->create_vs_state( pipe, &state );
1148   else
1149      return pipe->create_fs_state( pipe, &state );
1150}
1151
1152
1153const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
1154                                          unsigned *nr_tokens )
1155{
1156   const struct tgsi_token *tokens;
1157
1158   ureg_finalize(ureg);
1159
1160   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
1161
1162   if (nr_tokens)
1163      *nr_tokens = ureg->domain[DOMAIN_DECL].size;
1164
1165   ureg->domain[DOMAIN_DECL].tokens = 0;
1166   ureg->domain[DOMAIN_DECL].size = 0;
1167   ureg->domain[DOMAIN_DECL].order = 0;
1168   ureg->domain[DOMAIN_DECL].count = 0;
1169
1170   return tokens;
1171}
1172
1173
1174struct ureg_program *ureg_create( unsigned processor )
1175{
1176   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
1177   if (ureg == NULL)
1178      return NULL;
1179
1180   ureg->processor = processor;
1181   return ureg;
1182}
1183
1184
1185void ureg_destroy( struct ureg_program *ureg )
1186{
1187   unsigned i;
1188
1189   for (i = 0; i < Elements(ureg->domain); i++) {
1190      if (ureg->domain[i].tokens &&
1191          ureg->domain[i].tokens != error_tokens)
1192         FREE(ureg->domain[i].tokens);
1193   }
1194
1195   FREE(ureg);
1196}
1197