tgsi_exec.c revision 80d3a653f0172f01be694a29456c70f1f4da1812
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by
 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and
 * CONT).
 *
 *
 * Authors:
 *   Michal Krol
 *   Brian Paul
 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_util.h"
56#include "pipe/p_shader_tokens.h"
57#include "tgsi/tgsi_parse.h"
58#include "tgsi/tgsi_util.h"
59#include "tgsi_exec.h"
60
/*
 * Lane indices of the four elements of a quad, named by their position in
 * the 2x2 tile.  micro_ddx() and micro_ddy() use them to pick the lanes
 * whose difference forms the derivative.
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 *
 * Each pair addresses one channel as mach->Temps[X_I].xyzw[X_C].
 * tgsi_exec_machine_init() preloads the 0, 7F, 80, FF, 1, 2, 128, M128,
 * 3 and HALF slots with the constant their name suggests.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
96
/**
 * Loop header that steps CHAN over the four channel indices 0..3.
 * CHAN must be a modifiable integer lvalue; the statement that follows the
 * macro is the loop body.
 */
#define FOR_EACH_CHANNEL(CHAN)\
   for ((CHAN) = 0; (CHAN) < 4; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of INST's first
 * destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same as IS_CHANNEL_ENABLED, but for INST's second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Run the following statement once for each channel enabled in the write
 * mask of INST's first destination register.
 * Expands to "for (...) if (...)": do not follow the body with an `else`.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** Same as FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   FOR_EACH_CHANNEL( CHAN )\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * Recompute the execution mask as the AND of the conditional, loop,
 * continue and function masks.  MACH is a pointer expression; it is
 * evaluated several times, so it must be free of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & (MACH)->ContMask & (MACH)->FuncMask)


/** Channel indices of the x, y, z and w components of a register. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
124
125
126
127/**
128 * Initialize machine state by expanding tokens to full instructions,
129 * allocating temporary storage, setting up constants, etc.
130 * After this, we can call tgsi_exec_machine_run() many times.
131 */
132void
133tgsi_exec_machine_bind_shader(
134   struct tgsi_exec_machine *mach,
135   const struct tgsi_token *tokens,
136   uint numSamplers,
137   struct tgsi_sampler *samplers)
138{
139   uint k;
140   struct tgsi_parse_context parse;
141   struct tgsi_exec_labels *labels = &mach->Labels;
142   struct tgsi_full_instruction *instructions;
143   struct tgsi_full_declaration *declarations;
144   uint maxInstructions = 10, numInstructions = 0;
145   uint maxDeclarations = 10, numDeclarations = 0;
146   uint instno = 0;
147
148#if 0
149   tgsi_dump(tokens, 0);
150#endif
151
152   mach->Tokens = tokens;
153   mach->Samplers = samplers;
154
155   k = tgsi_parse_init (&parse, mach->Tokens);
156   if (k != TGSI_PARSE_OK) {
157      debug_printf( "Problem parsing!\n" );
158      return;
159   }
160
161   mach->Processor = parse.FullHeader.Processor.Processor;
162   mach->ImmLimit = 0;
163   labels->count = 0;
164
165   declarations = (struct tgsi_full_declaration *)
166      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
167
168   if (!declarations) {
169      return;
170   }
171
172   instructions = (struct tgsi_full_instruction *)
173      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
174
175   if (!instructions) {
176      FREE( declarations );
177      return;
178   }
179
180   while( !tgsi_parse_end_of_tokens( &parse ) ) {
181      uint pointer = parse.Position;
182      uint i;
183
184      tgsi_parse_token( &parse );
185      switch( parse.FullToken.Token.Type ) {
186      case TGSI_TOKEN_TYPE_DECLARATION:
187         /* save expanded declaration */
188         if (numDeclarations == maxDeclarations) {
189            declarations = REALLOC(declarations,
190                                   maxDeclarations
191                                   * sizeof(struct tgsi_full_declaration),
192                                   (maxDeclarations + 10)
193                                   * sizeof(struct tgsi_full_declaration));
194            maxDeclarations += 10;
195         }
196         memcpy(declarations + numDeclarations,
197                &parse.FullToken.FullDeclaration,
198                sizeof(declarations[0]));
199         numDeclarations++;
200         break;
201
202      case TGSI_TOKEN_TYPE_IMMEDIATE:
203         {
204            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
205            assert( size % 4 == 0 );
206            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
207
208            for( i = 0; i < size; i++ ) {
209               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
210		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
211            }
212            mach->ImmLimit += size / 4;
213         }
214         break;
215
216      case TGSI_TOKEN_TYPE_INSTRUCTION:
217         assert( labels->count < MAX_LABELS );
218
219         labels->labels[labels->count][0] = instno;
220         labels->labels[labels->count][1] = pointer;
221         labels->count++;
222
223         /* save expanded instruction */
224         if (numInstructions == maxInstructions) {
225            instructions = REALLOC(instructions,
226                                   maxInstructions
227                                   * sizeof(struct tgsi_full_instruction),
228                                   (maxInstructions + 10)
229                                   * sizeof(struct tgsi_full_instruction));
230            maxInstructions += 10;
231         }
232         memcpy(instructions + numInstructions,
233                &parse.FullToken.FullInstruction,
234                sizeof(instructions[0]));
235         numInstructions++;
236         break;
237
238      default:
239         assert( 0 );
240      }
241   }
242   tgsi_parse_free (&parse);
243
244   if (mach->Declarations) {
245      FREE( mach->Declarations );
246   }
247   mach->Declarations = declarations;
248   mach->NumDeclarations = numDeclarations;
249
250   if (mach->Instructions) {
251      FREE( mach->Instructions );
252   }
253   mach->Instructions = instructions;
254   mach->NumInstructions = numInstructions;
255}
256
257
258void
259tgsi_exec_machine_init(
260   struct tgsi_exec_machine *mach )
261{
262   uint i;
263
264   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
265   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
266
267   /* Setup constants. */
268   for( i = 0; i < 4; i++ ) {
269      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
270      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
271      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
272      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
273      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
274      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
275      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
276      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
277      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
278      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
279   }
280}
281
282
283void
284tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
285{
286   if (mach->Instructions) {
287      FREE(mach->Instructions);
288      mach->Instructions = NULL;
289      mach->NumInstructions = 0;
290   }
291   if (mach->Declarations) {
292      FREE(mach->Declarations);
293      mach->Declarations = NULL;
294      mach->NumDeclarations = 0;
295   }
296}
297
298
299static void
300micro_abs(
301   union tgsi_exec_channel *dst,
302   const union tgsi_exec_channel *src )
303{
304   dst->f[0] = fabsf( src->f[0] );
305   dst->f[1] = fabsf( src->f[1] );
306   dst->f[2] = fabsf( src->f[2] );
307   dst->f[3] = fabsf( src->f[3] );
308}
309
310static void
311micro_add(
312   union tgsi_exec_channel *dst,
313   const union tgsi_exec_channel *src0,
314   const union tgsi_exec_channel *src1 )
315{
316   dst->f[0] = src0->f[0] + src1->f[0];
317   dst->f[1] = src0->f[1] + src1->f[1];
318   dst->f[2] = src0->f[2] + src1->f[2];
319   dst->f[3] = src0->f[3] + src1->f[3];
320}
321
322static void
323micro_iadd(
324   union tgsi_exec_channel *dst,
325   const union tgsi_exec_channel *src0,
326   const union tgsi_exec_channel *src1 )
327{
328   dst->i[0] = src0->i[0] + src1->i[0];
329   dst->i[1] = src0->i[1] + src1->i[1];
330   dst->i[2] = src0->i[2] + src1->i[2];
331   dst->i[3] = src0->i[3] + src1->i[3];
332}
333
334static void
335micro_and(
336   union tgsi_exec_channel *dst,
337   const union tgsi_exec_channel *src0,
338   const union tgsi_exec_channel *src1 )
339{
340   dst->u[0] = src0->u[0] & src1->u[0];
341   dst->u[1] = src0->u[1] & src1->u[1];
342   dst->u[2] = src0->u[2] & src1->u[2];
343   dst->u[3] = src0->u[3] & src1->u[3];
344}
345
346static void
347micro_ceil(
348   union tgsi_exec_channel *dst,
349   const union tgsi_exec_channel *src )
350{
351   dst->f[0] = ceilf( src->f[0] );
352   dst->f[1] = ceilf( src->f[1] );
353   dst->f[2] = ceilf( src->f[2] );
354   dst->f[3] = ceilf( src->f[3] );
355}
356
357static void
358micro_cos(
359   union tgsi_exec_channel *dst,
360   const union tgsi_exec_channel *src )
361{
362   dst->f[0] = cosf( src->f[0] );
363   dst->f[1] = cosf( src->f[1] );
364   dst->f[2] = cosf( src->f[2] );
365   dst->f[3] = cosf( src->f[3] );
366}
367
368static void
369micro_ddx(
370   union tgsi_exec_channel *dst,
371   const union tgsi_exec_channel *src )
372{
373   dst->f[0] =
374   dst->f[1] =
375   dst->f[2] =
376   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
377}
378
379static void
380micro_ddy(
381   union tgsi_exec_channel *dst,
382   const union tgsi_exec_channel *src )
383{
384   dst->f[0] =
385   dst->f[1] =
386   dst->f[2] =
387   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
388}
389
390static void
391micro_div(
392   union tgsi_exec_channel *dst,
393   const union tgsi_exec_channel *src0,
394   const union tgsi_exec_channel *src1 )
395{
396   if (src1->f[0] != 0) {
397      dst->f[0] = src0->f[0] / src1->f[0];
398   }
399   if (src1->f[1] != 0) {
400      dst->f[1] = src0->f[1] / src1->f[1];
401   }
402   if (src1->f[2] != 0) {
403      dst->f[2] = src0->f[2] / src1->f[2];
404   }
405   if (src1->f[3] != 0) {
406      dst->f[3] = src0->f[3] / src1->f[3];
407   }
408}
409
410static void
411micro_udiv(
412   union tgsi_exec_channel *dst,
413   const union tgsi_exec_channel *src0,
414   const union tgsi_exec_channel *src1 )
415{
416   dst->u[0] = src0->u[0] / src1->u[0];
417   dst->u[1] = src0->u[1] / src1->u[1];
418   dst->u[2] = src0->u[2] / src1->u[2];
419   dst->u[3] = src0->u[3] / src1->u[3];
420}
421
422static void
423micro_eq(
424   union tgsi_exec_channel *dst,
425   const union tgsi_exec_channel *src0,
426   const union tgsi_exec_channel *src1,
427   const union tgsi_exec_channel *src2,
428   const union tgsi_exec_channel *src3 )
429{
430   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
431   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
432   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
433   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
434}
435
436static void
437micro_ieq(
438   union tgsi_exec_channel *dst,
439   const union tgsi_exec_channel *src0,
440   const union tgsi_exec_channel *src1,
441   const union tgsi_exec_channel *src2,
442   const union tgsi_exec_channel *src3 )
443{
444   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
445   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
446   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
447   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
448}
449
450static void
451micro_exp2(
452   union tgsi_exec_channel *dst,
453   const union tgsi_exec_channel *src)
454{
455   dst->f[0] = powf( 2.0f, src->f[0] );
456   dst->f[1] = powf( 2.0f, src->f[1] );
457   dst->f[2] = powf( 2.0f, src->f[2] );
458   dst->f[3] = powf( 2.0f, src->f[3] );
459}
460
461static void
462micro_f2it(
463   union tgsi_exec_channel *dst,
464   const union tgsi_exec_channel *src )
465{
466   dst->i[0] = (int) src->f[0];
467   dst->i[1] = (int) src->f[1];
468   dst->i[2] = (int) src->f[2];
469   dst->i[3] = (int) src->f[3];
470}
471
472static void
473micro_f2ut(
474   union tgsi_exec_channel *dst,
475   const union tgsi_exec_channel *src )
476{
477   dst->u[0] = (uint) src->f[0];
478   dst->u[1] = (uint) src->f[1];
479   dst->u[2] = (uint) src->f[2];
480   dst->u[3] = (uint) src->f[3];
481}
482
483static void
484micro_flr(
485   union tgsi_exec_channel *dst,
486   const union tgsi_exec_channel *src )
487{
488   dst->f[0] = floorf( src->f[0] );
489   dst->f[1] = floorf( src->f[1] );
490   dst->f[2] = floorf( src->f[2] );
491   dst->f[3] = floorf( src->f[3] );
492}
493
494static void
495micro_frc(
496   union tgsi_exec_channel *dst,
497   const union tgsi_exec_channel *src )
498{
499   dst->f[0] = src->f[0] - floorf( src->f[0] );
500   dst->f[1] = src->f[1] - floorf( src->f[1] );
501   dst->f[2] = src->f[2] - floorf( src->f[2] );
502   dst->f[3] = src->f[3] - floorf( src->f[3] );
503}
504
505static void
506micro_ge(
507   union tgsi_exec_channel *dst,
508   const union tgsi_exec_channel *src0,
509   const union tgsi_exec_channel *src1,
510   const union tgsi_exec_channel *src2,
511   const union tgsi_exec_channel *src3 )
512{
513   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
514   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
515   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
516   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
517}
518
519static void
520micro_i2f(
521   union tgsi_exec_channel *dst,
522   const union tgsi_exec_channel *src )
523{
524   dst->f[0] = (float) src->i[0];
525   dst->f[1] = (float) src->i[1];
526   dst->f[2] = (float) src->i[2];
527   dst->f[3] = (float) src->i[3];
528}
529
530static void
531micro_lg2(
532   union tgsi_exec_channel *dst,
533   const union tgsi_exec_channel *src )
534{
535   dst->f[0] = logf( src->f[0] ) * 1.442695f;
536   dst->f[1] = logf( src->f[1] ) * 1.442695f;
537   dst->f[2] = logf( src->f[2] ) * 1.442695f;
538   dst->f[3] = logf( src->f[3] ) * 1.442695f;
539}
540
541static void
542micro_le(
543   union tgsi_exec_channel *dst,
544   const union tgsi_exec_channel *src0,
545   const union tgsi_exec_channel *src1,
546   const union tgsi_exec_channel *src2,
547   const union tgsi_exec_channel *src3 )
548{
549   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
550   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
551   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
552   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
553}
554
555static void
556micro_lt(
557   union tgsi_exec_channel *dst,
558   const union tgsi_exec_channel *src0,
559   const union tgsi_exec_channel *src1,
560   const union tgsi_exec_channel *src2,
561   const union tgsi_exec_channel *src3 )
562{
563   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
564   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
565   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
566   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
567}
568
569static void
570micro_ilt(
571   union tgsi_exec_channel *dst,
572   const union tgsi_exec_channel *src0,
573   const union tgsi_exec_channel *src1,
574   const union tgsi_exec_channel *src2,
575   const union tgsi_exec_channel *src3 )
576{
577   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
578   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
579   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
580   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
581}
582
583static void
584micro_ult(
585   union tgsi_exec_channel *dst,
586   const union tgsi_exec_channel *src0,
587   const union tgsi_exec_channel *src1,
588   const union tgsi_exec_channel *src2,
589   const union tgsi_exec_channel *src3 )
590{
591   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
592   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
593   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
594   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
595}
596
597static void
598micro_max(
599   union tgsi_exec_channel *dst,
600   const union tgsi_exec_channel *src0,
601   const union tgsi_exec_channel *src1 )
602{
603   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
604   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
605   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
606   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
607}
608
609static void
610micro_imax(
611   union tgsi_exec_channel *dst,
612   const union tgsi_exec_channel *src0,
613   const union tgsi_exec_channel *src1 )
614{
615   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
616   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
617   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
618   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
619}
620
621static void
622micro_umax(
623   union tgsi_exec_channel *dst,
624   const union tgsi_exec_channel *src0,
625   const union tgsi_exec_channel *src1 )
626{
627   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
628   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
629   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
630   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
631}
632
633static void
634micro_min(
635   union tgsi_exec_channel *dst,
636   const union tgsi_exec_channel *src0,
637   const union tgsi_exec_channel *src1 )
638{
639   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
640   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
641   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
642   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
643}
644
645static void
646micro_imin(
647   union tgsi_exec_channel *dst,
648   const union tgsi_exec_channel *src0,
649   const union tgsi_exec_channel *src1 )
650{
651   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
652   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
653   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
654   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
655}
656
657static void
658micro_umin(
659   union tgsi_exec_channel *dst,
660   const union tgsi_exec_channel *src0,
661   const union tgsi_exec_channel *src1 )
662{
663   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
664   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
665   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
666   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
667}
668
669static void
670micro_umod(
671   union tgsi_exec_channel *dst,
672   const union tgsi_exec_channel *src0,
673   const union tgsi_exec_channel *src1 )
674{
675   dst->u[0] = src0->u[0] % src1->u[0];
676   dst->u[1] = src0->u[1] % src1->u[1];
677   dst->u[2] = src0->u[2] % src1->u[2];
678   dst->u[3] = src0->u[3] % src1->u[3];
679}
680
681static void
682micro_mul(
683   union tgsi_exec_channel *dst,
684   const union tgsi_exec_channel *src0,
685   const union tgsi_exec_channel *src1 )
686{
687   dst->f[0] = src0->f[0] * src1->f[0];
688   dst->f[1] = src0->f[1] * src1->f[1];
689   dst->f[2] = src0->f[2] * src1->f[2];
690   dst->f[3] = src0->f[3] * src1->f[3];
691}
692
693static void
694micro_imul(
695   union tgsi_exec_channel *dst,
696   const union tgsi_exec_channel *src0,
697   const union tgsi_exec_channel *src1 )
698{
699   dst->i[0] = src0->i[0] * src1->i[0];
700   dst->i[1] = src0->i[1] * src1->i[1];
701   dst->i[2] = src0->i[2] * src1->i[2];
702   dst->i[3] = src0->i[3] * src1->i[3];
703}
704
705static void
706micro_imul64(
707   union tgsi_exec_channel *dst0,
708   union tgsi_exec_channel *dst1,
709   const union tgsi_exec_channel *src0,
710   const union tgsi_exec_channel *src1 )
711{
712   dst1->i[0] = src0->i[0] * src1->i[0];
713   dst1->i[1] = src0->i[1] * src1->i[1];
714   dst1->i[2] = src0->i[2] * src1->i[2];
715   dst1->i[3] = src0->i[3] * src1->i[3];
716   dst0->i[0] = 0;
717   dst0->i[1] = 0;
718   dst0->i[2] = 0;
719   dst0->i[3] = 0;
720}
721
722static void
723micro_umul64(
724   union tgsi_exec_channel *dst0,
725   union tgsi_exec_channel *dst1,
726   const union tgsi_exec_channel *src0,
727   const union tgsi_exec_channel *src1 )
728{
729   dst1->u[0] = src0->u[0] * src1->u[0];
730   dst1->u[1] = src0->u[1] * src1->u[1];
731   dst1->u[2] = src0->u[2] * src1->u[2];
732   dst1->u[3] = src0->u[3] * src1->u[3];
733   dst0->u[0] = 0;
734   dst0->u[1] = 0;
735   dst0->u[2] = 0;
736   dst0->u[3] = 0;
737}
738
739static void
740micro_movc(
741   union tgsi_exec_channel *dst,
742   const union tgsi_exec_channel *src0,
743   const union tgsi_exec_channel *src1,
744   const union tgsi_exec_channel *src2 )
745{
746   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
747   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
748   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
749   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
750}
751
752static void
753micro_neg(
754   union tgsi_exec_channel *dst,
755   const union tgsi_exec_channel *src )
756{
757   dst->f[0] = -src->f[0];
758   dst->f[1] = -src->f[1];
759   dst->f[2] = -src->f[2];
760   dst->f[3] = -src->f[3];
761}
762
763static void
764micro_ineg(
765   union tgsi_exec_channel *dst,
766   const union tgsi_exec_channel *src )
767{
768   dst->i[0] = -src->i[0];
769   dst->i[1] = -src->i[1];
770   dst->i[2] = -src->i[2];
771   dst->i[3] = -src->i[3];
772}
773
774static void
775micro_not(
776   union tgsi_exec_channel *dst,
777   const union tgsi_exec_channel *src )
778{
779   dst->u[0] = ~src->u[0];
780   dst->u[1] = ~src->u[1];
781   dst->u[2] = ~src->u[2];
782   dst->u[3] = ~src->u[3];
783}
784
785static void
786micro_or(
787   union tgsi_exec_channel *dst,
788   const union tgsi_exec_channel *src0,
789   const union tgsi_exec_channel *src1 )
790{
791   dst->u[0] = src0->u[0] | src1->u[0];
792   dst->u[1] = src0->u[1] | src1->u[1];
793   dst->u[2] = src0->u[2] | src1->u[2];
794   dst->u[3] = src0->u[3] | src1->u[3];
795}
796
797static void
798micro_pow(
799   union tgsi_exec_channel *dst,
800   const union tgsi_exec_channel *src0,
801   const union tgsi_exec_channel *src1 )
802{
803   dst->f[0] = powf( src0->f[0], src1->f[0] );
804   dst->f[1] = powf( src0->f[1], src1->f[1] );
805   dst->f[2] = powf( src0->f[2], src1->f[2] );
806   dst->f[3] = powf( src0->f[3], src1->f[3] );
807}
808
809static void
810micro_rnd(
811   union tgsi_exec_channel *dst,
812   const union tgsi_exec_channel *src )
813{
814   dst->f[0] = floorf( src->f[0] + 0.5f );
815   dst->f[1] = floorf( src->f[1] + 0.5f );
816   dst->f[2] = floorf( src->f[2] + 0.5f );
817   dst->f[3] = floorf( src->f[3] + 0.5f );
818}
819
820static void
821micro_shl(
822   union tgsi_exec_channel *dst,
823   const union tgsi_exec_channel *src0,
824   const union tgsi_exec_channel *src1 )
825{
826   dst->i[0] = src0->i[0] << src1->i[0];
827   dst->i[1] = src0->i[1] << src1->i[1];
828   dst->i[2] = src0->i[2] << src1->i[2];
829   dst->i[3] = src0->i[3] << src1->i[3];
830}
831
832static void
833micro_ishr(
834   union tgsi_exec_channel *dst,
835   const union tgsi_exec_channel *src0,
836   const union tgsi_exec_channel *src1 )
837{
838   dst->i[0] = src0->i[0] >> src1->i[0];
839   dst->i[1] = src0->i[1] >> src1->i[1];
840   dst->i[2] = src0->i[2] >> src1->i[2];
841   dst->i[3] = src0->i[3] >> src1->i[3];
842}
843
844static void
845micro_trunc(
846   union tgsi_exec_channel *dst,
847   const union tgsi_exec_channel *src0 )
848{
849   dst->f[0] = (float) (int) src0->f[0];
850   dst->f[1] = (float) (int) src0->f[1];
851   dst->f[2] = (float) (int) src0->f[2];
852   dst->f[3] = (float) (int) src0->f[3];
853}
854
855static void
856micro_ushr(
857   union tgsi_exec_channel *dst,
858   const union tgsi_exec_channel *src0,
859   const union tgsi_exec_channel *src1 )
860{
861   dst->u[0] = src0->u[0] >> src1->u[0];
862   dst->u[1] = src0->u[1] >> src1->u[1];
863   dst->u[2] = src0->u[2] >> src1->u[2];
864   dst->u[3] = src0->u[3] >> src1->u[3];
865}
866
867static void
868micro_sin(
869   union tgsi_exec_channel *dst,
870   const union tgsi_exec_channel *src )
871{
872   dst->f[0] = sinf( src->f[0] );
873   dst->f[1] = sinf( src->f[1] );
874   dst->f[2] = sinf( src->f[2] );
875   dst->f[3] = sinf( src->f[3] );
876}
877
878static void
879micro_sqrt( union tgsi_exec_channel *dst,
880            const union tgsi_exec_channel *src )
881{
882   dst->f[0] = sqrtf( src->f[0] );
883   dst->f[1] = sqrtf( src->f[1] );
884   dst->f[2] = sqrtf( src->f[2] );
885   dst->f[3] = sqrtf( src->f[3] );
886}
887
888static void
889micro_sub(
890   union tgsi_exec_channel *dst,
891   const union tgsi_exec_channel *src0,
892   const union tgsi_exec_channel *src1 )
893{
894   dst->f[0] = src0->f[0] - src1->f[0];
895   dst->f[1] = src0->f[1] - src1->f[1];
896   dst->f[2] = src0->f[2] - src1->f[2];
897   dst->f[3] = src0->f[3] - src1->f[3];
898}
899
900static void
901micro_u2f(
902   union tgsi_exec_channel *dst,
903   const union tgsi_exec_channel *src )
904{
905   dst->f[0] = (float) src->u[0];
906   dst->f[1] = (float) src->u[1];
907   dst->f[2] = (float) src->u[2];
908   dst->f[3] = (float) src->u[3];
909}
910
911static void
912micro_xor(
913   union tgsi_exec_channel *dst,
914   const union tgsi_exec_channel *src0,
915   const union tgsi_exec_channel *src1 )
916{
917   dst->u[0] = src0->u[0] ^ src1->u[0];
918   dst->u[1] = src0->u[1] ^ src1->u[1];
919   dst->u[2] = src0->u[2] ^ src1->u[2];
920   dst->u[3] = src0->u[3] ^ src1->u[3];
921}
922
923static void
924fetch_src_file_channel(
925   const struct tgsi_exec_machine *mach,
926   const uint file,
927   const uint swizzle,
928   const union tgsi_exec_channel *index,
929   union tgsi_exec_channel *chan )
930{
931   switch( swizzle ) {
932   case TGSI_EXTSWIZZLE_X:
933   case TGSI_EXTSWIZZLE_Y:
934   case TGSI_EXTSWIZZLE_Z:
935   case TGSI_EXTSWIZZLE_W:
936      switch( file ) {
937      case TGSI_FILE_CONSTANT:
938         chan->f[0] = mach->Consts[index->i[0]][swizzle];
939         chan->f[1] = mach->Consts[index->i[1]][swizzle];
940         chan->f[2] = mach->Consts[index->i[2]][swizzle];
941         chan->f[3] = mach->Consts[index->i[3]][swizzle];
942         break;
943
944      case TGSI_FILE_INPUT:
945         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
946         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
947         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
948         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
949         break;
950
951      case TGSI_FILE_TEMPORARY:
952         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
953         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
954         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
955         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
956         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
957         break;
958
959      case TGSI_FILE_IMMEDIATE:
960         assert( index->i[0] < (int) mach->ImmLimit );
961         chan->f[0] = mach->Imms[index->i[0]][swizzle];
962         assert( index->i[1] < (int) mach->ImmLimit );
963         chan->f[1] = mach->Imms[index->i[1]][swizzle];
964         assert( index->i[2] < (int) mach->ImmLimit );
965         chan->f[2] = mach->Imms[index->i[2]][swizzle];
966         assert( index->i[3] < (int) mach->ImmLimit );
967         chan->f[3] = mach->Imms[index->i[3]][swizzle];
968         break;
969
970      case TGSI_FILE_ADDRESS:
971         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
972         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
973         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
974         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
975         break;
976
977      case TGSI_FILE_OUTPUT:
978         /* vertex/fragment output vars can be read too */
979         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
980         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
981         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
982         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
983         break;
984
985      default:
986         assert( 0 );
987      }
988      break;
989
990   case TGSI_EXTSWIZZLE_ZERO:
991      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
992      break;
993
994   case TGSI_EXTSWIZZLE_ONE:
995      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
996      break;
997
998   default:
999      assert( 0 );
1000   }
1001}
1002
1003static void
1004fetch_source(
1005   const struct tgsi_exec_machine *mach,
1006   union tgsi_exec_channel *chan,
1007   const struct tgsi_full_src_register *reg,
1008   const uint chan_index )
1009{
1010   union tgsi_exec_channel index;
1011   uint swizzle;
1012
1013   index.i[0] =
1014   index.i[1] =
1015   index.i[2] =
1016   index.i[3] = reg->SrcRegister.Index;
1017
1018   if (reg->SrcRegister.Indirect) {
1019      union tgsi_exec_channel index2;
1020      union tgsi_exec_channel indir_index;
1021
1022      index2.i[0] =
1023      index2.i[1] =
1024      index2.i[2] =
1025      index2.i[3] = reg->SrcRegisterInd.Index;
1026
1027      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1028      fetch_src_file_channel(
1029         mach,
1030         reg->SrcRegisterInd.File,
1031         swizzle,
1032         &index2,
1033         &indir_index );
1034
1035      index.i[0] += indir_index.i[0];
1036      index.i[1] += indir_index.i[1];
1037      index.i[2] += indir_index.i[2];
1038      index.i[3] += indir_index.i[3];
1039   }
1040
1041   if( reg->SrcRegister.Dimension ) {
1042      switch( reg->SrcRegister.File ) {
1043      case TGSI_FILE_INPUT:
1044         index.i[0] *= 17;
1045         index.i[1] *= 17;
1046         index.i[2] *= 17;
1047         index.i[3] *= 17;
1048         break;
1049      case TGSI_FILE_CONSTANT:
1050         index.i[0] *= 4096;
1051         index.i[1] *= 4096;
1052         index.i[2] *= 4096;
1053         index.i[3] *= 4096;
1054         break;
1055      default:
1056         assert( 0 );
1057      }
1058
1059      index.i[0] += reg->SrcRegisterDim.Index;
1060      index.i[1] += reg->SrcRegisterDim.Index;
1061      index.i[2] += reg->SrcRegisterDim.Index;
1062      index.i[3] += reg->SrcRegisterDim.Index;
1063
1064      if (reg->SrcRegisterDim.Indirect) {
1065         union tgsi_exec_channel index2;
1066         union tgsi_exec_channel indir_index;
1067
1068         index2.i[0] =
1069         index2.i[1] =
1070         index2.i[2] =
1071         index2.i[3] = reg->SrcRegisterDimInd.Index;
1072
1073         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1074         fetch_src_file_channel(
1075            mach,
1076            reg->SrcRegisterDimInd.File,
1077            swizzle,
1078            &index2,
1079            &indir_index );
1080
1081         index.i[0] += indir_index.i[0];
1082         index.i[1] += indir_index.i[1];
1083         index.i[2] += indir_index.i[2];
1084         index.i[3] += indir_index.i[3];
1085      }
1086   }
1087
1088   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1089   fetch_src_file_channel(
1090      mach,
1091      reg->SrcRegister.File,
1092      swizzle,
1093      &index,
1094      chan );
1095
1096   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1097   case TGSI_UTIL_SIGN_CLEAR:
1098      micro_abs( chan, chan );
1099      break;
1100
1101   case TGSI_UTIL_SIGN_SET:
1102      micro_abs( chan, chan );
1103      micro_neg( chan, chan );
1104      break;
1105
1106   case TGSI_UTIL_SIGN_TOGGLE:
1107      micro_neg( chan, chan );
1108      break;
1109
1110   case TGSI_UTIL_SIGN_KEEP:
1111      break;
1112   }
1113
1114   if (reg->SrcRegisterExtMod.Complement) {
1115      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1116   }
1117}
1118
1119static void
1120store_dest(
1121   struct tgsi_exec_machine *mach,
1122   const union tgsi_exec_channel *chan,
1123   const struct tgsi_full_dst_register *reg,
1124   const struct tgsi_full_instruction *inst,
1125   uint chan_index )
1126{
1127   union tgsi_exec_channel *dst;
1128
1129   switch( reg->DstRegister.File ) {
1130   case TGSI_FILE_NULL:
1131      return;
1132
1133   case TGSI_FILE_OUTPUT:
1134      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1135                           + reg->DstRegister.Index].xyzw[chan_index];
1136      break;
1137
1138   case TGSI_FILE_TEMPORARY:
1139      assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
1140      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1141      break;
1142
1143   case TGSI_FILE_ADDRESS:
1144      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1145      break;
1146
1147   default:
1148      assert( 0 );
1149      return;
1150   }
1151
1152   switch (inst->Instruction.Saturate)
1153   {
1154   case TGSI_SAT_NONE:
1155      if (mach->ExecMask & 0x1)
1156         dst->i[0] = chan->i[0];
1157      if (mach->ExecMask & 0x2)
1158         dst->i[1] = chan->i[1];
1159      if (mach->ExecMask & 0x4)
1160         dst->i[2] = chan->i[2];
1161      if (mach->ExecMask & 0x8)
1162         dst->i[3] = chan->i[3];
1163      break;
1164
1165   case TGSI_SAT_ZERO_ONE:
1166      /* XXX need to obey ExecMask here */
1167      micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
1168      micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
1169      break;
1170
1171   case TGSI_SAT_MINUS_PLUS_ONE:
1172      assert( 0 );
1173      break;
1174
1175   default:
1176      assert( 0 );
1177   }
1178}
1179
/*
 * Operand access shorthands.  Both expand to code that refers to
 * variables named `mach` and `inst`, which must be in scope where the
 * macro is used.
 *
 * FETCH reads channel CHAN of source operand INDEX into *VAL.
 * STORE writes *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH( VAL, INDEX, CHAN ) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE( VAL, INDEX, CHAN ) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1185
1186
1187/**
1188 * Execute ARB-style KIL which is predicated by a src register.
1189 * Kill fragment if any of the four values is less than zero.
1190 */
1191static void
1192exec_kilp(struct tgsi_exec_machine *mach,
1193          const struct tgsi_full_instruction *inst)
1194{
1195   uint uniquemask;
1196   uint chan_index;
1197   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1198   union tgsi_exec_channel r[1];
1199
1200   /* This mask stores component bits that were already tested. Note that
1201    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1202    * tested. */
1203   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1204
1205   for (chan_index = 0; chan_index < 4; chan_index++)
1206   {
1207      uint swizzle;
1208      uint i;
1209
1210      /* unswizzle channel */
1211      swizzle = tgsi_util_get_full_src_register_extswizzle (
1212                        &inst->FullSrcRegisters[0],
1213                        chan_index);
1214
1215      /* check if the component has not been already tested */
1216      if (uniquemask & (1 << swizzle))
1217         continue;
1218      uniquemask |= 1 << swizzle;
1219
1220      FETCH(&r[0], 0, chan_index);
1221      for (i = 0; i < 4; i++)
1222         if (r[0].f[i] < 0.0f)
1223            kilmask |= 1 << i;
1224   }
1225
1226   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1227}
1228
1229
1230/*
1231 * Fetch a texel using STR texture coordinates.
1232 */
1233static void
1234fetch_texel( struct tgsi_sampler *sampler,
1235             const union tgsi_exec_channel *s,
1236             const union tgsi_exec_channel *t,
1237             const union tgsi_exec_channel *p,
1238             float lodbias,  /* XXX should be float[4] */
1239             union tgsi_exec_channel *r,
1240             union tgsi_exec_channel *g,
1241             union tgsi_exec_channel *b,
1242             union tgsi_exec_channel *a )
1243{
1244   uint j;
1245   float rgba[NUM_CHANNELS][QUAD_SIZE];
1246
1247   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1248
1249   for (j = 0; j < 4; j++) {
1250      r->f[j] = rgba[0][j];
1251      g->f[j] = rgba[1][j];
1252      b->f[j] = rgba[2][j];
1253      a->f[j] = rgba[3][j];
1254   }
1255}
1256
1257
1258static void
1259exec_tex(struct tgsi_exec_machine *mach,
1260         const struct tgsi_full_instruction *inst,
1261         boolean biasLod,
1262         boolean projected)
1263{
1264   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1265   union tgsi_exec_channel r[8];
1266   uint chan_index;
1267   float lodBias;
1268
1269   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1270
1271   switch (inst->InstructionExtTexture.Texture) {
1272   case TGSI_TEXTURE_1D:
1273
1274      FETCH(&r[0], 0, CHAN_X);
1275
1276      if (projected) {
1277         FETCH(&r[1], 0, CHAN_W);
1278         micro_div( &r[0], &r[0], &r[1] );
1279      }
1280
1281      if (biasLod) {
1282         FETCH(&r[1], 0, CHAN_W);
1283         lodBias = r[2].f[0];
1284      }
1285      else
1286         lodBias = 0.0;
1287
1288      fetch_texel(&mach->Samplers[unit],
1289                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
1290                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1291      break;
1292
1293   case TGSI_TEXTURE_2D:
1294   case TGSI_TEXTURE_RECT:
1295
1296      FETCH(&r[0], 0, CHAN_X);
1297      FETCH(&r[1], 0, CHAN_Y);
1298      FETCH(&r[2], 0, CHAN_Z);
1299
1300      if (projected) {
1301         FETCH(&r[3], 0, CHAN_W);
1302         micro_div( &r[0], &r[0], &r[3] );
1303         micro_div( &r[1], &r[1], &r[3] );
1304         micro_div( &r[2], &r[2], &r[3] );
1305      }
1306
1307      if (biasLod) {
1308         FETCH(&r[3], 0, CHAN_W);
1309         lodBias = r[3].f[0];
1310      }
1311      else
1312         lodBias = 0.0;
1313
1314      fetch_texel(&mach->Samplers[unit],
1315                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1316                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1317      break;
1318
1319   case TGSI_TEXTURE_3D:
1320   case TGSI_TEXTURE_CUBE:
1321
1322      FETCH(&r[0], 0, CHAN_X);
1323      FETCH(&r[1], 0, CHAN_Y);
1324      FETCH(&r[2], 0, CHAN_Z);
1325
1326      if (projected) {
1327         FETCH(&r[3], 0, CHAN_W);
1328         micro_div( &r[0], &r[0], &r[3] );
1329         micro_div( &r[1], &r[1], &r[3] );
1330         micro_div( &r[2], &r[2], &r[3] );
1331      }
1332
1333      if (biasLod) {
1334         FETCH(&r[3], 0, CHAN_W);
1335         lodBias = r[3].f[0];
1336      }
1337      else
1338         lodBias = 0.0;
1339
1340      fetch_texel(&mach->Samplers[unit],
1341                  &r[0], &r[1], &r[2], lodBias,
1342                  &r[0], &r[1], &r[2], &r[3]);
1343      break;
1344
1345   default:
1346      assert (0);
1347   }
1348
1349   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1350      STORE( &r[chan_index], 0, chan_index );
1351   }
1352}
1353
1354
1355/**
1356 * Evaluate a constant-valued coefficient at the position of the
1357 * current quad.
1358 */
1359static void
1360eval_constant_coef(
1361   struct tgsi_exec_machine *mach,
1362   unsigned attrib,
1363   unsigned chan )
1364{
1365   unsigned i;
1366
1367   for( i = 0; i < QUAD_SIZE; i++ ) {
1368      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1369   }
1370}
1371
1372/**
1373 * Evaluate a linear-valued coefficient at the position of the
1374 * current quad.
1375 */
1376static void
1377eval_linear_coef(
1378   struct tgsi_exec_machine *mach,
1379   unsigned attrib,
1380   unsigned chan )
1381{
1382   const float x = mach->QuadPos.xyzw[0].f[0];
1383   const float y = mach->QuadPos.xyzw[1].f[0];
1384   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1385   const float dady = mach->InterpCoefs[attrib].dady[chan];
1386   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1387   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1388   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1389   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1390   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1391}
1392
1393/**
1394 * Evaluate a perspective-valued coefficient at the position of the
1395 * current quad.
1396 */
1397static void
1398eval_perspective_coef(
1399   struct tgsi_exec_machine *mach,
1400   unsigned attrib,
1401   unsigned chan )
1402{
1403   const float x = mach->QuadPos.xyzw[0].f[0];
1404   const float y = mach->QuadPos.xyzw[1].f[0];
1405   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1406   const float dady = mach->InterpCoefs[attrib].dady[chan];
1407   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1408   const float *w = mach->QuadPos.xyzw[3].f;
1409   /* divide by W here */
1410   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1411   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1412   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1413   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1414}
1415
1416
/**
 * Pointer to a routine that evaluates one channel of one input attribute
 * for the current quad (see the eval_*_coef functions).
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1421
1422static void
1423exec_declaration(
1424   struct tgsi_exec_machine *mach,
1425   const struct tgsi_full_declaration *decl )
1426{
1427   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1428      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1429         unsigned first, last, mask;
1430         eval_coef_func eval;
1431
1432         first = decl->DeclarationRange.First;
1433         last = decl->DeclarationRange.Last;
1434         mask = decl->Declaration.UsageMask;
1435
1436         switch( decl->Declaration.Interpolate ) {
1437         case TGSI_INTERPOLATE_CONSTANT:
1438            eval = eval_constant_coef;
1439            break;
1440
1441         case TGSI_INTERPOLATE_LINEAR:
1442            eval = eval_linear_coef;
1443            break;
1444
1445         case TGSI_INTERPOLATE_PERSPECTIVE:
1446            eval = eval_perspective_coef;
1447            break;
1448
1449         default:
1450            assert( 0 );
1451         }
1452
1453         if( mask == TGSI_WRITEMASK_XYZW ) {
1454            unsigned i, j;
1455
1456            for( i = first; i <= last; i++ ) {
1457               for( j = 0; j < NUM_CHANNELS; j++ ) {
1458                  eval( mach, i, j );
1459               }
1460            }
1461         }
1462         else {
1463            unsigned i, j;
1464
1465            for( j = 0; j < NUM_CHANNELS; j++ ) {
1466               if( mask & (1 << j) ) {
1467                  for( i = first; i <= last; i++ ) {
1468                     eval( mach, i, j );
1469                  }
1470               }
1471            }
1472         }
1473      }
1474   }
1475}
1476
1477static void
1478exec_instruction(
1479   struct tgsi_exec_machine *mach,
1480   const struct tgsi_full_instruction *inst,
1481   int *pc )
1482{
1483   uint chan_index;
1484   union tgsi_exec_channel r[8];
1485
1486   (*pc)++;
1487
1488   switch (inst->Instruction.Opcode) {
1489   case TGSI_OPCODE_ARL:
1490      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1491	 FETCH( &r[0], 0, chan_index );
1492	 micro_f2it( &r[0], &r[0] );
1493	 STORE( &r[0], 0, chan_index );
1494      }
1495      break;
1496
1497   case TGSI_OPCODE_MOV:
1498   case TGSI_OPCODE_SWZ:
1499      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1500         FETCH( &r[0], 0, chan_index );
1501         STORE( &r[0], 0, chan_index );
1502      }
1503      break;
1504
1505   case TGSI_OPCODE_LIT:
1506      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1507	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1508      }
1509
1510      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1511	 FETCH( &r[0], 0, CHAN_X );
1512	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1513	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1514	    STORE( &r[0], 0, CHAN_Y );
1515	 }
1516
1517	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1518	    FETCH( &r[1], 0, CHAN_Y );
1519	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1520
1521	    FETCH( &r[2], 0, CHAN_W );
1522	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1523	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1524	    micro_pow( &r[1], &r[1], &r[2] );
1525	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1526	    STORE( &r[0], 0, CHAN_Z );
1527	 }
1528      }
1529
1530      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1531	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1532      }
1533      break;
1534
1535   case TGSI_OPCODE_RCP:
1536   /* TGSI_OPCODE_RECIP */
1537      FETCH( &r[0], 0, CHAN_X );
1538      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1539      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1540	 STORE( &r[0], 0, chan_index );
1541      }
1542      break;
1543
1544   case TGSI_OPCODE_RSQ:
1545   /* TGSI_OPCODE_RECIPSQRT */
1546      FETCH( &r[0], 0, CHAN_X );
1547      micro_sqrt( &r[0], &r[0] );
1548      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1549      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1550	 STORE( &r[0], 0, chan_index );
1551      }
1552      break;
1553
1554   case TGSI_OPCODE_EXP:
1555      FETCH( &r[0], 0, CHAN_X );
1556      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1557      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1558         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1559	 STORE( &r[2], 0, CHAN_X );        /* store r2 */
1560      }
1561      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1562         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1563	 STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1564      }
1565      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1566         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1567	 STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1568      }
1569      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1570	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1571      }
1572      break;
1573
1574   case TGSI_OPCODE_LOG:
1575      FETCH( &r[0], 0, CHAN_X );
1576      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1577      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1578      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1579      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1580	 STORE( &r[0], 0, CHAN_X );
1581      }
1582      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1583         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1584         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1585	 STORE( &r[0], 0, CHAN_Y );
1586      }
1587      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1588	 STORE( &r[1], 0, CHAN_Z );
1589      }
1590      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1591	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1592      }
1593      break;
1594
1595   case TGSI_OPCODE_MUL:
1596      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1597      {
1598         FETCH(&r[0], 0, chan_index);
1599         FETCH(&r[1], 1, chan_index);
1600
1601         micro_mul( &r[0], &r[0], &r[1] );
1602
1603         STORE(&r[0], 0, chan_index);
1604      }
1605      break;
1606
1607   case TGSI_OPCODE_ADD:
1608      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1609         FETCH( &r[0], 0, chan_index );
1610         FETCH( &r[1], 1, chan_index );
1611         micro_add( &r[0], &r[0], &r[1] );
1612         STORE( &r[0], 0, chan_index );
1613      }
1614      break;
1615
1616   case TGSI_OPCODE_DP3:
1617   /* TGSI_OPCODE_DOT3 */
1618      FETCH( &r[0], 0, CHAN_X );
1619      FETCH( &r[1], 1, CHAN_X );
1620      micro_mul( &r[0], &r[0], &r[1] );
1621
1622      FETCH( &r[1], 0, CHAN_Y );
1623      FETCH( &r[2], 1, CHAN_Y );
1624      micro_mul( &r[1], &r[1], &r[2] );
1625      micro_add( &r[0], &r[0], &r[1] );
1626
1627      FETCH( &r[1], 0, CHAN_Z );
1628      FETCH( &r[2], 1, CHAN_Z );
1629      micro_mul( &r[1], &r[1], &r[2] );
1630      micro_add( &r[0], &r[0], &r[1] );
1631
1632      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1633         STORE( &r[0], 0, chan_index );
1634      }
1635      break;
1636
1637    case TGSI_OPCODE_DP4:
1638    /* TGSI_OPCODE_DOT4 */
1639       FETCH(&r[0], 0, CHAN_X);
1640       FETCH(&r[1], 1, CHAN_X);
1641
1642       micro_mul( &r[0], &r[0], &r[1] );
1643
1644       FETCH(&r[1], 0, CHAN_Y);
1645       FETCH(&r[2], 1, CHAN_Y);
1646
1647       micro_mul( &r[1], &r[1], &r[2] );
1648       micro_add( &r[0], &r[0], &r[1] );
1649
1650       FETCH(&r[1], 0, CHAN_Z);
1651       FETCH(&r[2], 1, CHAN_Z);
1652
1653       micro_mul( &r[1], &r[1], &r[2] );
1654       micro_add( &r[0], &r[0], &r[1] );
1655
1656       FETCH(&r[1], 0, CHAN_W);
1657       FETCH(&r[2], 1, CHAN_W);
1658
1659       micro_mul( &r[1], &r[1], &r[2] );
1660       micro_add( &r[0], &r[0], &r[1] );
1661
1662      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1663	 STORE( &r[0], 0, chan_index );
1664      }
1665      break;
1666
1667   case TGSI_OPCODE_DST:
1668      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1669	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1670      }
1671
1672      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1673	 FETCH( &r[0], 0, CHAN_Y );
1674	 FETCH( &r[1], 1, CHAN_Y);
1675	 micro_mul( &r[0], &r[0], &r[1] );
1676	 STORE( &r[0], 0, CHAN_Y );
1677      }
1678
1679      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1680	 FETCH( &r[0], 0, CHAN_Z );
1681	 STORE( &r[0], 0, CHAN_Z );
1682      }
1683
1684      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1685	 FETCH( &r[0], 1, CHAN_W );
1686	 STORE( &r[0], 0, CHAN_W );
1687      }
1688      break;
1689
1690   case TGSI_OPCODE_MIN:
1691      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1692         FETCH(&r[0], 0, chan_index);
1693         FETCH(&r[1], 1, chan_index);
1694
1695         /* XXX use micro_min()?? */
1696         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1697
1698         STORE(&r[0], 0, chan_index);
1699      }
1700      break;
1701
1702   case TGSI_OPCODE_MAX:
1703      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1704         FETCH(&r[0], 0, chan_index);
1705         FETCH(&r[1], 1, chan_index);
1706
1707         /* XXX use micro_max()?? */
1708         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1709
1710         STORE(&r[0], 0, chan_index );
1711      }
1712      break;
1713
1714   case TGSI_OPCODE_SLT:
1715   /* TGSI_OPCODE_SETLT */
1716      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1717         FETCH( &r[0], 0, chan_index );
1718         FETCH( &r[1], 1, chan_index );
1719         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1720         STORE( &r[0], 0, chan_index );
1721      }
1722      break;
1723
1724   case TGSI_OPCODE_SGE:
1725   /* TGSI_OPCODE_SETGE */
1726      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1727         FETCH( &r[0], 0, chan_index );
1728         FETCH( &r[1], 1, chan_index );
1729         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1730         STORE( &r[0], 0, chan_index );
1731      }
1732      break;
1733
1734   case TGSI_OPCODE_MAD:
1735   /* TGSI_OPCODE_MADD */
1736      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1737         FETCH( &r[0], 0, chan_index );
1738         FETCH( &r[1], 1, chan_index );
1739         micro_mul( &r[0], &r[0], &r[1] );
1740         FETCH( &r[1], 2, chan_index );
1741         micro_add( &r[0], &r[0], &r[1] );
1742         STORE( &r[0], 0, chan_index );
1743      }
1744      break;
1745
1746   case TGSI_OPCODE_SUB:
1747      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1748         FETCH(&r[0], 0, chan_index);
1749         FETCH(&r[1], 1, chan_index);
1750
1751         micro_sub( &r[0], &r[0], &r[1] );
1752
1753         STORE(&r[0], 0, chan_index);
1754      }
1755      break;
1756
1757   case TGSI_OPCODE_LERP:
1758   /* TGSI_OPCODE_LRP */
1759      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1760         FETCH(&r[0], 0, chan_index);
1761         FETCH(&r[1], 1, chan_index);
1762         FETCH(&r[2], 2, chan_index);
1763
1764         micro_sub( &r[1], &r[1], &r[2] );
1765         micro_mul( &r[0], &r[0], &r[1] );
1766         micro_add( &r[0], &r[0], &r[2] );
1767
1768         STORE(&r[0], 0, chan_index);
1769      }
1770      break;
1771
1772   case TGSI_OPCODE_CND:
1773      assert (0);
1774      break;
1775
1776   case TGSI_OPCODE_CND0:
1777      assert (0);
1778      break;
1779
1780   case TGSI_OPCODE_DOT2ADD:
1781      /* TGSI_OPCODE_DP2A */
1782      assert (0);
1783      break;
1784
1785   case TGSI_OPCODE_INDEX:
1786      assert (0);
1787      break;
1788
1789   case TGSI_OPCODE_NEGATE:
1790      assert (0);
1791      break;
1792
1793   case TGSI_OPCODE_FRAC:
1794   /* TGSI_OPCODE_FRC */
1795      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1796         FETCH( &r[0], 0, chan_index );
1797         micro_frc( &r[0], &r[0] );
1798         STORE( &r[0], 0, chan_index );
1799      }
1800      break;
1801
1802   case TGSI_OPCODE_CLAMP:
1803      assert (0);
1804      break;
1805
1806   case TGSI_OPCODE_FLOOR:
1807   /* TGSI_OPCODE_FLR */
1808      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1809         FETCH( &r[0], 0, chan_index );
1810         micro_flr( &r[0], &r[0] );
1811         STORE( &r[0], 0, chan_index );
1812      }
1813      break;
1814
1815   case TGSI_OPCODE_ROUND:
1816      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1817         FETCH( &r[0], 0, chan_index );
1818         micro_rnd( &r[0], &r[0] );
1819         STORE( &r[0], 0, chan_index );
1820      }
1821      break;
1822
1823   case TGSI_OPCODE_EXPBASE2:
1824    /* TGSI_OPCODE_EX2 */
1825      FETCH(&r[0], 0, CHAN_X);
1826
1827      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
1828
1829      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1830	 STORE( &r[0], 0, chan_index );
1831      }
1832      break;
1833
1834   case TGSI_OPCODE_LOGBASE2:
1835   /* TGSI_OPCODE_LG2 */
1836      FETCH( &r[0], 0, CHAN_X );
1837      micro_lg2( &r[0], &r[0] );
1838      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1839         STORE( &r[0], 0, chan_index );
1840      }
1841      break;
1842
1843   case TGSI_OPCODE_POWER:
1844      /* TGSI_OPCODE_POW */
1845      FETCH(&r[0], 0, CHAN_X);
1846      FETCH(&r[1], 1, CHAN_X);
1847
1848      micro_pow( &r[0], &r[0], &r[1] );
1849
1850      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1851	 STORE( &r[0], 0, chan_index );
1852      }
1853      break;
1854
1855   case TGSI_OPCODE_CROSSPRODUCT:
1856      /* TGSI_OPCODE_XPD */
1857      FETCH(&r[0], 0, CHAN_Y);
1858      FETCH(&r[1], 1, CHAN_Z);
1859
1860      micro_mul( &r[2], &r[0], &r[1] );
1861
1862      FETCH(&r[3], 0, CHAN_Z);
1863      FETCH(&r[4], 1, CHAN_Y);
1864
1865      micro_mul( &r[5], &r[3], &r[4] );
1866      micro_sub( &r[2], &r[2], &r[5] );
1867
1868      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1869         STORE( &r[2], 0, CHAN_X );
1870      }
1871
1872      FETCH(&r[2], 1, CHAN_X);
1873
1874      micro_mul( &r[3], &r[3], &r[2] );
1875
1876      FETCH(&r[5], 0, CHAN_X);
1877
1878      micro_mul( &r[1], &r[1], &r[5] );
1879      micro_sub( &r[3], &r[3], &r[1] );
1880
1881      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1882         STORE( &r[3], 0, CHAN_Y );
1883      }
1884
1885      micro_mul( &r[5], &r[5], &r[4] );
1886      micro_mul( &r[0], &r[0], &r[2] );
1887      micro_sub( &r[5], &r[5], &r[0] );
1888
1889      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1890         STORE( &r[5], 0, CHAN_Z );
1891      }
1892
1893      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1894         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1895      }
1896      break;
1897
1898    case TGSI_OPCODE_MULTIPLYMATRIX:
1899       assert (0);
1900       break;
1901
1902    case TGSI_OPCODE_ABS:
1903       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1904          FETCH(&r[0], 0, chan_index);
1905
1906          micro_abs( &r[0], &r[0] );
1907
1908          STORE(&r[0], 0, chan_index);
1909       }
1910       break;
1911
1912   case TGSI_OPCODE_RCC:
1913      assert (0);
1914      break;
1915
1916   case TGSI_OPCODE_DPH:
1917      FETCH(&r[0], 0, CHAN_X);
1918      FETCH(&r[1], 1, CHAN_X);
1919
1920      micro_mul( &r[0], &r[0], &r[1] );
1921
1922      FETCH(&r[1], 0, CHAN_Y);
1923      FETCH(&r[2], 1, CHAN_Y);
1924
1925      micro_mul( &r[1], &r[1], &r[2] );
1926      micro_add( &r[0], &r[0], &r[1] );
1927
1928      FETCH(&r[1], 0, CHAN_Z);
1929      FETCH(&r[2], 1, CHAN_Z);
1930
1931      micro_mul( &r[1], &r[1], &r[2] );
1932      micro_add( &r[0], &r[0], &r[1] );
1933
1934      FETCH(&r[1], 1, CHAN_W);
1935
1936      micro_add( &r[0], &r[0], &r[1] );
1937
1938      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1939	 STORE( &r[0], 0, chan_index );
1940      }
1941      break;
1942
1943   case TGSI_OPCODE_COS:
1944      FETCH(&r[0], 0, CHAN_X);
1945
1946      micro_cos( &r[0], &r[0] );
1947
1948      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1949	 STORE( &r[0], 0, chan_index );
1950      }
1951      break;
1952
1953   case TGSI_OPCODE_DDX:
1954      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1955         FETCH( &r[0], 0, chan_index );
1956         micro_ddx( &r[0], &r[0] );
1957         STORE( &r[0], 0, chan_index );
1958      }
1959      break;
1960
1961   case TGSI_OPCODE_DDY:
1962      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1963         FETCH( &r[0], 0, chan_index );
1964         micro_ddy( &r[0], &r[0] );
1965         STORE( &r[0], 0, chan_index );
1966      }
1967      break;
1968
1969   case TGSI_OPCODE_KILP:
1970      exec_kilp (mach, inst);
1971      break;
1972
1973   case TGSI_OPCODE_KIL:
1974      /* for enabled ExecMask bits, set the killed bit */
1975      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
1976      break;
1977
1978   case TGSI_OPCODE_PK2H:
1979      assert (0);
1980      break;
1981
1982   case TGSI_OPCODE_PK2US:
1983      assert (0);
1984      break;
1985
1986   case TGSI_OPCODE_PK4B:
1987      assert (0);
1988      break;
1989
1990   case TGSI_OPCODE_PK4UB:
1991      assert (0);
1992      break;
1993
1994   case TGSI_OPCODE_RFL:
1995      assert (0);
1996      break;
1997
1998   case TGSI_OPCODE_SEQ:
1999      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2000         FETCH( &r[0], 0, chan_index );
2001         FETCH( &r[1], 1, chan_index );
2002         micro_eq( &r[0], &r[0], &r[1],
2003                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2004                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2005         STORE( &r[0], 0, chan_index );
2006      }
2007      break;
2008
2009   case TGSI_OPCODE_SFL:
2010      assert (0);
2011      break;
2012
2013   case TGSI_OPCODE_SGT:
2014      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2015         FETCH( &r[0], 0, chan_index );
2016         FETCH( &r[1], 1, chan_index );
2017         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2018         STORE( &r[0], 0, chan_index );
2019      }
2020      break;
2021
2022   case TGSI_OPCODE_SIN:
2023      FETCH( &r[0], 0, CHAN_X );
2024      micro_sin( &r[0], &r[0] );
2025      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2026         STORE( &r[0], 0, chan_index );
2027      }
2028      break;
2029
2030   case TGSI_OPCODE_SLE:
2031      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2032         FETCH( &r[0], 0, chan_index );
2033         FETCH( &r[1], 1, chan_index );
2034         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2035         STORE( &r[0], 0, chan_index );
2036      }
2037      break;
2038
2039   case TGSI_OPCODE_SNE:
2040      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2041         FETCH( &r[0], 0, chan_index );
2042         FETCH( &r[1], 1, chan_index );
2043         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2044         STORE( &r[0], 0, chan_index );
2045      }
2046      break;
2047
2048   case TGSI_OPCODE_STR:
2049      assert (0);
2050      break;
2051
2052   case TGSI_OPCODE_TEX:
2053      /* simple texture lookup */
2054      /* src[0] = texcoord */
2055      /* src[1] = sampler unit */
2056      exec_tex(mach, inst, FALSE, FALSE);
2057      break;
2058
2059   case TGSI_OPCODE_TXB:
2060      /* Texture lookup with lod bias */
2061      /* src[0] = texcoord (src[0].w = LOD bias) */
2062      /* src[1] = sampler unit */
2063      exec_tex(mach, inst, TRUE, FALSE);
2064      break;
2065
2066   case TGSI_OPCODE_TXD:
2067      /* Texture lookup with explict partial derivatives */
2068      /* src[0] = texcoord */
2069      /* src[1] = d[strq]/dx */
2070      /* src[2] = d[strq]/dy */
2071      /* src[3] = sampler unit */
2072      assert (0);
2073      break;
2074
2075   case TGSI_OPCODE_TXL:
2076      /* Texture lookup with explit LOD */
2077      /* src[0] = texcoord (src[0].w = LOD) */
2078      /* src[1] = sampler unit */
2079      exec_tex(mach, inst, TRUE, FALSE);
2080      break;
2081
2082   case TGSI_OPCODE_TXP:
2083      /* Texture lookup with projection */
2084      /* src[0] = texcoord (src[0].w = projection) */
2085      /* src[1] = sampler unit */
2086      exec_tex(mach, inst, FALSE, TRUE);
2087      break;
2088
2089   case TGSI_OPCODE_UP2H:
2090      assert (0);
2091      break;
2092
2093   case TGSI_OPCODE_UP2US:
2094      assert (0);
2095      break;
2096
2097   case TGSI_OPCODE_UP4B:
2098      assert (0);
2099      break;
2100
2101   case TGSI_OPCODE_UP4UB:
2102      assert (0);
2103      break;
2104
2105   case TGSI_OPCODE_X2D:
2106      assert (0);
2107      break;
2108
2109   case TGSI_OPCODE_ARA:
2110      assert (0);
2111      break;
2112
2113   case TGSI_OPCODE_ARR:
2114      assert (0);
2115      break;
2116
2117   case TGSI_OPCODE_BRA:
2118      assert (0);
2119      break;
2120
2121   case TGSI_OPCODE_CAL:
2122      /* skip the call if no execution channels are enabled */
2123      if (mach->ExecMask) {
2124         /* do the call */
2125
2126         /* push the Cond, Loop, Cont stacks */
2127         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2128         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2129         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2130         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2131         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2132         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2133
2134         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2135         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2136
2137         /* note that PC was already incremented above */
2138         mach->CallStack[mach->CallStackTop++] = *pc;
2139         *pc = inst->InstructionExtLabel.Label;
2140      }
2141      break;
2142
2143   case TGSI_OPCODE_RET:
2144      mach->FuncMask &= ~mach->ExecMask;
2145      UPDATE_EXEC_MASK(mach);
2146
2147      if (mach->ExecMask == 0x0) {
2148         /* really return now (otherwise, keep executing */
2149
2150         if (mach->CallStackTop == 0) {
2151            /* returning from main() */
2152            *pc = -1;
2153            return;
2154         }
2155         *pc = mach->CallStack[--mach->CallStackTop];
2156
2157         /* pop the Cond, Loop, Cont stacks */
2158         assert(mach->CondStackTop > 0);
2159         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2160         assert(mach->LoopStackTop > 0);
2161         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2162         assert(mach->ContStackTop > 0);
2163         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2164         assert(mach->FuncStackTop > 0);
2165         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2166
2167         UPDATE_EXEC_MASK(mach);
2168      }
2169      break;
2170
2171   case TGSI_OPCODE_SSG:
2172      assert (0);
2173      break;
2174
2175   case TGSI_OPCODE_CMP:
2176      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2177         FETCH(&r[0], 0, chan_index);
2178         FETCH(&r[1], 1, chan_index);
2179         FETCH(&r[2], 2, chan_index);
2180
2181         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2182
2183         STORE(&r[0], 0, chan_index);
2184      }
2185      break;
2186
2187   case TGSI_OPCODE_SCS:
2188      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2189         FETCH( &r[0], 0, CHAN_X );
2190      }
2191      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2192         micro_cos( &r[1], &r[0] );
2193         STORE( &r[1], 0, CHAN_X );
2194      }
2195      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2196         micro_sin( &r[1], &r[0] );
2197         STORE( &r[1], 0, CHAN_Y );
2198      }
2199      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2200         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2201      }
2202      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2203         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2204      }
2205      break;
2206
2207   case TGSI_OPCODE_NRM:
2208      assert (0);
2209      break;
2210
2211   case TGSI_OPCODE_DIV:
2212      assert( 0 );
2213      break;
2214
2215   case TGSI_OPCODE_DP2:
2216      FETCH( &r[0], 0, CHAN_X );
2217      FETCH( &r[1], 1, CHAN_X );
2218      micro_mul( &r[0], &r[0], &r[1] );
2219
2220      FETCH( &r[1], 0, CHAN_Y );
2221      FETCH( &r[2], 1, CHAN_Y );
2222      micro_mul( &r[1], &r[1], &r[2] );
2223      micro_add( &r[0], &r[0], &r[1] );
2224
2225      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2226         STORE( &r[0], 0, chan_index );
2227      }
2228      break;
2229
2230   case TGSI_OPCODE_IF:
2231      /* push CondMask */
2232      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2233      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2234      FETCH( &r[0], 0, CHAN_X );
2235      /* update CondMask */
2236      if( ! r[0].u[0] ) {
2237         mach->CondMask &= ~0x1;
2238      }
2239      if( ! r[0].u[1] ) {
2240         mach->CondMask &= ~0x2;
2241      }
2242      if( ! r[0].u[2] ) {
2243         mach->CondMask &= ~0x4;
2244      }
2245      if( ! r[0].u[3] ) {
2246         mach->CondMask &= ~0x8;
2247      }
2248      UPDATE_EXEC_MASK(mach);
2249      /* Todo: If CondMask==0, jump to ELSE */
2250      break;
2251
2252   case TGSI_OPCODE_ELSE:
2253      /* invert CondMask wrt previous mask */
2254      {
2255         uint prevMask;
2256         assert(mach->CondStackTop > 0);
2257         prevMask = mach->CondStack[mach->CondStackTop - 1];
2258         mach->CondMask = ~mach->CondMask & prevMask;
2259         UPDATE_EXEC_MASK(mach);
2260         /* Todo: If CondMask==0, jump to ENDIF */
2261      }
2262      break;
2263
2264   case TGSI_OPCODE_ENDIF:
2265      /* pop CondMask */
2266      assert(mach->CondStackTop > 0);
2267      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2268      UPDATE_EXEC_MASK(mach);
2269      break;
2270
2271   case TGSI_OPCODE_END:
2272      /* halt execution */
2273      *pc = -1;
2274      break;
2275
2276   case TGSI_OPCODE_REP:
2277      assert (0);
2278      break;
2279
2280   case TGSI_OPCODE_ENDREP:
2281       assert (0);
2282       break;
2283
2284   case TGSI_OPCODE_PUSHA:
2285      assert (0);
2286      break;
2287
2288   case TGSI_OPCODE_POPA:
2289      assert (0);
2290      break;
2291
2292   case TGSI_OPCODE_CEIL:
2293      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2294         FETCH( &r[0], 0, chan_index );
2295         micro_ceil( &r[0], &r[0] );
2296         STORE( &r[0], 0, chan_index );
2297      }
2298      break;
2299
2300   case TGSI_OPCODE_I2F:
2301      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2302         FETCH( &r[0], 0, chan_index );
2303         micro_i2f( &r[0], &r[0] );
2304         STORE( &r[0], 0, chan_index );
2305      }
2306      break;
2307
2308   case TGSI_OPCODE_NOT:
2309      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2310         FETCH( &r[0], 0, chan_index );
2311         micro_not( &r[0], &r[0] );
2312         STORE( &r[0], 0, chan_index );
2313      }
2314      break;
2315
2316   case TGSI_OPCODE_TRUNC:
2317      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2318         FETCH( &r[0], 0, chan_index );
2319         micro_trunc( &r[0], &r[0] );
2320         STORE( &r[0], 0, chan_index );
2321      }
2322      break;
2323
2324   case TGSI_OPCODE_SHL:
2325      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2326         FETCH( &r[0], 0, chan_index );
2327         FETCH( &r[1], 1, chan_index );
2328         micro_shl( &r[0], &r[0], &r[1] );
2329         STORE( &r[0], 0, chan_index );
2330      }
2331      break;
2332
2333   case TGSI_OPCODE_SHR:
2334      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2335         FETCH( &r[0], 0, chan_index );
2336         FETCH( &r[1], 1, chan_index );
2337         micro_ishr( &r[0], &r[0], &r[1] );
2338         STORE( &r[0], 0, chan_index );
2339      }
2340      break;
2341
2342   case TGSI_OPCODE_AND:
2343      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2344         FETCH( &r[0], 0, chan_index );
2345         FETCH( &r[1], 1, chan_index );
2346         micro_and( &r[0], &r[0], &r[1] );
2347         STORE( &r[0], 0, chan_index );
2348      }
2349      break;
2350
2351   case TGSI_OPCODE_OR:
2352      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2353         FETCH( &r[0], 0, chan_index );
2354         FETCH( &r[1], 1, chan_index );
2355         micro_or( &r[0], &r[0], &r[1] );
2356         STORE( &r[0], 0, chan_index );
2357      }
2358      break;
2359
2360   case TGSI_OPCODE_MOD:
2361      assert (0);
2362      break;
2363
2364   case TGSI_OPCODE_XOR:
2365      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2366         FETCH( &r[0], 0, chan_index );
2367         FETCH( &r[1], 1, chan_index );
2368         micro_xor( &r[0], &r[0], &r[1] );
2369         STORE( &r[0], 0, chan_index );
2370      }
2371      break;
2372
2373   case TGSI_OPCODE_SAD:
2374      assert (0);
2375      break;
2376
2377   case TGSI_OPCODE_TXF:
2378      assert (0);
2379      break;
2380
2381   case TGSI_OPCODE_TXQ:
2382      assert (0);
2383      break;
2384
2385   case TGSI_OPCODE_EMIT:
2386      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2387      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2388      break;
2389
2390   case TGSI_OPCODE_ENDPRIM:
2391      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2392      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2393      break;
2394
2395   case TGSI_OPCODE_LOOP:
2396      /* fall-through (for now) */
2397   case TGSI_OPCODE_BGNLOOP2:
2398      /* push LoopMask and ContMasks */
2399      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2400      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2401      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2402      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2403      break;
2404
2405   case TGSI_OPCODE_ENDLOOP:
2406      /* fall-through (for now at least) */
2407   case TGSI_OPCODE_ENDLOOP2:
2408      /* Restore ContMask, but don't pop */
2409      assert(mach->ContStackTop > 0);
2410      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2411      UPDATE_EXEC_MASK(mach);
2412      if (mach->ExecMask) {
2413         /* repeat loop: jump to instruction just past BGNLOOP */
2414         *pc = inst->InstructionExtLabel.Label + 1;
2415      }
2416      else {
2417         /* exit loop: pop LoopMask */
2418         assert(mach->LoopStackTop > 0);
2419         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2420         /* pop ContMask */
2421         assert(mach->ContStackTop > 0);
2422         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2423      }
2424      UPDATE_EXEC_MASK(mach);
2425      break;
2426
2427   case TGSI_OPCODE_BRK:
2428      /* turn off loop channels for each enabled exec channel */
2429      mach->LoopMask &= ~mach->ExecMask;
2430      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2431      UPDATE_EXEC_MASK(mach);
2432      break;
2433
2434   case TGSI_OPCODE_CONT:
2435      /* turn off cont channels for each enabled exec channel */
2436      mach->ContMask &= ~mach->ExecMask;
2437      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2438      UPDATE_EXEC_MASK(mach);
2439      break;
2440
2441   case TGSI_OPCODE_BGNSUB:
2442      /* no-op */
2443      break;
2444
2445   case TGSI_OPCODE_ENDSUB:
2446      /* no-op */
2447      break;
2448
2449   case TGSI_OPCODE_NOISE1:
2450      assert( 0 );
2451      break;
2452
2453   case TGSI_OPCODE_NOISE2:
2454      assert( 0 );
2455      break;
2456
2457   case TGSI_OPCODE_NOISE3:
2458      assert( 0 );
2459      break;
2460
2461   case TGSI_OPCODE_NOISE4:
2462      assert( 0 );
2463      break;
2464
2465   case TGSI_OPCODE_NOP:
2466      break;
2467
2468   default:
2469      assert( 0 );
2470   }
2471}
2472
2473
2474/**
2475 * Run TGSI interpreter.
2476 * \return bitmask of "alive" quad components
2477 */
2478uint
2479tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2480{
2481   uint i;
2482   int pc = 0;
2483
2484   mach->CondMask = 0xf;
2485   mach->LoopMask = 0xf;
2486   mach->ContMask = 0xf;
2487   mach->FuncMask = 0xf;
2488   mach->ExecMask = 0xf;
2489
2490   mach->CondStackTop = 0; /* temporarily subvert this assertion */
2491   assert(mach->CondStackTop == 0);
2492   assert(mach->LoopStackTop == 0);
2493   assert(mach->ContStackTop == 0);
2494   assert(mach->CallStackTop == 0);
2495
2496   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2497   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2498
2499   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2500      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2501      mach->Primitives[0] = 0;
2502   }
2503
2504
2505   /* execute declarations (interpolants) */
2506   for (i = 0; i < mach->NumDeclarations; i++) {
2507      exec_declaration( mach, mach->Declarations+i );
2508   }
2509
2510   /* execute instructions, until pc is set to -1 */
2511   while (pc != -1) {
2512      assert(pc < (int) mach->NumInstructions);
2513      exec_instruction( mach, mach->Instructions + pc, &pc );
2514   }
2515
2516#if 0
2517   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2518   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2519      /*
2520       * Scale back depth component.
2521       */
2522      for (i = 0; i < 4; i++)
2523         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2524   }
2525#endif
2526
2527   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2528}
2529
2530
2531