tgsi_exec.c revision 321634d80b48e33b4e9572d99e82c45c65701dd1
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care, since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by
 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
 *
 *
 * Authors:
 *   Michal Krol
 *   Brian Paul
 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_shader_tokens.h"
56#include "tgsi/tgsi_parse.h"
57#include "tgsi/tgsi_util.h"
58#include "tgsi_exec.h"
59#include "util/u_memory.h"
60#include "util/u_math.h"
61
/** Non-zero selects the util_fast_* variants in micro_exp2/lg2/pow. */
#define FAST_MATH 1

/* Element indices within a channel, as used by micro_ddx()/micro_ddy(). */
#define TILE_TOP_LEFT       0
#define TILE_TOP_RIGHT      1
#define TILE_BOTTOM_LEFT    2
#define TILE_BOTTOM_RIGHT   3

/* Channel indices. */
#define CHAN_X   0
#define CHAN_Y   1
#define CHAN_Z   2
#define CHAN_W   3
73
/*
 * Short aliases for the TGSI_EXEC_TEMP_* utility-register locations.
 * Suffix _I is the register index, suffix _C is the channel.
 */

/* Slots filled with 32-bit patterns by tgsi_exec_machine_init(). */
#define TEMP_0_I          TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C          TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I         TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C         TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I         TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C         TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I         TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C         TGSI_EXEC_TEMP_FFFFFFFF_C

/* Slots filled with float constants by tgsi_exec_machine_init(). */
#define TEMP_1_I          TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C          TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I          TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C          TGSI_EXEC_TEMP_TWO_C
#define TEMP_3_I          TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C          TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I       TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C       TGSI_EXEC_TEMP_HALF_C
#define TEMP_128_I        TGSI_EXEC_TEMP_128_I
#define TEMP_128_C        TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I       TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C       TGSI_EXEC_TEMP_MINUS_128_C

/* Remaining utility slots. */
#define TEMP_KILMASK_I    TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C    TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I     TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C     TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I  TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C  TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I         TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C         TGSI_EXEC_TEMP_CC_C
#define TEMP_R0           TGSI_EXEC_TEMP_R0
106
/** Non-zero if channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Non-zero if channel CHAN is set in the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel
 * (0 .. NUM_CHANNELS-1) enabled in destination 0's write mask, with CHAN
 * holding the channel number.
 *
 * The header ends in "if (!enabled) { } else" rather than a bare "if" so
 * that an "else" written after the loop body cannot attach to the
 * macro's hidden "if".
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/** Same as FOR_EACH_ENABLED_CHANNEL, but for destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
120
121
/**
 * Recompute MACH->ExecMask as the bitwise AND of the condition, loop,
 * continue and function masks.  MACH is a pointer expression; it and the
 * whole expansion are parenthesized so any pointer expression works.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
125
126/**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131void
132tgsi_exec_machine_bind_shader(
133   struct tgsi_exec_machine *mach,
134   const struct tgsi_token *tokens,
135   uint numSamplers,
136   struct tgsi_sampler **samplers)
137{
138   uint k;
139   struct tgsi_parse_context parse;
140   struct tgsi_exec_labels *labels = &mach->Labels;
141   struct tgsi_full_instruction *instructions;
142   struct tgsi_full_declaration *declarations;
143   uint maxInstructions = 10, numInstructions = 0;
144   uint maxDeclarations = 10, numDeclarations = 0;
145   uint instno = 0;
146
147#if 0
148   tgsi_dump(tokens, 0);
149#endif
150
151   util_init_math();
152
153   mach->Tokens = tokens;
154   mach->Samplers = samplers;
155
156   k = tgsi_parse_init (&parse, mach->Tokens);
157   if (k != TGSI_PARSE_OK) {
158      debug_printf( "Problem parsing!\n" );
159      return;
160   }
161
162   mach->Processor = parse.FullHeader.Processor.Processor;
163   mach->ImmLimit = 0;
164   labels->count = 0;
165
166   declarations = (struct tgsi_full_declaration *)
167      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169   if (!declarations) {
170      return;
171   }
172
173   instructions = (struct tgsi_full_instruction *)
174      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176   if (!instructions) {
177      FREE( declarations );
178      return;
179   }
180
181   while( !tgsi_parse_end_of_tokens( &parse ) ) {
182      uint pointer = parse.Position;
183      uint i;
184
185      tgsi_parse_token( &parse );
186      switch( parse.FullToken.Token.Type ) {
187      case TGSI_TOKEN_TYPE_DECLARATION:
188         /* save expanded declaration */
189         if (numDeclarations == maxDeclarations) {
190            declarations = REALLOC(declarations,
191                                   maxDeclarations
192                                   * sizeof(struct tgsi_full_declaration),
193                                   (maxDeclarations + 10)
194                                   * sizeof(struct tgsi_full_declaration));
195            maxDeclarations += 10;
196         }
197         memcpy(declarations + numDeclarations,
198                &parse.FullToken.FullDeclaration,
199                sizeof(declarations[0]));
200         numDeclarations++;
201         break;
202
203      case TGSI_TOKEN_TYPE_IMMEDIATE:
204         {
205            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
206            assert( size % 4 == 0 );
207            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209            for( i = 0; i < size; i++ ) {
210               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212            }
213            mach->ImmLimit += size / 4;
214         }
215         break;
216
217      case TGSI_TOKEN_TYPE_INSTRUCTION:
218         assert( labels->count < MAX_LABELS );
219
220         labels->labels[labels->count][0] = instno;
221         labels->labels[labels->count][1] = pointer;
222         labels->count++;
223
224         /* save expanded instruction */
225         if (numInstructions == maxInstructions) {
226            instructions = REALLOC(instructions,
227                                   maxInstructions
228                                   * sizeof(struct tgsi_full_instruction),
229                                   (maxInstructions + 10)
230                                   * sizeof(struct tgsi_full_instruction));
231            maxInstructions += 10;
232         }
233         memcpy(instructions + numInstructions,
234                &parse.FullToken.FullInstruction,
235                sizeof(instructions[0]));
236         numInstructions++;
237         break;
238
239      default:
240         assert( 0 );
241      }
242   }
243   tgsi_parse_free (&parse);
244
245   if (mach->Declarations) {
246      FREE( mach->Declarations );
247   }
248   mach->Declarations = declarations;
249   mach->NumDeclarations = numDeclarations;
250
251   if (mach->Instructions) {
252      FREE( mach->Instructions );
253   }
254   mach->Instructions = instructions;
255   mach->NumInstructions = numInstructions;
256}
257
258
259void
260tgsi_exec_machine_init(
261   struct tgsi_exec_machine *mach )
262{
263   uint i;
264
265   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268   /* Setup constants. */
269   for( i = 0; i < 4; i++ ) {
270      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280   }
281}
282
283
284void
285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286{
287   if (mach->Instructions) {
288      FREE(mach->Instructions);
289      mach->Instructions = NULL;
290      mach->NumInstructions = 0;
291   }
292   if (mach->Declarations) {
293      FREE(mach->Declarations);
294      mach->Declarations = NULL;
295      mach->NumDeclarations = 0;
296   }
297}
298
299
300static void
301micro_abs(
302   union tgsi_exec_channel *dst,
303   const union tgsi_exec_channel *src )
304{
305   dst->f[0] = fabsf( src->f[0] );
306   dst->f[1] = fabsf( src->f[1] );
307   dst->f[2] = fabsf( src->f[2] );
308   dst->f[3] = fabsf( src->f[3] );
309}
310
311static void
312micro_add(
313   union tgsi_exec_channel *dst,
314   const union tgsi_exec_channel *src0,
315   const union tgsi_exec_channel *src1 )
316{
317   dst->f[0] = src0->f[0] + src1->f[0];
318   dst->f[1] = src0->f[1] + src1->f[1];
319   dst->f[2] = src0->f[2] + src1->f[2];
320   dst->f[3] = src0->f[3] + src1->f[3];
321}
322
323#if 0
324static void
325micro_iadd(
326   union tgsi_exec_channel *dst,
327   const union tgsi_exec_channel *src0,
328   const union tgsi_exec_channel *src1 )
329{
330   dst->i[0] = src0->i[0] + src1->i[0];
331   dst->i[1] = src0->i[1] + src1->i[1];
332   dst->i[2] = src0->i[2] + src1->i[2];
333   dst->i[3] = src0->i[3] + src1->i[3];
334}
335#endif
336
337static void
338micro_and(
339   union tgsi_exec_channel *dst,
340   const union tgsi_exec_channel *src0,
341   const union tgsi_exec_channel *src1 )
342{
343   dst->u[0] = src0->u[0] & src1->u[0];
344   dst->u[1] = src0->u[1] & src1->u[1];
345   dst->u[2] = src0->u[2] & src1->u[2];
346   dst->u[3] = src0->u[3] & src1->u[3];
347}
348
349static void
350micro_ceil(
351   union tgsi_exec_channel *dst,
352   const union tgsi_exec_channel *src )
353{
354   dst->f[0] = ceilf( src->f[0] );
355   dst->f[1] = ceilf( src->f[1] );
356   dst->f[2] = ceilf( src->f[2] );
357   dst->f[3] = ceilf( src->f[3] );
358}
359
360static void
361micro_cos(
362   union tgsi_exec_channel *dst,
363   const union tgsi_exec_channel *src )
364{
365   dst->f[0] = cosf( src->f[0] );
366   dst->f[1] = cosf( src->f[1] );
367   dst->f[2] = cosf( src->f[2] );
368   dst->f[3] = cosf( src->f[3] );
369}
370
371static void
372micro_ddx(
373   union tgsi_exec_channel *dst,
374   const union tgsi_exec_channel *src )
375{
376   dst->f[0] =
377   dst->f[1] =
378   dst->f[2] =
379   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
380}
381
382static void
383micro_ddy(
384   union tgsi_exec_channel *dst,
385   const union tgsi_exec_channel *src )
386{
387   dst->f[0] =
388   dst->f[1] =
389   dst->f[2] =
390   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
391}
392
393static void
394micro_div(
395   union tgsi_exec_channel *dst,
396   const union tgsi_exec_channel *src0,
397   const union tgsi_exec_channel *src1 )
398{
399   if (src1->f[0] != 0) {
400      dst->f[0] = src0->f[0] / src1->f[0];
401   }
402   if (src1->f[1] != 0) {
403      dst->f[1] = src0->f[1] / src1->f[1];
404   }
405   if (src1->f[2] != 0) {
406      dst->f[2] = src0->f[2] / src1->f[2];
407   }
408   if (src1->f[3] != 0) {
409      dst->f[3] = src0->f[3] / src1->f[3];
410   }
411}
412
413#if 0
414static void
415micro_udiv(
416   union tgsi_exec_channel *dst,
417   const union tgsi_exec_channel *src0,
418   const union tgsi_exec_channel *src1 )
419{
420   dst->u[0] = src0->u[0] / src1->u[0];
421   dst->u[1] = src0->u[1] / src1->u[1];
422   dst->u[2] = src0->u[2] / src1->u[2];
423   dst->u[3] = src0->u[3] / src1->u[3];
424}
425#endif
426
427static void
428micro_eq(
429   union tgsi_exec_channel *dst,
430   const union tgsi_exec_channel *src0,
431   const union tgsi_exec_channel *src1,
432   const union tgsi_exec_channel *src2,
433   const union tgsi_exec_channel *src3 )
434{
435   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
436   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
437   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
438   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
439}
440
441#if 0
442static void
443micro_ieq(
444   union tgsi_exec_channel *dst,
445   const union tgsi_exec_channel *src0,
446   const union tgsi_exec_channel *src1,
447   const union tgsi_exec_channel *src2,
448   const union tgsi_exec_channel *src3 )
449{
450   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
451   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
452   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
453   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
454}
455#endif
456
457static void
458micro_exp2(
459   union tgsi_exec_channel *dst,
460   const union tgsi_exec_channel *src)
461{
462#if FAST_MATH
463   dst->f[0] = util_fast_exp2( src->f[0] );
464   dst->f[1] = util_fast_exp2( src->f[1] );
465   dst->f[2] = util_fast_exp2( src->f[2] );
466   dst->f[3] = util_fast_exp2( src->f[3] );
467#else
468   dst->f[0] = powf( 2.0f, src->f[0] );
469   dst->f[1] = powf( 2.0f, src->f[1] );
470   dst->f[2] = powf( 2.0f, src->f[2] );
471   dst->f[3] = powf( 2.0f, src->f[3] );
472#endif
473}
474
475#if 0
476static void
477micro_f2ut(
478   union tgsi_exec_channel *dst,
479   const union tgsi_exec_channel *src )
480{
481   dst->u[0] = (uint) src->f[0];
482   dst->u[1] = (uint) src->f[1];
483   dst->u[2] = (uint) src->f[2];
484   dst->u[3] = (uint) src->f[3];
485}
486#endif
487
488static void
489micro_float_clamp(union tgsi_exec_channel *dst,
490                  const union tgsi_exec_channel *src)
491{
492   uint i;
493
494   for (i = 0; i < 4; i++) {
495      if (src->f[i] > 0.0f) {
496         if (src->f[i] > 1.884467e+019f)
497            dst->f[i] = 1.884467e+019f;
498         else if (src->f[i] < 5.42101e-020f)
499            dst->f[i] = 5.42101e-020f;
500         else
501            dst->f[i] = src->f[i];
502      }
503      else {
504         if (src->f[i] < -1.884467e+019f)
505            dst->f[i] = -1.884467e+019f;
506         else if (src->f[i] > -5.42101e-020f)
507            dst->f[i] = -5.42101e-020f;
508         else
509            dst->f[i] = src->f[i];
510      }
511   }
512}
513
514static void
515micro_flr(
516   union tgsi_exec_channel *dst,
517   const union tgsi_exec_channel *src )
518{
519   dst->f[0] = floorf( src->f[0] );
520   dst->f[1] = floorf( src->f[1] );
521   dst->f[2] = floorf( src->f[2] );
522   dst->f[3] = floorf( src->f[3] );
523}
524
525static void
526micro_frc(
527   union tgsi_exec_channel *dst,
528   const union tgsi_exec_channel *src )
529{
530   dst->f[0] = src->f[0] - floorf( src->f[0] );
531   dst->f[1] = src->f[1] - floorf( src->f[1] );
532   dst->f[2] = src->f[2] - floorf( src->f[2] );
533   dst->f[3] = src->f[3] - floorf( src->f[3] );
534}
535
536static void
537micro_i2f(
538   union tgsi_exec_channel *dst,
539   const union tgsi_exec_channel *src )
540{
541   dst->f[0] = (float) src->i[0];
542   dst->f[1] = (float) src->i[1];
543   dst->f[2] = (float) src->i[2];
544   dst->f[3] = (float) src->i[3];
545}
546
547static void
548micro_lg2(
549   union tgsi_exec_channel *dst,
550   const union tgsi_exec_channel *src )
551{
552#if FAST_MATH
553   dst->f[0] = util_fast_log2( src->f[0] );
554   dst->f[1] = util_fast_log2( src->f[1] );
555   dst->f[2] = util_fast_log2( src->f[2] );
556   dst->f[3] = util_fast_log2( src->f[3] );
557#else
558   dst->f[0] = logf( src->f[0] ) * 1.442695f;
559   dst->f[1] = logf( src->f[1] ) * 1.442695f;
560   dst->f[2] = logf( src->f[2] ) * 1.442695f;
561   dst->f[3] = logf( src->f[3] ) * 1.442695f;
562#endif
563}
564
565static void
566micro_le(
567   union tgsi_exec_channel *dst,
568   const union tgsi_exec_channel *src0,
569   const union tgsi_exec_channel *src1,
570   const union tgsi_exec_channel *src2,
571   const union tgsi_exec_channel *src3 )
572{
573   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
574   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
575   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
576   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
577}
578
579static void
580micro_lt(
581   union tgsi_exec_channel *dst,
582   const union tgsi_exec_channel *src0,
583   const union tgsi_exec_channel *src1,
584   const union tgsi_exec_channel *src2,
585   const union tgsi_exec_channel *src3 )
586{
587   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
588   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
589   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
590   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
591}
592
593#if 0
594static void
595micro_ilt(
596   union tgsi_exec_channel *dst,
597   const union tgsi_exec_channel *src0,
598   const union tgsi_exec_channel *src1,
599   const union tgsi_exec_channel *src2,
600   const union tgsi_exec_channel *src3 )
601{
602   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
603   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
604   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
605   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
606}
607#endif
608
609#if 0
610static void
611micro_ult(
612   union tgsi_exec_channel *dst,
613   const union tgsi_exec_channel *src0,
614   const union tgsi_exec_channel *src1,
615   const union tgsi_exec_channel *src2,
616   const union tgsi_exec_channel *src3 )
617{
618   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
619   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
620   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
621   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
622}
623#endif
624
625static void
626micro_max(
627   union tgsi_exec_channel *dst,
628   const union tgsi_exec_channel *src0,
629   const union tgsi_exec_channel *src1 )
630{
631   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
632   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
633   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
634   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
635}
636
637#if 0
638static void
639micro_imax(
640   union tgsi_exec_channel *dst,
641   const union tgsi_exec_channel *src0,
642   const union tgsi_exec_channel *src1 )
643{
644   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
645   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
646   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
647   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
648}
649#endif
650
651#if 0
652static void
653micro_umax(
654   union tgsi_exec_channel *dst,
655   const union tgsi_exec_channel *src0,
656   const union tgsi_exec_channel *src1 )
657{
658   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
659   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
660   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
661   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
662}
663#endif
664
665static void
666micro_min(
667   union tgsi_exec_channel *dst,
668   const union tgsi_exec_channel *src0,
669   const union tgsi_exec_channel *src1 )
670{
671   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
672   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
673   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
674   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
675}
676
677#if 0
678static void
679micro_imin(
680   union tgsi_exec_channel *dst,
681   const union tgsi_exec_channel *src0,
682   const union tgsi_exec_channel *src1 )
683{
684   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
685   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
686   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
687   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
688}
689#endif
690
691#if 0
692static void
693micro_umin(
694   union tgsi_exec_channel *dst,
695   const union tgsi_exec_channel *src0,
696   const union tgsi_exec_channel *src1 )
697{
698   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
699   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
700   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
701   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
702}
703#endif
704
705#if 0
706static void
707micro_umod(
708   union tgsi_exec_channel *dst,
709   const union tgsi_exec_channel *src0,
710   const union tgsi_exec_channel *src1 )
711{
712   dst->u[0] = src0->u[0] % src1->u[0];
713   dst->u[1] = src0->u[1] % src1->u[1];
714   dst->u[2] = src0->u[2] % src1->u[2];
715   dst->u[3] = src0->u[3] % src1->u[3];
716}
717#endif
718
719static void
720micro_mul(
721   union tgsi_exec_channel *dst,
722   const union tgsi_exec_channel *src0,
723   const union tgsi_exec_channel *src1 )
724{
725   dst->f[0] = src0->f[0] * src1->f[0];
726   dst->f[1] = src0->f[1] * src1->f[1];
727   dst->f[2] = src0->f[2] * src1->f[2];
728   dst->f[3] = src0->f[3] * src1->f[3];
729}
730
731#if 0
732static void
733micro_imul(
734   union tgsi_exec_channel *dst,
735   const union tgsi_exec_channel *src0,
736   const union tgsi_exec_channel *src1 )
737{
738   dst->i[0] = src0->i[0] * src1->i[0];
739   dst->i[1] = src0->i[1] * src1->i[1];
740   dst->i[2] = src0->i[2] * src1->i[2];
741   dst->i[3] = src0->i[3] * src1->i[3];
742}
743#endif
744
745#if 0
746static void
747micro_imul64(
748   union tgsi_exec_channel *dst0,
749   union tgsi_exec_channel *dst1,
750   const union tgsi_exec_channel *src0,
751   const union tgsi_exec_channel *src1 )
752{
753   dst1->i[0] = src0->i[0] * src1->i[0];
754   dst1->i[1] = src0->i[1] * src1->i[1];
755   dst1->i[2] = src0->i[2] * src1->i[2];
756   dst1->i[3] = src0->i[3] * src1->i[3];
757   dst0->i[0] = 0;
758   dst0->i[1] = 0;
759   dst0->i[2] = 0;
760   dst0->i[3] = 0;
761}
762#endif
763
764#if 0
765static void
766micro_umul64(
767   union tgsi_exec_channel *dst0,
768   union tgsi_exec_channel *dst1,
769   const union tgsi_exec_channel *src0,
770   const union tgsi_exec_channel *src1 )
771{
772   dst1->u[0] = src0->u[0] * src1->u[0];
773   dst1->u[1] = src0->u[1] * src1->u[1];
774   dst1->u[2] = src0->u[2] * src1->u[2];
775   dst1->u[3] = src0->u[3] * src1->u[3];
776   dst0->u[0] = 0;
777   dst0->u[1] = 0;
778   dst0->u[2] = 0;
779   dst0->u[3] = 0;
780}
781#endif
782
783
784#if 0
785static void
786micro_movc(
787   union tgsi_exec_channel *dst,
788   const union tgsi_exec_channel *src0,
789   const union tgsi_exec_channel *src1,
790   const union tgsi_exec_channel *src2 )
791{
792   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
793   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
794   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
795   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
796}
797#endif
798
799static void
800micro_neg(
801   union tgsi_exec_channel *dst,
802   const union tgsi_exec_channel *src )
803{
804   dst->f[0] = -src->f[0];
805   dst->f[1] = -src->f[1];
806   dst->f[2] = -src->f[2];
807   dst->f[3] = -src->f[3];
808}
809
810#if 0
811static void
812micro_ineg(
813   union tgsi_exec_channel *dst,
814   const union tgsi_exec_channel *src )
815{
816   dst->i[0] = -src->i[0];
817   dst->i[1] = -src->i[1];
818   dst->i[2] = -src->i[2];
819   dst->i[3] = -src->i[3];
820}
821#endif
822
823static void
824micro_not(
825   union tgsi_exec_channel *dst,
826   const union tgsi_exec_channel *src )
827{
828   dst->u[0] = ~src->u[0];
829   dst->u[1] = ~src->u[1];
830   dst->u[2] = ~src->u[2];
831   dst->u[3] = ~src->u[3];
832}
833
834static void
835micro_or(
836   union tgsi_exec_channel *dst,
837   const union tgsi_exec_channel *src0,
838   const union tgsi_exec_channel *src1 )
839{
840   dst->u[0] = src0->u[0] | src1->u[0];
841   dst->u[1] = src0->u[1] | src1->u[1];
842   dst->u[2] = src0->u[2] | src1->u[2];
843   dst->u[3] = src0->u[3] | src1->u[3];
844}
845
846static void
847micro_pow(
848   union tgsi_exec_channel *dst,
849   const union tgsi_exec_channel *src0,
850   const union tgsi_exec_channel *src1 )
851{
852#if FAST_MATH
853   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
854   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
855   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
856   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
857#else
858   dst->f[0] = powf( src0->f[0], src1->f[0] );
859   dst->f[1] = powf( src0->f[1], src1->f[1] );
860   dst->f[2] = powf( src0->f[2], src1->f[2] );
861   dst->f[3] = powf( src0->f[3], src1->f[3] );
862#endif
863}
864
865static void
866micro_rnd(
867   union tgsi_exec_channel *dst,
868   const union tgsi_exec_channel *src )
869{
870   dst->f[0] = floorf( src->f[0] + 0.5f );
871   dst->f[1] = floorf( src->f[1] + 0.5f );
872   dst->f[2] = floorf( src->f[2] + 0.5f );
873   dst->f[3] = floorf( src->f[3] + 0.5f );
874}
875
876static void
877micro_sgn(
878   union tgsi_exec_channel *dst,
879   const union tgsi_exec_channel *src )
880{
881   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
882   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
883   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
884   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
885}
886
887static void
888micro_shl(
889   union tgsi_exec_channel *dst,
890   const union tgsi_exec_channel *src0,
891   const union tgsi_exec_channel *src1 )
892{
893   dst->i[0] = src0->i[0] << src1->i[0];
894   dst->i[1] = src0->i[1] << src1->i[1];
895   dst->i[2] = src0->i[2] << src1->i[2];
896   dst->i[3] = src0->i[3] << src1->i[3];
897}
898
899static void
900micro_ishr(
901   union tgsi_exec_channel *dst,
902   const union tgsi_exec_channel *src0,
903   const union tgsi_exec_channel *src1 )
904{
905   dst->i[0] = src0->i[0] >> src1->i[0];
906   dst->i[1] = src0->i[1] >> src1->i[1];
907   dst->i[2] = src0->i[2] >> src1->i[2];
908   dst->i[3] = src0->i[3] >> src1->i[3];
909}
910
911static void
912micro_trunc(
913   union tgsi_exec_channel *dst,
914   const union tgsi_exec_channel *src0 )
915{
916   dst->f[0] = (float) (int) src0->f[0];
917   dst->f[1] = (float) (int) src0->f[1];
918   dst->f[2] = (float) (int) src0->f[2];
919   dst->f[3] = (float) (int) src0->f[3];
920}
921
922#if 0
923static void
924micro_ushr(
925   union tgsi_exec_channel *dst,
926   const union tgsi_exec_channel *src0,
927   const union tgsi_exec_channel *src1 )
928{
929   dst->u[0] = src0->u[0] >> src1->u[0];
930   dst->u[1] = src0->u[1] >> src1->u[1];
931   dst->u[2] = src0->u[2] >> src1->u[2];
932   dst->u[3] = src0->u[3] >> src1->u[3];
933}
934#endif
935
936static void
937micro_sin(
938   union tgsi_exec_channel *dst,
939   const union tgsi_exec_channel *src )
940{
941   dst->f[0] = sinf( src->f[0] );
942   dst->f[1] = sinf( src->f[1] );
943   dst->f[2] = sinf( src->f[2] );
944   dst->f[3] = sinf( src->f[3] );
945}
946
947static void
948micro_sqrt( union tgsi_exec_channel *dst,
949            const union tgsi_exec_channel *src )
950{
951   dst->f[0] = sqrtf( src->f[0] );
952   dst->f[1] = sqrtf( src->f[1] );
953   dst->f[2] = sqrtf( src->f[2] );
954   dst->f[3] = sqrtf( src->f[3] );
955}
956
957static void
958micro_sub(
959   union tgsi_exec_channel *dst,
960   const union tgsi_exec_channel *src0,
961   const union tgsi_exec_channel *src1 )
962{
963   dst->f[0] = src0->f[0] - src1->f[0];
964   dst->f[1] = src0->f[1] - src1->f[1];
965   dst->f[2] = src0->f[2] - src1->f[2];
966   dst->f[3] = src0->f[3] - src1->f[3];
967}
968
969#if 0
970static void
971micro_u2f(
972   union tgsi_exec_channel *dst,
973   const union tgsi_exec_channel *src )
974{
975   dst->f[0] = (float) src->u[0];
976   dst->f[1] = (float) src->u[1];
977   dst->f[2] = (float) src->u[2];
978   dst->f[3] = (float) src->u[3];
979}
980#endif
981
982static void
983micro_xor(
984   union tgsi_exec_channel *dst,
985   const union tgsi_exec_channel *src0,
986   const union tgsi_exec_channel *src1 )
987{
988   dst->u[0] = src0->u[0] ^ src1->u[0];
989   dst->u[1] = src0->u[1] ^ src1->u[1];
990   dst->u[2] = src0->u[2] ^ src1->u[2];
991   dst->u[3] = src0->u[3] ^ src1->u[3];
992}
993
994static void
995fetch_src_file_channel(
996   const struct tgsi_exec_machine *mach,
997   const uint file,
998   const uint swizzle,
999   const union tgsi_exec_channel *index,
1000   union tgsi_exec_channel *chan )
1001{
1002   switch( swizzle ) {
1003   case TGSI_EXTSWIZZLE_X:
1004   case TGSI_EXTSWIZZLE_Y:
1005   case TGSI_EXTSWIZZLE_Z:
1006   case TGSI_EXTSWIZZLE_W:
1007      switch( file ) {
1008      case TGSI_FILE_CONSTANT:
1009         assert(mach->Consts);
1010         if (index->i[0] < 0)
1011            chan->f[0] = 0.0f;
1012         else
1013            chan->f[0] = mach->Consts[index->i[0]][swizzle];
1014         if (index->i[1] < 0)
1015            chan->f[1] = 0.0f;
1016         else
1017            chan->f[1] = mach->Consts[index->i[1]][swizzle];
1018         if (index->i[2] < 0)
1019            chan->f[2] = 0.0f;
1020         else
1021            chan->f[2] = mach->Consts[index->i[2]][swizzle];
1022         if (index->i[3] < 0)
1023            chan->f[3] = 0.0f;
1024         else
1025            chan->f[3] = mach->Consts[index->i[3]][swizzle];
1026         break;
1027
1028      case TGSI_FILE_INPUT:
1029         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1030         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1031         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1032         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1033         break;
1034
1035      case TGSI_FILE_TEMPORARY:
1036         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1037         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1038         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1039         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1040         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1041         break;
1042
1043      case TGSI_FILE_IMMEDIATE:
1044         assert( index->i[0] < (int) mach->ImmLimit );
1045         chan->f[0] = mach->Imms[index->i[0]][swizzle];
1046         assert( index->i[1] < (int) mach->ImmLimit );
1047         chan->f[1] = mach->Imms[index->i[1]][swizzle];
1048         assert( index->i[2] < (int) mach->ImmLimit );
1049         chan->f[2] = mach->Imms[index->i[2]][swizzle];
1050         assert( index->i[3] < (int) mach->ImmLimit );
1051         chan->f[3] = mach->Imms[index->i[3]][swizzle];
1052         break;
1053
1054      case TGSI_FILE_ADDRESS:
1055         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1056         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1057         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1058         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1059         break;
1060
1061      case TGSI_FILE_OUTPUT:
1062         /* vertex/fragment output vars can be read too */
1063         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1064         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1065         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1066         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1067         break;
1068
1069      default:
1070         assert( 0 );
1071      }
1072      break;
1073
1074   case TGSI_EXTSWIZZLE_ZERO:
1075      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1076      break;
1077
1078   case TGSI_EXTSWIZZLE_ONE:
1079      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1080      break;
1081
1082   default:
1083      assert( 0 );
1084   }
1085}
1086
1087static void
1088fetch_source(
1089   const struct tgsi_exec_machine *mach,
1090   union tgsi_exec_channel *chan,
1091   const struct tgsi_full_src_register *reg,
1092   const uint chan_index )
1093{
1094   union tgsi_exec_channel index;
1095   uint swizzle;
1096
1097   /* We start with a direct index into a register file.
1098    *
1099    *    file[1],
1100    *    where:
1101    *       file = SrcRegister.File
1102    *       [1] = SrcRegister.Index
1103    */
1104   index.i[0] =
1105   index.i[1] =
1106   index.i[2] =
1107   index.i[3] = reg->SrcRegister.Index;
1108
1109   /* There is an extra source register that indirectly subscripts
1110    * a register file. The direct index now becomes an offset
1111    * that is being added to the indirect register.
1112    *
1113    *    file[ind[2].x+1],
1114    *    where:
1115    *       ind = SrcRegisterInd.File
1116    *       [2] = SrcRegisterInd.Index
1117    *       .x = SrcRegisterInd.SwizzleX
1118    */
1119   if (reg->SrcRegister.Indirect) {
1120      union tgsi_exec_channel index2;
1121      union tgsi_exec_channel indir_index;
1122      const uint execmask = mach->ExecMask;
1123      uint i;
1124
1125      /* which address register (always zero now) */
1126      index2.i[0] =
1127      index2.i[1] =
1128      index2.i[2] =
1129      index2.i[3] = reg->SrcRegisterInd.Index;
1130
1131      /* get current value of address register[swizzle] */
1132      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1133      fetch_src_file_channel(
1134         mach,
1135         reg->SrcRegisterInd.File,
1136         swizzle,
1137         &index2,
1138         &indir_index );
1139
1140      /* add value of address register to the offset */
1141      index.i[0] += (int) indir_index.f[0];
1142      index.i[1] += (int) indir_index.f[1];
1143      index.i[2] += (int) indir_index.f[2];
1144      index.i[3] += (int) indir_index.f[3];
1145
1146      /* for disabled execution channels, zero-out the index to
1147       * avoid using a potential garbage value.
1148       */
1149      for (i = 0; i < QUAD_SIZE; i++) {
1150         if ((execmask & (1 << i)) == 0)
1151            index.i[i] = 0;
1152      }
1153   }
1154
1155   /* There is an extra source register that is a second
1156    * subscript to a register file. Effectively it means that
1157    * the register file is actually a 2D array of registers.
1158    *
1159    *    file[1][3] == file[1*sizeof(file[1])+3],
1160    *    where:
1161    *       [3] = SrcRegisterDim.Index
1162    */
1163   if (reg->SrcRegister.Dimension) {
1164      /* The size of the first-order array depends on the register file type.
1165       * We need to multiply the index to the first array to get an effective,
1166       * "flat" index that points to the beginning of the second-order array.
1167       */
1168      switch (reg->SrcRegister.File) {
1169      case TGSI_FILE_INPUT:
1170         index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1171         index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1172         index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1173         index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1174         break;
1175      case TGSI_FILE_CONSTANT:
1176         index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1177         index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1178         index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1179         index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1180         break;
1181      default:
1182         assert( 0 );
1183      }
1184
1185      index.i[0] += reg->SrcRegisterDim.Index;
1186      index.i[1] += reg->SrcRegisterDim.Index;
1187      index.i[2] += reg->SrcRegisterDim.Index;
1188      index.i[3] += reg->SrcRegisterDim.Index;
1189
1190      /* Again, the second subscript index can be addressed indirectly
1191       * identically to the first one.
1192       * Nothing stops us from indirectly addressing the indirect register,
1193       * but there is no need for that, so we won't exercise it.
1194       *
1195       *    file[1][ind[4].y+3],
1196       *    where:
1197       *       ind = SrcRegisterDimInd.File
1198       *       [4] = SrcRegisterDimInd.Index
1199       *       .y = SrcRegisterDimInd.SwizzleX
1200       */
1201      if (reg->SrcRegisterDim.Indirect) {
1202         union tgsi_exec_channel index2;
1203         union tgsi_exec_channel indir_index;
1204         const uint execmask = mach->ExecMask;
1205         uint i;
1206
1207         index2.i[0] =
1208         index2.i[1] =
1209         index2.i[2] =
1210         index2.i[3] = reg->SrcRegisterDimInd.Index;
1211
1212         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1213         fetch_src_file_channel(
1214            mach,
1215            reg->SrcRegisterDimInd.File,
1216            swizzle,
1217            &index2,
1218            &indir_index );
1219
1220         index.i[0] += (int) indir_index.f[0];
1221         index.i[1] += (int) indir_index.f[1];
1222         index.i[2] += (int) indir_index.f[2];
1223         index.i[3] += (int) indir_index.f[3];
1224
1225         /* for disabled execution channels, zero-out the index to
1226          * avoid using a potential garbage value.
1227          */
1228         for (i = 0; i < QUAD_SIZE; i++) {
1229            if ((execmask & (1 << i)) == 0)
1230               index.i[i] = 0;
1231         }
1232      }
1233
1234      /* If by any chance there was a need for a 3D array of register
1235       * files, we would have to check whether SrcRegisterDim is followed
1236       * by a dimension register and continue the saga.
1237       */
1238   }
1239
1240   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1241   fetch_src_file_channel(
1242      mach,
1243      reg->SrcRegister.File,
1244      swizzle,
1245      &index,
1246      chan );
1247
1248   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1249   case TGSI_UTIL_SIGN_CLEAR:
1250      micro_abs( chan, chan );
1251      break;
1252
1253   case TGSI_UTIL_SIGN_SET:
1254      micro_abs( chan, chan );
1255      micro_neg( chan, chan );
1256      break;
1257
1258   case TGSI_UTIL_SIGN_TOGGLE:
1259      micro_neg( chan, chan );
1260      break;
1261
1262   case TGSI_UTIL_SIGN_KEEP:
1263      break;
1264   }
1265
1266   if (reg->SrcRegisterExtMod.Complement) {
1267      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1268   }
1269}
1270
1271static void
1272store_dest(
1273   struct tgsi_exec_machine *mach,
1274   const union tgsi_exec_channel *chan,
1275   const struct tgsi_full_dst_register *reg,
1276   const struct tgsi_full_instruction *inst,
1277   uint chan_index )
1278{
1279   uint i;
1280   union tgsi_exec_channel null;
1281   union tgsi_exec_channel *dst;
1282   uint execmask = mach->ExecMask;
1283
1284   switch (reg->DstRegister.File) {
1285   case TGSI_FILE_NULL:
1286      dst = &null;
1287      break;
1288
1289   case TGSI_FILE_OUTPUT:
1290      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1291                           + reg->DstRegister.Index].xyzw[chan_index];
1292      break;
1293
1294   case TGSI_FILE_TEMPORARY:
1295      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1296      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1297      break;
1298
1299   case TGSI_FILE_ADDRESS:
1300      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1301      break;
1302
1303   default:
1304      assert( 0 );
1305      return;
1306   }
1307
1308   if (inst->InstructionExtNv.CondFlowEnable) {
1309      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1310      uint swizzle;
1311      uint shift;
1312      uint mask;
1313      uint test;
1314
1315      /* Only CC0 supported.
1316       */
1317      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1318
1319      switch (chan_index) {
1320      case CHAN_X:
1321         swizzle = inst->InstructionExtNv.CondSwizzleX;
1322         break;
1323      case CHAN_Y:
1324         swizzle = inst->InstructionExtNv.CondSwizzleY;
1325         break;
1326      case CHAN_Z:
1327         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1328         break;
1329      case CHAN_W:
1330         swizzle = inst->InstructionExtNv.CondSwizzleW;
1331         break;
1332      default:
1333         assert( 0 );
1334         return;
1335      }
1336
1337      switch (swizzle) {
1338      case TGSI_SWIZZLE_X:
1339         shift = TGSI_EXEC_CC_X_SHIFT;
1340         mask = TGSI_EXEC_CC_X_MASK;
1341         break;
1342      case TGSI_SWIZZLE_Y:
1343         shift = TGSI_EXEC_CC_Y_SHIFT;
1344         mask = TGSI_EXEC_CC_Y_MASK;
1345         break;
1346      case TGSI_SWIZZLE_Z:
1347         shift = TGSI_EXEC_CC_Z_SHIFT;
1348         mask = TGSI_EXEC_CC_Z_MASK;
1349         break;
1350      case TGSI_SWIZZLE_W:
1351         shift = TGSI_EXEC_CC_W_SHIFT;
1352         mask = TGSI_EXEC_CC_W_MASK;
1353         break;
1354      default:
1355         assert( 0 );
1356         return;
1357      }
1358
1359      switch (inst->InstructionExtNv.CondMask) {
1360      case TGSI_CC_GT:
1361         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1362         for (i = 0; i < QUAD_SIZE; i++)
1363            if (cc->u[i] & test)
1364               execmask &= ~(1 << i);
1365         break;
1366
1367      case TGSI_CC_EQ:
1368         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1369         for (i = 0; i < QUAD_SIZE; i++)
1370            if (cc->u[i] & test)
1371               execmask &= ~(1 << i);
1372         break;
1373
1374      case TGSI_CC_LT:
1375         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1376         for (i = 0; i < QUAD_SIZE; i++)
1377            if (cc->u[i] & test)
1378               execmask &= ~(1 << i);
1379         break;
1380
1381      case TGSI_CC_GE:
1382         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1383         for (i = 0; i < QUAD_SIZE; i++)
1384            if (cc->u[i] & test)
1385               execmask &= ~(1 << i);
1386         break;
1387
1388      case TGSI_CC_LE:
1389         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1390         for (i = 0; i < QUAD_SIZE; i++)
1391            if (cc->u[i] & test)
1392               execmask &= ~(1 << i);
1393         break;
1394
1395      case TGSI_CC_NE:
1396         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1397         for (i = 0; i < QUAD_SIZE; i++)
1398            if (cc->u[i] & test)
1399               execmask &= ~(1 << i);
1400         break;
1401
1402      case TGSI_CC_TR:
1403         break;
1404
1405      case TGSI_CC_FL:
1406         for (i = 0; i < QUAD_SIZE; i++)
1407            execmask &= ~(1 << i);
1408         break;
1409
1410      default:
1411         assert( 0 );
1412         return;
1413      }
1414   }
1415
1416   switch (inst->Instruction.Saturate) {
1417   case TGSI_SAT_NONE:
1418      for (i = 0; i < QUAD_SIZE; i++)
1419         if (execmask & (1 << i))
1420            dst->i[i] = chan->i[i];
1421      break;
1422
1423   case TGSI_SAT_ZERO_ONE:
1424      for (i = 0; i < QUAD_SIZE; i++)
1425         if (execmask & (1 << i)) {
1426            if (chan->f[i] < 0.0f)
1427               dst->f[i] = 0.0f;
1428            else if (chan->f[i] > 1.0f)
1429               dst->f[i] = 1.0f;
1430            else
1431               dst->i[i] = chan->i[i];
1432         }
1433      break;
1434
1435   case TGSI_SAT_MINUS_PLUS_ONE:
1436      for (i = 0; i < QUAD_SIZE; i++)
1437         if (execmask & (1 << i)) {
1438            if (chan->f[i] < -1.0f)
1439               dst->f[i] = -1.0f;
1440            else if (chan->f[i] > 1.0f)
1441               dst->f[i] = 1.0f;
1442            else
1443               dst->i[i] = chan->i[i];
1444         }
1445      break;
1446
1447   default:
1448      assert( 0 );
1449   }
1450
1451   if (inst->InstructionExtNv.CondDstUpdate) {
1452      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1453      uint shift;
1454      uint mask;
1455
1456      /* Only CC0 supported.
1457       */
1458      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1459
1460      switch (chan_index) {
1461      case CHAN_X:
1462         shift = TGSI_EXEC_CC_X_SHIFT;
1463         mask = ~TGSI_EXEC_CC_X_MASK;
1464         break;
1465      case CHAN_Y:
1466         shift = TGSI_EXEC_CC_Y_SHIFT;
1467         mask = ~TGSI_EXEC_CC_Y_MASK;
1468         break;
1469      case CHAN_Z:
1470         shift = TGSI_EXEC_CC_Z_SHIFT;
1471         mask = ~TGSI_EXEC_CC_Z_MASK;
1472         break;
1473      case CHAN_W:
1474         shift = TGSI_EXEC_CC_W_SHIFT;
1475         mask = ~TGSI_EXEC_CC_W_MASK;
1476         break;
1477      default:
1478         assert( 0 );
1479         return;
1480      }
1481
1482      for (i = 0; i < QUAD_SIZE; i++)
1483         if (execmask & (1 << i)) {
1484            cc->u[i] &= mask;
1485            if (dst->f[i] < 0.0f)
1486               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1487            else if (dst->f[i] > 0.0f)
1488               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1489            else if (dst->f[i] == 0.0f)
1490               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1491            else
1492               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1493         }
1494   }
1495}
1496
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into *VAL.  Relies on `mach` and `inst` being in scope at the use site.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

/* Store *VAL into channel CHAN of destination operand INDEX of the
 * current instruction.  Relies on `mach` and `inst` being in scope.
 */
#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1502
1503
1504/**
1505 * Execute ARB-style KIL which is predicated by a src register.
1506 * Kill fragment if any of the four values is less than zero.
1507 */
1508static void
1509exec_kil(struct tgsi_exec_machine *mach,
1510         const struct tgsi_full_instruction *inst)
1511{
1512   uint uniquemask;
1513   uint chan_index;
1514   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1515   union tgsi_exec_channel r[1];
1516
1517   /* This mask stores component bits that were already tested. Note that
1518    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1519    * tested. */
1520   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1521
1522   for (chan_index = 0; chan_index < 4; chan_index++)
1523   {
1524      uint swizzle;
1525      uint i;
1526
1527      /* unswizzle channel */
1528      swizzle = tgsi_util_get_full_src_register_extswizzle (
1529                        &inst->FullSrcRegisters[0],
1530                        chan_index);
1531
1532      /* check if the component has not been already tested */
1533      if (uniquemask & (1 << swizzle))
1534         continue;
1535      uniquemask |= 1 << swizzle;
1536
1537      FETCH(&r[0], 0, chan_index);
1538      for (i = 0; i < 4; i++)
1539         if (r[0].f[i] < 0.0f)
1540            kilmask |= 1 << i;
1541   }
1542
1543   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1544}
1545
1546/**
1547 * Execute NVIDIA-style KIL which is predicated by a condition code.
1548 * Kill fragment if the condition code is TRUE.
1549 */
1550static void
1551exec_kilp(struct tgsi_exec_machine *mach,
1552          const struct tgsi_full_instruction *inst)
1553{
1554   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1555
1556   if (inst->InstructionExtNv.CondFlowEnable) {
1557      uint swizzle[4];
1558      uint chan_index;
1559
1560      kilmask = 0x0;
1561
1562      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1563      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1564      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1565      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1566
1567      for (chan_index = 0; chan_index < 4; chan_index++)
1568      {
1569         uint i;
1570
1571         for (i = 0; i < 4; i++) {
1572            /* TODO: evaluate the condition code */
1573            if (0)
1574               kilmask |= 1 << i;
1575         }
1576      }
1577   }
1578   else {
1579      /* "unconditional" kil */
1580      kilmask = mach->ExecMask;
1581   }
1582   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1583}
1584
1585
1586/*
1587 * Fetch a four texture samples using STR texture coordinates.
1588 */
1589static void
1590fetch_texel( struct tgsi_sampler *sampler,
1591             const union tgsi_exec_channel *s,
1592             const union tgsi_exec_channel *t,
1593             const union tgsi_exec_channel *p,
1594             float lodbias,  /* XXX should be float[4] */
1595             union tgsi_exec_channel *r,
1596             union tgsi_exec_channel *g,
1597             union tgsi_exec_channel *b,
1598             union tgsi_exec_channel *a )
1599{
1600   uint j;
1601   float rgba[NUM_CHANNELS][QUAD_SIZE];
1602
1603   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1604
1605   for (j = 0; j < 4; j++) {
1606      r->f[j] = rgba[0][j];
1607      g->f[j] = rgba[1][j];
1608      b->f[j] = rgba[2][j];
1609      a->f[j] = rgba[3][j];
1610   }
1611}
1612
1613
1614static void
1615exec_tex(struct tgsi_exec_machine *mach,
1616         const struct tgsi_full_instruction *inst,
1617         boolean biasLod,
1618         boolean projected)
1619{
1620   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1621   union tgsi_exec_channel r[4];
1622   uint chan_index;
1623   float lodBias;
1624
1625   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1626
1627   switch (inst->InstructionExtTexture.Texture) {
1628   case TGSI_TEXTURE_1D:
1629   case TGSI_TEXTURE_SHADOW1D:
1630
1631      FETCH(&r[0], 0, CHAN_X);
1632
1633      if (projected) {
1634         FETCH(&r[1], 0, CHAN_W);
1635         micro_div( &r[0], &r[0], &r[1] );
1636      }
1637
1638      if (biasLod) {
1639         FETCH(&r[1], 0, CHAN_W);
1640         lodBias = r[2].f[0];
1641      }
1642      else
1643         lodBias = 0.0;
1644
1645      fetch_texel(mach->Samplers[unit],
1646                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
1647                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1648      break;
1649
1650   case TGSI_TEXTURE_2D:
1651   case TGSI_TEXTURE_RECT:
1652   case TGSI_TEXTURE_SHADOW2D:
1653   case TGSI_TEXTURE_SHADOWRECT:
1654
1655      FETCH(&r[0], 0, CHAN_X);
1656      FETCH(&r[1], 0, CHAN_Y);
1657      FETCH(&r[2], 0, CHAN_Z);
1658
1659      if (projected) {
1660         FETCH(&r[3], 0, CHAN_W);
1661         micro_div( &r[0], &r[0], &r[3] );
1662         micro_div( &r[1], &r[1], &r[3] );
1663         micro_div( &r[2], &r[2], &r[3] );
1664      }
1665
1666      if (biasLod) {
1667         FETCH(&r[3], 0, CHAN_W);
1668         lodBias = r[3].f[0];
1669      }
1670      else
1671         lodBias = 0.0;
1672
1673      fetch_texel(mach->Samplers[unit],
1674                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1675                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1676      break;
1677
1678   case TGSI_TEXTURE_3D:
1679   case TGSI_TEXTURE_CUBE:
1680
1681      FETCH(&r[0], 0, CHAN_X);
1682      FETCH(&r[1], 0, CHAN_Y);
1683      FETCH(&r[2], 0, CHAN_Z);
1684
1685      if (projected) {
1686         FETCH(&r[3], 0, CHAN_W);
1687         micro_div( &r[0], &r[0], &r[3] );
1688         micro_div( &r[1], &r[1], &r[3] );
1689         micro_div( &r[2], &r[2], &r[3] );
1690      }
1691
1692      if (biasLod) {
1693         FETCH(&r[3], 0, CHAN_W);
1694         lodBias = r[3].f[0];
1695      }
1696      else
1697         lodBias = 0.0;
1698
1699      fetch_texel(mach->Samplers[unit],
1700                  &r[0], &r[1], &r[2], lodBias,
1701                  &r[0], &r[1], &r[2], &r[3]);
1702      break;
1703
1704   default:
1705      assert (0);
1706   }
1707
1708   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1709      STORE( &r[chan_index], 0, chan_index );
1710   }
1711}
1712
1713
1714/**
1715 * Evaluate a constant-valued coefficient at the position of the
1716 * current quad.
1717 */
1718static void
1719eval_constant_coef(
1720   struct tgsi_exec_machine *mach,
1721   unsigned attrib,
1722   unsigned chan )
1723{
1724   unsigned i;
1725
1726   for( i = 0; i < QUAD_SIZE; i++ ) {
1727      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1728   }
1729}
1730
1731/**
1732 * Evaluate a linear-valued coefficient at the position of the
1733 * current quad.
1734 */
1735static void
1736eval_linear_coef(
1737   struct tgsi_exec_machine *mach,
1738   unsigned attrib,
1739   unsigned chan )
1740{
1741   const float x = mach->QuadPos.xyzw[0].f[0];
1742   const float y = mach->QuadPos.xyzw[1].f[0];
1743   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1744   const float dady = mach->InterpCoefs[attrib].dady[chan];
1745   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1746   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1747   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1748   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1749   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1750}
1751
1752/**
1753 * Evaluate a perspective-valued coefficient at the position of the
1754 * current quad.
1755 */
1756static void
1757eval_perspective_coef(
1758   struct tgsi_exec_machine *mach,
1759   unsigned attrib,
1760   unsigned chan )
1761{
1762   const float x = mach->QuadPos.xyzw[0].f[0];
1763   const float y = mach->QuadPos.xyzw[1].f[0];
1764   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1765   const float dady = mach->InterpCoefs[attrib].dady[chan];
1766   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1767   const float *w = mach->QuadPos.xyzw[3].f;
1768   /* divide by W here */
1769   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1770   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1771   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1772   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1773}
1774
1775
/**
 * Signature shared by the eval_*_coef() interpolators: compute channel
 * \p chan of input attribute \p attrib for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1780
1781static void
1782exec_declaration(
1783   struct tgsi_exec_machine *mach,
1784   const struct tgsi_full_declaration *decl )
1785{
1786   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1787      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1788         unsigned first, last, mask;
1789         eval_coef_func eval;
1790
1791         first = decl->DeclarationRange.First;
1792         last = decl->DeclarationRange.Last;
1793         mask = decl->Declaration.UsageMask;
1794
1795         switch( decl->Declaration.Interpolate ) {
1796         case TGSI_INTERPOLATE_CONSTANT:
1797            eval = eval_constant_coef;
1798            break;
1799
1800         case TGSI_INTERPOLATE_LINEAR:
1801            eval = eval_linear_coef;
1802            break;
1803
1804         case TGSI_INTERPOLATE_PERSPECTIVE:
1805            eval = eval_perspective_coef;
1806            break;
1807
1808         default:
1809            eval = NULL;
1810            assert( 0 );
1811         }
1812
1813         if( mask == TGSI_WRITEMASK_XYZW ) {
1814            unsigned i, j;
1815
1816            for( i = first; i <= last; i++ ) {
1817               for( j = 0; j < NUM_CHANNELS; j++ ) {
1818                  eval( mach, i, j );
1819               }
1820            }
1821         }
1822         else {
1823            unsigned i, j;
1824
1825            for( j = 0; j < NUM_CHANNELS; j++ ) {
1826               if( mask & (1 << j) ) {
1827                  for( i = first; i <= last; i++ ) {
1828                     eval( mach, i, j );
1829                  }
1830               }
1831            }
1832         }
1833      }
1834   }
1835}
1836
1837static void
1838exec_instruction(
1839   struct tgsi_exec_machine *mach,
1840   const struct tgsi_full_instruction *inst,
1841   int *pc )
1842{
1843   uint chan_index;
1844   union tgsi_exec_channel r[10];
1845
1846   (*pc)++;
1847
1848   switch (inst->Instruction.Opcode) {
1849   case TGSI_OPCODE_ARL:
1850   /* TGSI_OPCODE_FLOOR */
1851   /* TGSI_OPCODE_FLR */
1852      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1853         FETCH( &r[0], 0, chan_index );
1854         micro_flr( &r[0], &r[0] );
1855         STORE( &r[0], 0, chan_index );
1856      }
1857      break;
1858
1859   case TGSI_OPCODE_MOV:
1860   case TGSI_OPCODE_SWZ:
1861      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1862         FETCH( &r[0], 0, chan_index );
1863         STORE( &r[0], 0, chan_index );
1864      }
1865      break;
1866
1867   case TGSI_OPCODE_LIT:
1868      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1869         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1870      }
1871
1872      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1873         FETCH( &r[0], 0, CHAN_X );
1874         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1875            micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1876            STORE( &r[0], 0, CHAN_Y );
1877         }
1878
1879         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1880            FETCH( &r[1], 0, CHAN_Y );
1881            micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1882
1883            FETCH( &r[2], 0, CHAN_W );
1884            micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1885            micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1886            micro_pow( &r[1], &r[1], &r[2] );
1887            micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1888            STORE( &r[0], 0, CHAN_Z );
1889         }
1890      }
1891
1892      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1893         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1894      }
1895      break;
1896
1897   case TGSI_OPCODE_RCP:
1898   /* TGSI_OPCODE_RECIP */
1899      FETCH( &r[0], 0, CHAN_X );
1900      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1901      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1902         STORE( &r[0], 0, chan_index );
1903      }
1904      break;
1905
1906   case TGSI_OPCODE_RSQ:
1907   /* TGSI_OPCODE_RECIPSQRT */
1908      FETCH( &r[0], 0, CHAN_X );
1909      micro_abs( &r[0], &r[0] );
1910      micro_sqrt( &r[0], &r[0] );
1911      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1912      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1913         STORE( &r[0], 0, chan_index );
1914      }
1915      break;
1916
1917   case TGSI_OPCODE_EXP:
1918      FETCH( &r[0], 0, CHAN_X );
1919      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1920      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1921         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1922         STORE( &r[2], 0, CHAN_X );        /* store r2 */
1923      }
1924      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1925         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1926         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1927      }
1928      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1929         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1930         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1931      }
1932      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1933         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1934      }
1935      break;
1936
1937   case TGSI_OPCODE_LOG:
1938      FETCH( &r[0], 0, CHAN_X );
1939      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1940      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1941      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1942      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1943         STORE( &r[0], 0, CHAN_X );
1944      }
1945      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1946         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1947         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1948         STORE( &r[0], 0, CHAN_Y );
1949      }
1950      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1951         STORE( &r[1], 0, CHAN_Z );
1952      }
1953      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1954         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1955      }
1956      break;
1957
1958   case TGSI_OPCODE_MUL:
1959      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1960      {
1961         FETCH(&r[0], 0, chan_index);
1962         FETCH(&r[1], 1, chan_index);
1963
1964         micro_mul( &r[0], &r[0], &r[1] );
1965
1966         STORE(&r[0], 0, chan_index);
1967      }
1968      break;
1969
1970   case TGSI_OPCODE_ADD:
1971      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1972         FETCH( &r[0], 0, chan_index );
1973         FETCH( &r[1], 1, chan_index );
1974         micro_add( &r[0], &r[0], &r[1] );
1975         STORE( &r[0], 0, chan_index );
1976      }
1977      break;
1978
1979   case TGSI_OPCODE_DP3:
1980   /* TGSI_OPCODE_DOT3 */
1981      FETCH( &r[0], 0, CHAN_X );
1982      FETCH( &r[1], 1, CHAN_X );
1983      micro_mul( &r[0], &r[0], &r[1] );
1984
1985      FETCH( &r[1], 0, CHAN_Y );
1986      FETCH( &r[2], 1, CHAN_Y );
1987      micro_mul( &r[1], &r[1], &r[2] );
1988      micro_add( &r[0], &r[0], &r[1] );
1989
1990      FETCH( &r[1], 0, CHAN_Z );
1991      FETCH( &r[2], 1, CHAN_Z );
1992      micro_mul( &r[1], &r[1], &r[2] );
1993      micro_add( &r[0], &r[0], &r[1] );
1994
1995      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1996         STORE( &r[0], 0, chan_index );
1997      }
1998      break;
1999
2000    case TGSI_OPCODE_DP4:
2001    /* TGSI_OPCODE_DOT4 */
2002       FETCH(&r[0], 0, CHAN_X);
2003       FETCH(&r[1], 1, CHAN_X);
2004
2005       micro_mul( &r[0], &r[0], &r[1] );
2006
2007       FETCH(&r[1], 0, CHAN_Y);
2008       FETCH(&r[2], 1, CHAN_Y);
2009
2010       micro_mul( &r[1], &r[1], &r[2] );
2011       micro_add( &r[0], &r[0], &r[1] );
2012
2013       FETCH(&r[1], 0, CHAN_Z);
2014       FETCH(&r[2], 1, CHAN_Z);
2015
2016       micro_mul( &r[1], &r[1], &r[2] );
2017       micro_add( &r[0], &r[0], &r[1] );
2018
2019       FETCH(&r[1], 0, CHAN_W);
2020       FETCH(&r[2], 1, CHAN_W);
2021
2022       micro_mul( &r[1], &r[1], &r[2] );
2023       micro_add( &r[0], &r[0], &r[1] );
2024
2025      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2026         STORE( &r[0], 0, chan_index );
2027      }
2028      break;
2029
2030   case TGSI_OPCODE_DST:
2031      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2032         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2033      }
2034
2035      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2036         FETCH( &r[0], 0, CHAN_Y );
2037         FETCH( &r[1], 1, CHAN_Y);
2038         micro_mul( &r[0], &r[0], &r[1] );
2039         STORE( &r[0], 0, CHAN_Y );
2040      }
2041
2042      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2043         FETCH( &r[0], 0, CHAN_Z );
2044         STORE( &r[0], 0, CHAN_Z );
2045      }
2046
2047      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2048         FETCH( &r[0], 1, CHAN_W );
2049         STORE( &r[0], 0, CHAN_W );
2050      }
2051      break;
2052
2053   case TGSI_OPCODE_MIN:
2054      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2055         FETCH(&r[0], 0, chan_index);
2056         FETCH(&r[1], 1, chan_index);
2057
2058         /* XXX use micro_min()?? */
2059         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2060
2061         STORE(&r[0], 0, chan_index);
2062      }
2063      break;
2064
2065   case TGSI_OPCODE_MAX:
2066      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2067         FETCH(&r[0], 0, chan_index);
2068         FETCH(&r[1], 1, chan_index);
2069
2070         /* XXX use micro_max()?? */
2071         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2072
2073         STORE(&r[0], 0, chan_index );
2074      }
2075      break;
2076
2077   case TGSI_OPCODE_SLT:
2078   /* TGSI_OPCODE_SETLT */
2079      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2080         FETCH( &r[0], 0, chan_index );
2081         FETCH( &r[1], 1, chan_index );
2082         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2083         STORE( &r[0], 0, chan_index );
2084      }
2085      break;
2086
2087   case TGSI_OPCODE_SGE:
2088   /* TGSI_OPCODE_SETGE */
2089      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2090         FETCH( &r[0], 0, chan_index );
2091         FETCH( &r[1], 1, chan_index );
2092         micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2093         STORE( &r[0], 0, chan_index );
2094      }
2095      break;
2096
2097   case TGSI_OPCODE_MAD:
2098   /* TGSI_OPCODE_MADD */
2099      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2100         FETCH( &r[0], 0, chan_index );
2101         FETCH( &r[1], 1, chan_index );
2102         micro_mul( &r[0], &r[0], &r[1] );
2103         FETCH( &r[1], 2, chan_index );
2104         micro_add( &r[0], &r[0], &r[1] );
2105         STORE( &r[0], 0, chan_index );
2106      }
2107      break;
2108
2109   case TGSI_OPCODE_SUB:
2110      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2111         FETCH(&r[0], 0, chan_index);
2112         FETCH(&r[1], 1, chan_index);
2113
2114         micro_sub( &r[0], &r[0], &r[1] );
2115
2116         STORE(&r[0], 0, chan_index);
2117      }
2118      break;
2119
2120   case TGSI_OPCODE_LERP:
2121   /* TGSI_OPCODE_LRP */
2122      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2123         FETCH(&r[0], 0, chan_index);
2124         FETCH(&r[1], 1, chan_index);
2125         FETCH(&r[2], 2, chan_index);
2126
2127         micro_sub( &r[1], &r[1], &r[2] );
2128         micro_mul( &r[0], &r[0], &r[1] );
2129         micro_add( &r[0], &r[0], &r[2] );
2130
2131         STORE(&r[0], 0, chan_index);
2132      }
2133      break;
2134
2135   case TGSI_OPCODE_CND:
2136      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2137         FETCH(&r[0], 0, chan_index);
2138         FETCH(&r[1], 1, chan_index);
2139         FETCH(&r[2], 2, chan_index);
2140         micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2141         STORE(&r[0], 0, chan_index);
2142      }
2143      break;
2144
2145   case TGSI_OPCODE_CND0:
2146      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2147         FETCH(&r[0], 0, chan_index);
2148         FETCH(&r[1], 1, chan_index);
2149         FETCH(&r[2], 2, chan_index);
2150         micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2151         STORE(&r[0], 0, chan_index);
2152      }
2153      break;
2154
2155   case TGSI_OPCODE_DOT2ADD:
2156   /* TGSI_OPCODE_DP2A */
2157      FETCH( &r[0], 0, CHAN_X );
2158      FETCH( &r[1], 1, CHAN_X );
2159      micro_mul( &r[0], &r[0], &r[1] );
2160
2161      FETCH( &r[1], 0, CHAN_Y );
2162      FETCH( &r[2], 1, CHAN_Y );
2163      micro_mul( &r[1], &r[1], &r[2] );
2164      micro_add( &r[0], &r[0], &r[1] );
2165
2166      FETCH( &r[2], 2, CHAN_X );
2167      micro_add( &r[0], &r[0], &r[2] );
2168
2169      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2170         STORE( &r[0], 0, chan_index );
2171      }
2172      break;
2173
2174   case TGSI_OPCODE_INDEX:
2175      /* XXX: considered for removal */
2176      assert (0);
2177      break;
2178
2179   case TGSI_OPCODE_NEGATE:
2180      /* XXX: considered for removal */
2181      assert (0);
2182      break;
2183
2184   case TGSI_OPCODE_FRAC:
2185   /* TGSI_OPCODE_FRC */
2186      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2187         FETCH( &r[0], 0, chan_index );
2188         micro_frc( &r[0], &r[0] );
2189         STORE( &r[0], 0, chan_index );
2190      }
2191      break;
2192
2193   case TGSI_OPCODE_CLAMP:
2194      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2195         FETCH(&r[0], 0, chan_index);
2196         FETCH(&r[1], 1, chan_index);
2197         micro_max(&r[0], &r[0], &r[1]);
2198         FETCH(&r[1], 2, chan_index);
2199         micro_min(&r[0], &r[0], &r[1]);
2200         STORE(&r[0], 0, chan_index);
2201      }
2202      break;
2203
2204   case TGSI_OPCODE_ROUND:
2205   case TGSI_OPCODE_ARR:
2206      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2207         FETCH( &r[0], 0, chan_index );
2208         micro_rnd( &r[0], &r[0] );
2209         STORE( &r[0], 0, chan_index );
2210      }
2211      break;
2212
2213   case TGSI_OPCODE_EXPBASE2:
2214   /* TGSI_OPCODE_EX2 */
2215      FETCH(&r[0], 0, CHAN_X);
2216
2217#if FAST_MATH
2218      micro_exp2( &r[0], &r[0] );
2219#else
2220      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2221#endif
2222
2223      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2224         STORE( &r[0], 0, chan_index );
2225      }
2226      break;
2227
2228   case TGSI_OPCODE_LOGBASE2:
2229   /* TGSI_OPCODE_LG2 */
2230      FETCH( &r[0], 0, CHAN_X );
2231      micro_lg2( &r[0], &r[0] );
2232      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2233         STORE( &r[0], 0, chan_index );
2234      }
2235      break;
2236
2237   case TGSI_OPCODE_POWER:
2238   /* TGSI_OPCODE_POW */
2239      FETCH(&r[0], 0, CHAN_X);
2240      FETCH(&r[1], 1, CHAN_X);
2241
2242      micro_pow( &r[0], &r[0], &r[1] );
2243
2244      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2245         STORE( &r[0], 0, chan_index );
2246      }
2247      break;
2248
2249   case TGSI_OPCODE_CROSSPRODUCT:
2250   /* TGSI_OPCODE_XPD */
2251      FETCH(&r[0], 0, CHAN_Y);
2252      FETCH(&r[1], 1, CHAN_Z);
2253
2254      micro_mul( &r[2], &r[0], &r[1] );
2255
2256      FETCH(&r[3], 0, CHAN_Z);
2257      FETCH(&r[4], 1, CHAN_Y);
2258
2259      micro_mul( &r[5], &r[3], &r[4] );
2260      micro_sub( &r[2], &r[2], &r[5] );
2261
2262      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2263         STORE( &r[2], 0, CHAN_X );
2264      }
2265
2266      FETCH(&r[2], 1, CHAN_X);
2267
2268      micro_mul( &r[3], &r[3], &r[2] );
2269
2270      FETCH(&r[5], 0, CHAN_X);
2271
2272      micro_mul( &r[1], &r[1], &r[5] );
2273      micro_sub( &r[3], &r[3], &r[1] );
2274
2275      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2276         STORE( &r[3], 0, CHAN_Y );
2277      }
2278
2279      micro_mul( &r[5], &r[5], &r[4] );
2280      micro_mul( &r[0], &r[0], &r[2] );
2281      micro_sub( &r[5], &r[5], &r[0] );
2282
2283      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2284         STORE( &r[5], 0, CHAN_Z );
2285      }
2286
2287      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2288         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2289      }
2290      break;
2291
2292    case TGSI_OPCODE_MULTIPLYMATRIX:
2293       /* XXX: considered for removal */
2294       assert (0);
2295       break;
2296
2297    case TGSI_OPCODE_ABS:
2298       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2299          FETCH(&r[0], 0, chan_index);
2300
2301          micro_abs( &r[0], &r[0] );
2302
2303          STORE(&r[0], 0, chan_index);
2304       }
2305       break;
2306
2307   case TGSI_OPCODE_RCC:
2308      FETCH(&r[0], 0, CHAN_X);
2309      micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2310      micro_float_clamp(&r[0], &r[0]);
2311      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2312         STORE(&r[0], 0, chan_index);
2313      }
2314      break;
2315
2316   case TGSI_OPCODE_DPH:
2317      FETCH(&r[0], 0, CHAN_X);
2318      FETCH(&r[1], 1, CHAN_X);
2319
2320      micro_mul( &r[0], &r[0], &r[1] );
2321
2322      FETCH(&r[1], 0, CHAN_Y);
2323      FETCH(&r[2], 1, CHAN_Y);
2324
2325      micro_mul( &r[1], &r[1], &r[2] );
2326      micro_add( &r[0], &r[0], &r[1] );
2327
2328      FETCH(&r[1], 0, CHAN_Z);
2329      FETCH(&r[2], 1, CHAN_Z);
2330
2331      micro_mul( &r[1], &r[1], &r[2] );
2332      micro_add( &r[0], &r[0], &r[1] );
2333
2334      FETCH(&r[1], 1, CHAN_W);
2335
2336      micro_add( &r[0], &r[0], &r[1] );
2337
2338      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2339         STORE( &r[0], 0, chan_index );
2340      }
2341      break;
2342
2343   case TGSI_OPCODE_COS:
2344      FETCH(&r[0], 0, CHAN_X);
2345
2346      micro_cos( &r[0], &r[0] );
2347
2348      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2349         STORE( &r[0], 0, chan_index );
2350      }
2351      break;
2352
2353   case TGSI_OPCODE_DDX:
2354      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2355         FETCH( &r[0], 0, chan_index );
2356         micro_ddx( &r[0], &r[0] );
2357         STORE( &r[0], 0, chan_index );
2358      }
2359      break;
2360
2361   case TGSI_OPCODE_DDY:
2362      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2363         FETCH( &r[0], 0, chan_index );
2364         micro_ddy( &r[0], &r[0] );
2365         STORE( &r[0], 0, chan_index );
2366      }
2367      break;
2368
2369   case TGSI_OPCODE_KILP:
2370      exec_kilp (mach, inst);
2371      break;
2372
2373   case TGSI_OPCODE_KIL:
2374      exec_kil (mach, inst);
2375      break;
2376
2377   case TGSI_OPCODE_PK2H:
2378      assert (0);
2379      break;
2380
2381   case TGSI_OPCODE_PK2US:
2382      assert (0);
2383      break;
2384
2385   case TGSI_OPCODE_PK4B:
2386      assert (0);
2387      break;
2388
2389   case TGSI_OPCODE_PK4UB:
2390      assert (0);
2391      break;
2392
2393   case TGSI_OPCODE_RFL:
2394      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2395          IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2396          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2397         /* r0 = dp3(src0, src0) */
2398         FETCH(&r[2], 0, CHAN_X);
2399         micro_mul(&r[0], &r[2], &r[2]);
2400         FETCH(&r[4], 0, CHAN_Y);
2401         micro_mul(&r[8], &r[4], &r[4]);
2402         micro_add(&r[0], &r[0], &r[8]);
2403         FETCH(&r[6], 0, CHAN_Z);
2404         micro_mul(&r[8], &r[6], &r[6]);
2405         micro_add(&r[0], &r[0], &r[8]);
2406
2407         /* r1 = dp3(src0, src1) */
2408         FETCH(&r[3], 1, CHAN_X);
2409         micro_mul(&r[1], &r[2], &r[3]);
2410         FETCH(&r[5], 1, CHAN_Y);
2411         micro_mul(&r[8], &r[4], &r[5]);
2412         micro_add(&r[1], &r[1], &r[8]);
2413         FETCH(&r[7], 1, CHAN_Z);
2414         micro_mul(&r[8], &r[6], &r[7]);
2415         micro_add(&r[1], &r[1], &r[8]);
2416
2417         /* r1 = 2 * r1 / r0 */
2418         micro_add(&r[1], &r[1], &r[1]);
2419         micro_div(&r[1], &r[1], &r[0]);
2420
2421         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2422            micro_mul(&r[2], &r[2], &r[1]);
2423            micro_sub(&r[2], &r[2], &r[3]);
2424            STORE(&r[2], 0, CHAN_X);
2425         }
2426         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2427            micro_mul(&r[4], &r[4], &r[1]);
2428            micro_sub(&r[4], &r[4], &r[5]);
2429            STORE(&r[4], 0, CHAN_Y);
2430         }
2431         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2432            micro_mul(&r[6], &r[6], &r[1]);
2433            micro_sub(&r[6], &r[6], &r[7]);
2434            STORE(&r[6], 0, CHAN_Z);
2435         }
2436      }
2437      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2438         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2439      }
2440      break;
2441
2442   case TGSI_OPCODE_SEQ:
2443      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2444         FETCH( &r[0], 0, chan_index );
2445         FETCH( &r[1], 1, chan_index );
2446         micro_eq( &r[0], &r[0], &r[1],
2447                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2448                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2449         STORE( &r[0], 0, chan_index );
2450      }
2451      break;
2452
2453   case TGSI_OPCODE_SFL:
2454      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2455         STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2456      }
2457      break;
2458
2459   case TGSI_OPCODE_SGT:
2460      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2461         FETCH( &r[0], 0, chan_index );
2462         FETCH( &r[1], 1, chan_index );
2463         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2464         STORE( &r[0], 0, chan_index );
2465      }
2466      break;
2467
2468   case TGSI_OPCODE_SIN:
2469      FETCH( &r[0], 0, CHAN_X );
2470      micro_sin( &r[0], &r[0] );
2471      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2472         STORE( &r[0], 0, chan_index );
2473      }
2474      break;
2475
2476   case TGSI_OPCODE_SLE:
2477      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2478         FETCH( &r[0], 0, chan_index );
2479         FETCH( &r[1], 1, chan_index );
2480         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2481         STORE( &r[0], 0, chan_index );
2482      }
2483      break;
2484
2485   case TGSI_OPCODE_SNE:
2486      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2487         FETCH( &r[0], 0, chan_index );
2488         FETCH( &r[1], 1, chan_index );
2489         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2490         STORE( &r[0], 0, chan_index );
2491      }
2492      break;
2493
2494   case TGSI_OPCODE_STR:
2495      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2496         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2497      }
2498      break;
2499
2500   case TGSI_OPCODE_TEX:
2501      /* simple texture lookup */
2502      /* src[0] = texcoord */
2503      /* src[1] = sampler unit */
2504      exec_tex(mach, inst, FALSE, FALSE);
2505      break;
2506
2507   case TGSI_OPCODE_TXB:
2508      /* Texture lookup with lod bias */
2509      /* src[0] = texcoord (src[0].w = LOD bias) */
2510      /* src[1] = sampler unit */
2511      exec_tex(mach, inst, TRUE, FALSE);
2512      break;
2513
2514   case TGSI_OPCODE_TXD:
2515      /* Texture lookup with explicit partial derivatives */
2516      /* src[0] = texcoord */
2517      /* src[1] = d[strq]/dx */
2518      /* src[2] = d[strq]/dy */
2519      /* src[3] = sampler unit */
2520      assert (0);
2521      break;
2522
2523   case TGSI_OPCODE_TXL:
2524      /* Texture lookup with explicit LOD */
2525      /* src[0] = texcoord (src[0].w = LOD) */
2526      /* src[1] = sampler unit */
2527      exec_tex(mach, inst, TRUE, FALSE);
2528      break;
2529
2530   case TGSI_OPCODE_TXP:
2531      /* Texture lookup with projection */
2532      /* src[0] = texcoord (src[0].w = projection) */
2533      /* src[1] = sampler unit */
2534      exec_tex(mach, inst, FALSE, TRUE);
2535      break;
2536
2537   case TGSI_OPCODE_UP2H:
2538      assert (0);
2539      break;
2540
2541   case TGSI_OPCODE_UP2US:
2542      assert (0);
2543      break;
2544
2545   case TGSI_OPCODE_UP4B:
2546      assert (0);
2547      break;
2548
2549   case TGSI_OPCODE_UP4UB:
2550      assert (0);
2551      break;
2552
2553   case TGSI_OPCODE_X2D:
2554      FETCH(&r[0], 1, CHAN_X);
2555      FETCH(&r[1], 1, CHAN_Y);
2556      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2557          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2558         FETCH(&r[2], 2, CHAN_X);
2559         micro_mul(&r[2], &r[2], &r[0]);
2560         FETCH(&r[3], 2, CHAN_Y);
2561         micro_mul(&r[3], &r[3], &r[1]);
2562         micro_add(&r[2], &r[2], &r[3]);
2563         FETCH(&r[3], 0, CHAN_X);
2564         micro_add(&r[2], &r[2], &r[3]);
2565         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2566            STORE(&r[2], 0, CHAN_X);
2567         }
2568         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2569            STORE(&r[2], 0, CHAN_Z);
2570         }
2571      }
2572      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2573          IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2574         FETCH(&r[2], 2, CHAN_Z);
2575         micro_mul(&r[2], &r[2], &r[0]);
2576         FETCH(&r[3], 2, CHAN_W);
2577         micro_mul(&r[3], &r[3], &r[1]);
2578         micro_add(&r[2], &r[2], &r[3]);
2579         FETCH(&r[3], 0, CHAN_Y);
2580         micro_add(&r[2], &r[2], &r[3]);
2581         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2582            STORE(&r[2], 0, CHAN_Y);
2583         }
2584         if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2585            STORE(&r[2], 0, CHAN_W);
2586         }
2587      }
2588      break;
2589
2590   case TGSI_OPCODE_ARA:
2591      assert (0);
2592      break;
2593
2594   case TGSI_OPCODE_BRA:
2595      assert (0);
2596      break;
2597
2598   case TGSI_OPCODE_CAL:
2599      /* skip the call if no execution channels are enabled */
2600      if (mach->ExecMask) {
2601         /* do the call */
2602
2603         /* push the Cond, Loop, Cont stacks */
2604         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2605         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2606         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2607         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2608         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2609         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2610
2611         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2612         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2613
2614         /* note that PC was already incremented above */
2615         mach->CallStack[mach->CallStackTop++] = *pc;
2616         *pc = inst->InstructionExtLabel.Label;
2617      }
2618      break;
2619
2620   case TGSI_OPCODE_RET:
2621      mach->FuncMask &= ~mach->ExecMask;
2622      UPDATE_EXEC_MASK(mach);
2623
2624      if (mach->FuncMask == 0x0) {
2625         /* really return now (otherwise, keep executing) */
2626
2627         if (mach->CallStackTop == 0) {
2628            /* returning from main() */
2629            *pc = -1;
2630            return;
2631         }
2632         *pc = mach->CallStack[--mach->CallStackTop];
2633
2634         /* pop the Cond, Loop, Cont stacks */
2635         assert(mach->CondStackTop > 0);
2636         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2637         assert(mach->LoopStackTop > 0);
2638         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2639         assert(mach->ContStackTop > 0);
2640         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2641         assert(mach->FuncStackTop > 0);
2642         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2643
2644         UPDATE_EXEC_MASK(mach);
2645      }
2646      break;
2647
2648   case TGSI_OPCODE_SSG:
2649   /* TGSI_OPCODE_SGN */
2650      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2651         FETCH( &r[0], 0, chan_index );
2652         micro_sgn( &r[0], &r[0] );
2653         STORE( &r[0], 0, chan_index );
2654      }
2655      break;
2656
2657   case TGSI_OPCODE_CMP:
2658      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2659         FETCH(&r[0], 0, chan_index);
2660         FETCH(&r[1], 1, chan_index);
2661         FETCH(&r[2], 2, chan_index);
2662
2663         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2664
2665         STORE(&r[0], 0, chan_index);
2666      }
2667      break;
2668
2669   case TGSI_OPCODE_SCS:
2670      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2671         FETCH( &r[0], 0, CHAN_X );
2672      }
2673      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2674         micro_cos( &r[1], &r[0] );
2675         STORE( &r[1], 0, CHAN_X );
2676      }
2677      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2678         micro_sin( &r[1], &r[0] );
2679         STORE( &r[1], 0, CHAN_Y );
2680      }
2681      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2682         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2683      }
2684      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2685         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2686      }
2687      break;
2688
2689   case TGSI_OPCODE_NRM:
2690      /* 3-component vector normalize */
2691      if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2692         IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2693         IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2694         /* r3 = sqrt(dp3(src0, src0)) */
2695         FETCH(&r[0], 0, CHAN_X);
2696         micro_mul(&r[3], &r[0], &r[0]);
2697         FETCH(&r[1], 0, CHAN_Y);
2698         micro_mul(&r[4], &r[1], &r[1]);
2699         micro_add(&r[3], &r[3], &r[4]);
2700         FETCH(&r[2], 0, CHAN_Z);
2701         micro_mul(&r[4], &r[2], &r[2]);
2702         micro_add(&r[3], &r[3], &r[4]);
2703         micro_sqrt(&r[3], &r[3]);
2704
2705         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2706            micro_div(&r[0], &r[0], &r[3]);
2707            STORE(&r[0], 0, CHAN_X);
2708         }
2709         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2710            micro_div(&r[1], &r[1], &r[3]);
2711            STORE(&r[1], 0, CHAN_Y);
2712         }
2713         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2714            micro_div(&r[2], &r[2], &r[3]);
2715            STORE(&r[2], 0, CHAN_Z);
2716         }
2717      }
2718      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2719         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2720      }
2721      break;
2722
2723   case TGSI_OPCODE_NRM4:
2724      /* 4-component vector normalize */
2725      {
2726         union tgsi_exec_channel tmp, dot;
2727
2728         /* tmp = dp4(src0, src0): */
2729         FETCH( &r[0], 0, CHAN_X );
2730         micro_mul( &tmp, &r[0], &r[0] );
2731
2732         FETCH( &r[1], 0, CHAN_Y );
2733         micro_mul( &dot, &r[1], &r[1] );
2734         micro_add( &tmp, &tmp, &dot );
2735
2736         FETCH( &r[2], 0, CHAN_Z );
2737         micro_mul( &dot, &r[2], &r[2] );
2738         micro_add( &tmp, &tmp, &dot );
2739
2740         FETCH( &r[3], 0, CHAN_W );
2741         micro_mul( &dot, &r[3], &r[3] );
2742         micro_add( &tmp, &tmp, &dot );
2743
2744         /* tmp = 1 / sqrt(tmp) */
2745         micro_sqrt( &tmp, &tmp );
2746         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2747
2748         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2749            /* chan = chan * tmp */
2750            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2751            STORE( &r[chan_index], 0, chan_index );
2752         }
2753      }
2754      break;
2755
2756   case TGSI_OPCODE_DIV:
2757      assert( 0 );
2758      break;
2759
2760   case TGSI_OPCODE_DP2:
2761      FETCH( &r[0], 0, CHAN_X );
2762      FETCH( &r[1], 1, CHAN_X );
2763      micro_mul( &r[0], &r[0], &r[1] );
2764
2765      FETCH( &r[1], 0, CHAN_Y );
2766      FETCH( &r[2], 1, CHAN_Y );
2767      micro_mul( &r[1], &r[1], &r[2] );
2768      micro_add( &r[0], &r[0], &r[1] );
2769
2770      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2771         STORE( &r[0], 0, chan_index );
2772      }
2773      break;
2774
2775   case TGSI_OPCODE_IF:
2776      /* push CondMask */
2777      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2778      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2779      FETCH( &r[0], 0, CHAN_X );
2780      /* update CondMask */
2781      if( ! r[0].u[0] ) {
2782         mach->CondMask &= ~0x1;
2783      }
2784      if( ! r[0].u[1] ) {
2785         mach->CondMask &= ~0x2;
2786      }
2787      if( ! r[0].u[2] ) {
2788         mach->CondMask &= ~0x4;
2789      }
2790      if( ! r[0].u[3] ) {
2791         mach->CondMask &= ~0x8;
2792      }
2793      UPDATE_EXEC_MASK(mach);
2794      /* Todo: If CondMask==0, jump to ELSE */
2795      break;
2796
2797   case TGSI_OPCODE_ELSE:
2798      /* invert CondMask wrt previous mask */
2799      {
2800         uint prevMask;
2801         assert(mach->CondStackTop > 0);
2802         prevMask = mach->CondStack[mach->CondStackTop - 1];
2803         mach->CondMask = ~mach->CondMask & prevMask;
2804         UPDATE_EXEC_MASK(mach);
2805         /* Todo: If CondMask==0, jump to ENDIF */
2806      }
2807      break;
2808
2809   case TGSI_OPCODE_ENDIF:
2810      /* pop CondMask */
2811      assert(mach->CondStackTop > 0);
2812      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2813      UPDATE_EXEC_MASK(mach);
2814      break;
2815
2816   case TGSI_OPCODE_END:
2817      /* halt execution */
2818      *pc = -1;
2819      break;
2820
2821   case TGSI_OPCODE_REP:
2822      assert (0);
2823      break;
2824
2825   case TGSI_OPCODE_ENDREP:
2826       assert (0);
2827       break;
2828
2829   case TGSI_OPCODE_PUSHA:
2830      assert (0);
2831      break;
2832
2833   case TGSI_OPCODE_POPA:
2834      assert (0);
2835      break;
2836
2837   case TGSI_OPCODE_CEIL:
2838      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2839         FETCH( &r[0], 0, chan_index );
2840         micro_ceil( &r[0], &r[0] );
2841         STORE( &r[0], 0, chan_index );
2842      }
2843      break;
2844
2845   case TGSI_OPCODE_I2F:
2846      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2847         FETCH( &r[0], 0, chan_index );
2848         micro_i2f( &r[0], &r[0] );
2849         STORE( &r[0], 0, chan_index );
2850      }
2851      break;
2852
2853   case TGSI_OPCODE_NOT:
2854      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2855         FETCH( &r[0], 0, chan_index );
2856         micro_not( &r[0], &r[0] );
2857         STORE( &r[0], 0, chan_index );
2858      }
2859      break;
2860
2861   case TGSI_OPCODE_TRUNC:
2862      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2863         FETCH( &r[0], 0, chan_index );
2864         micro_trunc( &r[0], &r[0] );
2865         STORE( &r[0], 0, chan_index );
2866      }
2867      break;
2868
2869   case TGSI_OPCODE_SHL:
2870      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2871         FETCH( &r[0], 0, chan_index );
2872         FETCH( &r[1], 1, chan_index );
2873         micro_shl( &r[0], &r[0], &r[1] );
2874         STORE( &r[0], 0, chan_index );
2875      }
2876      break;
2877
2878   case TGSI_OPCODE_SHR:
2879      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2880         FETCH( &r[0], 0, chan_index );
2881         FETCH( &r[1], 1, chan_index );
2882         micro_ishr( &r[0], &r[0], &r[1] );
2883         STORE( &r[0], 0, chan_index );
2884      }
2885      break;
2886
2887   case TGSI_OPCODE_AND:
2888      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2889         FETCH( &r[0], 0, chan_index );
2890         FETCH( &r[1], 1, chan_index );
2891         micro_and( &r[0], &r[0], &r[1] );
2892         STORE( &r[0], 0, chan_index );
2893      }
2894      break;
2895
2896   case TGSI_OPCODE_OR:
2897      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2898         FETCH( &r[0], 0, chan_index );
2899         FETCH( &r[1], 1, chan_index );
2900         micro_or( &r[0], &r[0], &r[1] );
2901         STORE( &r[0], 0, chan_index );
2902      }
2903      break;
2904
2905   case TGSI_OPCODE_MOD:
2906      assert (0);
2907      break;
2908
2909   case TGSI_OPCODE_XOR:
2910      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2911         FETCH( &r[0], 0, chan_index );
2912         FETCH( &r[1], 1, chan_index );
2913         micro_xor( &r[0], &r[0], &r[1] );
2914         STORE( &r[0], 0, chan_index );
2915      }
2916      break;
2917
2918   case TGSI_OPCODE_SAD:
2919      assert (0);
2920      break;
2921
2922   case TGSI_OPCODE_TXF:
2923      assert (0);
2924      break;
2925
2926   case TGSI_OPCODE_TXQ:
2927      assert (0);
2928      break;
2929
2930   case TGSI_OPCODE_EMIT:
2931      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2932      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2933      break;
2934
2935   case TGSI_OPCODE_ENDPRIM:
2936      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2937      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2938      break;
2939
2940   case TGSI_OPCODE_LOOP:
2941      /* fall-through (for now) */
2942   case TGSI_OPCODE_BGNLOOP2:
2943      /* push LoopMask and ContMasks */
2944      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2945      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2946      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2947      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2948      break;
2949
2950   case TGSI_OPCODE_ENDLOOP:
2951      /* fall-through (for now at least) */
2952   case TGSI_OPCODE_ENDLOOP2:
2953      /* Restore ContMask, but don't pop */
2954      assert(mach->ContStackTop > 0);
2955      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2956      UPDATE_EXEC_MASK(mach);
2957      if (mach->ExecMask) {
2958         /* repeat loop: jump to instruction just past BGNLOOP */
2959         *pc = inst->InstructionExtLabel.Label + 1;
2960      }
2961      else {
2962         /* exit loop: pop LoopMask */
2963         assert(mach->LoopStackTop > 0);
2964         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2965         /* pop ContMask */
2966         assert(mach->ContStackTop > 0);
2967         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2968      }
2969      UPDATE_EXEC_MASK(mach);
2970      break;
2971
2972   case TGSI_OPCODE_BRK:
2973      /* turn off loop channels for each enabled exec channel */
2974      mach->LoopMask &= ~mach->ExecMask;
2975      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2976      UPDATE_EXEC_MASK(mach);
2977      break;
2978
2979   case TGSI_OPCODE_CONT:
2980      /* turn off cont channels for each enabled exec channel */
2981      mach->ContMask &= ~mach->ExecMask;
2982      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2983      UPDATE_EXEC_MASK(mach);
2984      break;
2985
2986   case TGSI_OPCODE_BGNSUB:
2987      /* no-op */
2988      break;
2989
2990   case TGSI_OPCODE_ENDSUB:
2991      /* no-op */
2992      break;
2993
2994   case TGSI_OPCODE_NOISE1:
2995      assert( 0 );
2996      break;
2997
2998   case TGSI_OPCODE_NOISE2:
2999      assert( 0 );
3000      break;
3001
3002   case TGSI_OPCODE_NOISE3:
3003      assert( 0 );
3004      break;
3005
3006   case TGSI_OPCODE_NOISE4:
3007      assert( 0 );
3008      break;
3009
3010   case TGSI_OPCODE_NOP:
3011      break;
3012
3013   default:
3014      assert( 0 );
3015   }
3016}
3017
3018
3019/**
3020 * Run TGSI interpreter.
3021 * \return bitmask of "alive" quad components
3022 */
3023uint
3024tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3025{
3026   uint i;
3027   int pc = 0;
3028
3029   mach->CondMask = 0xf;
3030   mach->LoopMask = 0xf;
3031   mach->ContMask = 0xf;
3032   mach->FuncMask = 0xf;
3033   mach->ExecMask = 0xf;
3034
3035   mach->CondStackTop = 0; /* temporarily subvert this assertion */
3036   assert(mach->CondStackTop == 0);
3037   assert(mach->LoopStackTop == 0);
3038   assert(mach->ContStackTop == 0);
3039   assert(mach->CallStackTop == 0);
3040
3041   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3042   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3043
3044   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3045      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3046      mach->Primitives[0] = 0;
3047   }
3048
3049   for (i = 0; i < QUAD_SIZE; i++) {
3050      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3051         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3052         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3053         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3054         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3055   }
3056
3057   /* execute declarations (interpolants) */
3058   for (i = 0; i < mach->NumDeclarations; i++) {
3059      exec_declaration( mach, mach->Declarations+i );
3060   }
3061
3062   /* execute instructions, until pc is set to -1 */
3063   while (pc != -1) {
3064      assert(pc < (int) mach->NumInstructions);
3065      exec_instruction( mach, mach->Instructions + pc, &pc );
3066   }
3067
3068#if 0
3069   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3070   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3071      /*
3072       * Scale back depth component.
3073       */
3074      for (i = 0; i < 4; i++)
3075         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3076   }
3077#endif
3078
3079   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3080}
3081