tgsi_exec.c revision 8fa6c1ac9299402c1faf75b264cf70b1b83d1eff
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers.  This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the
 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 *   Michal Krol
50 *   Brian Paul
51 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_shader_tokens.h"
56#include "tgsi/tgsi_dump.h"
57#include "tgsi/tgsi_parse.h"
58#include "tgsi/tgsi_util.h"
59#include "tgsi_exec.h"
60#include "util/u_memory.h"
61#include "util/u_math.h"
62
/* When non-zero, EXP2/LG2/POW use the util_fast_*() helpers. */
#define FAST_MATH 1

/* Element indices of the four quad positions inside a channel
 * (used by the ddx/ddy helpers).
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Channel (component) indices. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3
74
/*
 * Shorthand locations of various utility registers.
 * Each register is named by a pair: _I = temp Index, _C = Channel.
 */

/* Constant registers that tgsi_exec_machine_init() fills in. */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* Remaining utility registers. */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
107
/* Non-zero when channel CHAN is set in the write mask of destination 0. */
#define IS_CHANNEL_ENABLED(INST, CHAN) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* Same test against the write mask of destination 1. */
#define IS_CHANNEL_ENABLED2(INST, CHAN) \
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/*
 * Loop headers: the statement that follows runs once for every channel
 * enabled in destination 0 (or destination 1 for the "2" variant).
 * CHAN must be an assignable integer variable.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) \
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
121
122
/**
 * Recompute the execution mask as the bitwise AND of the condition, loop,
 * continue and function masks.
 * MACH is parenthesized so any pointer expression may be passed, and the
 * whole expansion is a single parenthesized expression.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
126
127
128static const union tgsi_exec_channel ZeroVec =
129   { { 0.0, 0.0, 0.0, 0.0 } };
130
131
132#ifdef DEBUG
133static void
134check_inf_or_nan(const union tgsi_exec_channel *chan)
135{
136   assert(!util_is_inf_or_nan(chan->f[0]));
137   assert(!util_is_inf_or_nan(chan->f[1]));
138   assert(!util_is_inf_or_nan(chan->f[2]));
139   assert(!util_is_inf_or_nan(chan->f[3]));
140}
141#endif
142
143
144#ifdef DEBUG
145static void
146print_chan(const char *msg, const union tgsi_exec_channel *chan)
147{
148   debug_printf("%s = {%f, %f, %f, %f}\n",
149                msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
150}
151#endif
152
153
154#ifdef DEBUG
155static void
156print_temp(const struct tgsi_exec_machine *mach, uint index)
157{
158   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
159   int i;
160   debug_printf("Temp[%u] =\n", index);
161   for (i = 0; i < 4; i++) {
162      debug_printf("  %c: { %f, %f, %f, %f }\n",
163                   "XYZW"[i],
164                   tmp->xyzw[i].f[0],
165                   tmp->xyzw[i].f[1],
166                   tmp->xyzw[i].f[2],
167                   tmp->xyzw[i].f[3]);
168   }
169}
170#endif
171
172
173/**
174 * Check if there's a potential src/dst register data dependency when
175 * using SOA execution.
176 * Example:
177 *   MOV T, T.yxwz;
178 * This would expand into:
179 *   MOV t0, t1;
180 *   MOV t1, t0;
181 *   MOV t2, t3;
182 *   MOV t3, t2;
183 * The second instruction will have the wrong value for t0 if executed as-is.
184 */
185static boolean
186tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
187{
188   uint i, chan;
189
190   uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
191   if (writemask == TGSI_WRITEMASK_X ||
192       writemask == TGSI_WRITEMASK_Y ||
193       writemask == TGSI_WRITEMASK_Z ||
194       writemask == TGSI_WRITEMASK_W ||
195       writemask == TGSI_WRITEMASK_NONE) {
196      /* no chance of data dependency */
197      return FALSE;
198   }
199
200   /* loop over src regs */
201   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
202      if ((inst->FullSrcRegisters[i].SrcRegister.File ==
203           inst->FullDstRegisters[0].DstRegister.File) &&
204          (inst->FullSrcRegisters[i].SrcRegister.Index ==
205           inst->FullDstRegisters[0].DstRegister.Index)) {
206         /* loop over dest channels */
207         uint channelsWritten = 0x0;
208         FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
209            /* check if we're reading a channel that's been written */
210            uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan);
211            if (swizzle <= TGSI_SWIZZLE_W &&
212                (channelsWritten & (1 << swizzle))) {
213               return TRUE;
214            }
215
216            channelsWritten |= (1 << chan);
217         }
218      }
219   }
220   return FALSE;
221}
222
223
224/**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229void
230tgsi_exec_machine_bind_shader(
231   struct tgsi_exec_machine *mach,
232   const struct tgsi_token *tokens,
233   uint numSamplers,
234   struct tgsi_sampler **samplers)
235{
236   uint k;
237   struct tgsi_parse_context parse;
238   struct tgsi_exec_labels *labels = &mach->Labels;
239   struct tgsi_full_instruction *instructions;
240   struct tgsi_full_declaration *declarations;
241   uint maxInstructions = 10, numInstructions = 0;
242   uint maxDeclarations = 10, numDeclarations = 0;
243   uint instno = 0;
244
245#if 0
246   tgsi_dump(tokens, 0);
247#endif
248
249   util_init_math();
250
251   mach->Tokens = tokens;
252   mach->Samplers = samplers;
253
254   k = tgsi_parse_init (&parse, mach->Tokens);
255   if (k != TGSI_PARSE_OK) {
256      debug_printf( "Problem parsing!\n" );
257      return;
258   }
259
260   mach->Processor = parse.FullHeader.Processor.Processor;
261   mach->ImmLimit = 0;
262   labels->count = 0;
263
264   declarations = (struct tgsi_full_declaration *)
265      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267   if (!declarations) {
268      return;
269   }
270
271   instructions = (struct tgsi_full_instruction *)
272      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274   if (!instructions) {
275      FREE( declarations );
276      return;
277   }
278
279   while( !tgsi_parse_end_of_tokens( &parse ) ) {
280      uint pointer = parse.Position;
281      uint i;
282
283      tgsi_parse_token( &parse );
284      switch( parse.FullToken.Token.Type ) {
285      case TGSI_TOKEN_TYPE_DECLARATION:
286         /* save expanded declaration */
287         if (numDeclarations == maxDeclarations) {
288            declarations = REALLOC(declarations,
289                                   maxDeclarations
290                                   * sizeof(struct tgsi_full_declaration),
291                                   (maxDeclarations + 10)
292                                   * sizeof(struct tgsi_full_declaration));
293            maxDeclarations += 10;
294         }
295         memcpy(declarations + numDeclarations,
296                &parse.FullToken.FullDeclaration,
297                sizeof(declarations[0]));
298         numDeclarations++;
299         break;
300
301      case TGSI_TOKEN_TYPE_IMMEDIATE:
302         {
303            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
304            assert( size % 4 == 0 );
305            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
306
307            for( i = 0; i < size; i++ ) {
308               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
309		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
310            }
311            mach->ImmLimit += size / 4;
312         }
313         break;
314
315      case TGSI_TOKEN_TYPE_INSTRUCTION:
316         assert( labels->count < MAX_LABELS );
317
318         labels->labels[labels->count][0] = instno;
319         labels->labels[labels->count][1] = pointer;
320         labels->count++;
321
322         /* save expanded instruction */
323         if (numInstructions == maxInstructions) {
324            instructions = REALLOC(instructions,
325                                   maxInstructions
326                                   * sizeof(struct tgsi_full_instruction),
327                                   (maxInstructions + 10)
328                                   * sizeof(struct tgsi_full_instruction));
329            maxInstructions += 10;
330         }
331         memcpy(instructions + numInstructions,
332                &parse.FullToken.FullInstruction,
333                sizeof(instructions[0]));
334
335#if 0
336         if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
337            debug_printf("SOA dependency in instruction:\n");
338            tgsi_dump_instruction(&parse.FullToken.FullInstruction,
339                                  numInstructions);
340         }
341#else
342         (void) tgsi_check_soa_dependencies;
343#endif
344
345         numInstructions++;
346         break;
347
348      default:
349         assert( 0 );
350      }
351   }
352   tgsi_parse_free (&parse);
353
354   if (mach->Declarations) {
355      FREE( mach->Declarations );
356   }
357   mach->Declarations = declarations;
358   mach->NumDeclarations = numDeclarations;
359
360   if (mach->Instructions) {
361      FREE( mach->Instructions );
362   }
363   mach->Instructions = instructions;
364   mach->NumInstructions = numInstructions;
365}
366
367
368void
369tgsi_exec_machine_init(
370   struct tgsi_exec_machine *mach )
371{
372   uint i;
373
374   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
375   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
376
377   /* Setup constants. */
378   for( i = 0; i < 4; i++ ) {
379      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
380      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
381      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
382      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
383      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
384      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
385      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
386      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
387      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
388      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
389   }
390
391#ifdef DEBUG
392   /* silence warnings */
393   (void) print_chan;
394   (void) print_temp;
395#endif
396}
397
398
399void
400tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
401{
402   if (mach->Instructions) {
403      FREE(mach->Instructions);
404      mach->Instructions = NULL;
405      mach->NumInstructions = 0;
406   }
407   if (mach->Declarations) {
408      FREE(mach->Declarations);
409      mach->Declarations = NULL;
410      mach->NumDeclarations = 0;
411   }
412}
413
414
415static void
416micro_abs(
417   union tgsi_exec_channel *dst,
418   const union tgsi_exec_channel *src )
419{
420   dst->f[0] = fabsf( src->f[0] );
421   dst->f[1] = fabsf( src->f[1] );
422   dst->f[2] = fabsf( src->f[2] );
423   dst->f[3] = fabsf( src->f[3] );
424}
425
426static void
427micro_add(
428   union tgsi_exec_channel *dst,
429   const union tgsi_exec_channel *src0,
430   const union tgsi_exec_channel *src1 )
431{
432   dst->f[0] = src0->f[0] + src1->f[0];
433   dst->f[1] = src0->f[1] + src1->f[1];
434   dst->f[2] = src0->f[2] + src1->f[2];
435   dst->f[3] = src0->f[3] + src1->f[3];
436}
437
438#if 0
439static void
440micro_iadd(
441   union tgsi_exec_channel *dst,
442   const union tgsi_exec_channel *src0,
443   const union tgsi_exec_channel *src1 )
444{
445   dst->i[0] = src0->i[0] + src1->i[0];
446   dst->i[1] = src0->i[1] + src1->i[1];
447   dst->i[2] = src0->i[2] + src1->i[2];
448   dst->i[3] = src0->i[3] + src1->i[3];
449}
450#endif
451
452static void
453micro_and(
454   union tgsi_exec_channel *dst,
455   const union tgsi_exec_channel *src0,
456   const union tgsi_exec_channel *src1 )
457{
458   dst->u[0] = src0->u[0] & src1->u[0];
459   dst->u[1] = src0->u[1] & src1->u[1];
460   dst->u[2] = src0->u[2] & src1->u[2];
461   dst->u[3] = src0->u[3] & src1->u[3];
462}
463
464static void
465micro_ceil(
466   union tgsi_exec_channel *dst,
467   const union tgsi_exec_channel *src )
468{
469   dst->f[0] = ceilf( src->f[0] );
470   dst->f[1] = ceilf( src->f[1] );
471   dst->f[2] = ceilf( src->f[2] );
472   dst->f[3] = ceilf( src->f[3] );
473}
474
475static void
476micro_cos(
477   union tgsi_exec_channel *dst,
478   const union tgsi_exec_channel *src )
479{
480   dst->f[0] = cosf( src->f[0] );
481   dst->f[1] = cosf( src->f[1] );
482   dst->f[2] = cosf( src->f[2] );
483   dst->f[3] = cosf( src->f[3] );
484}
485
486static void
487micro_ddx(
488   union tgsi_exec_channel *dst,
489   const union tgsi_exec_channel *src )
490{
491   dst->f[0] =
492   dst->f[1] =
493   dst->f[2] =
494   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
495}
496
497static void
498micro_ddy(
499   union tgsi_exec_channel *dst,
500   const union tgsi_exec_channel *src )
501{
502   dst->f[0] =
503   dst->f[1] =
504   dst->f[2] =
505   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
506}
507
508static void
509micro_div(
510   union tgsi_exec_channel *dst,
511   const union tgsi_exec_channel *src0,
512   const union tgsi_exec_channel *src1 )
513{
514   if (src1->f[0] != 0) {
515      dst->f[0] = src0->f[0] / src1->f[0];
516   }
517   if (src1->f[1] != 0) {
518      dst->f[1] = src0->f[1] / src1->f[1];
519   }
520   if (src1->f[2] != 0) {
521      dst->f[2] = src0->f[2] / src1->f[2];
522   }
523   if (src1->f[3] != 0) {
524      dst->f[3] = src0->f[3] / src1->f[3];
525   }
526}
527
528#if 0
529static void
530micro_udiv(
531   union tgsi_exec_channel *dst,
532   const union tgsi_exec_channel *src0,
533   const union tgsi_exec_channel *src1 )
534{
535   dst->u[0] = src0->u[0] / src1->u[0];
536   dst->u[1] = src0->u[1] / src1->u[1];
537   dst->u[2] = src0->u[2] / src1->u[2];
538   dst->u[3] = src0->u[3] / src1->u[3];
539}
540#endif
541
542static void
543micro_eq(
544   union tgsi_exec_channel *dst,
545   const union tgsi_exec_channel *src0,
546   const union tgsi_exec_channel *src1,
547   const union tgsi_exec_channel *src2,
548   const union tgsi_exec_channel *src3 )
549{
550   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
551   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
552   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
553   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
554}
555
556#if 0
557static void
558micro_ieq(
559   union tgsi_exec_channel *dst,
560   const union tgsi_exec_channel *src0,
561   const union tgsi_exec_channel *src1,
562   const union tgsi_exec_channel *src2,
563   const union tgsi_exec_channel *src3 )
564{
565   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
566   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
567   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
568   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
569}
570#endif
571
572static void
573micro_exp2(
574   union tgsi_exec_channel *dst,
575   const union tgsi_exec_channel *src)
576{
577#if FAST_MATH
578   dst->f[0] = util_fast_exp2( src->f[0] );
579   dst->f[1] = util_fast_exp2( src->f[1] );
580   dst->f[2] = util_fast_exp2( src->f[2] );
581   dst->f[3] = util_fast_exp2( src->f[3] );
582#else
583   dst->f[0] = powf( 2.0f, src->f[0] );
584   dst->f[1] = powf( 2.0f, src->f[1] );
585   dst->f[2] = powf( 2.0f, src->f[2] );
586   dst->f[3] = powf( 2.0f, src->f[3] );
587#endif
588}
589
590#if 0
591static void
592micro_f2ut(
593   union tgsi_exec_channel *dst,
594   const union tgsi_exec_channel *src )
595{
596   dst->u[0] = (uint) src->f[0];
597   dst->u[1] = (uint) src->f[1];
598   dst->u[2] = (uint) src->f[2];
599   dst->u[3] = (uint) src->f[3];
600}
601#endif
602
603static void
604micro_float_clamp(union tgsi_exec_channel *dst,
605                  const union tgsi_exec_channel *src)
606{
607   uint i;
608
609   for (i = 0; i < 4; i++) {
610      if (src->f[i] > 0.0f) {
611         if (src->f[i] > 1.884467e+019f)
612            dst->f[i] = 1.884467e+019f;
613         else if (src->f[i] < 5.42101e-020f)
614            dst->f[i] = 5.42101e-020f;
615         else
616            dst->f[i] = src->f[i];
617      }
618      else {
619         if (src->f[i] < -1.884467e+019f)
620            dst->f[i] = -1.884467e+019f;
621         else if (src->f[i] > -5.42101e-020f)
622            dst->f[i] = -5.42101e-020f;
623         else
624            dst->f[i] = src->f[i];
625      }
626   }
627}
628
629static void
630micro_flr(
631   union tgsi_exec_channel *dst,
632   const union tgsi_exec_channel *src )
633{
634   dst->f[0] = floorf( src->f[0] );
635   dst->f[1] = floorf( src->f[1] );
636   dst->f[2] = floorf( src->f[2] );
637   dst->f[3] = floorf( src->f[3] );
638}
639
640static void
641micro_frc(
642   union tgsi_exec_channel *dst,
643   const union tgsi_exec_channel *src )
644{
645   dst->f[0] = src->f[0] - floorf( src->f[0] );
646   dst->f[1] = src->f[1] - floorf( src->f[1] );
647   dst->f[2] = src->f[2] - floorf( src->f[2] );
648   dst->f[3] = src->f[3] - floorf( src->f[3] );
649}
650
651static void
652micro_i2f(
653   union tgsi_exec_channel *dst,
654   const union tgsi_exec_channel *src )
655{
656   dst->f[0] = (float) src->i[0];
657   dst->f[1] = (float) src->i[1];
658   dst->f[2] = (float) src->i[2];
659   dst->f[3] = (float) src->i[3];
660}
661
662static void
663micro_lg2(
664   union tgsi_exec_channel *dst,
665   const union tgsi_exec_channel *src )
666{
667#if FAST_MATH
668   dst->f[0] = util_fast_log2( src->f[0] );
669   dst->f[1] = util_fast_log2( src->f[1] );
670   dst->f[2] = util_fast_log2( src->f[2] );
671   dst->f[3] = util_fast_log2( src->f[3] );
672#else
673   dst->f[0] = logf( src->f[0] ) * 1.442695f;
674   dst->f[1] = logf( src->f[1] ) * 1.442695f;
675   dst->f[2] = logf( src->f[2] ) * 1.442695f;
676   dst->f[3] = logf( src->f[3] ) * 1.442695f;
677#endif
678}
679
680static void
681micro_le(
682   union tgsi_exec_channel *dst,
683   const union tgsi_exec_channel *src0,
684   const union tgsi_exec_channel *src1,
685   const union tgsi_exec_channel *src2,
686   const union tgsi_exec_channel *src3 )
687{
688   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
689   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
690   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
691   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
692}
693
694static void
695micro_lt(
696   union tgsi_exec_channel *dst,
697   const union tgsi_exec_channel *src0,
698   const union tgsi_exec_channel *src1,
699   const union tgsi_exec_channel *src2,
700   const union tgsi_exec_channel *src3 )
701{
702   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
703   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
704   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
705   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
706}
707
708#if 0
709static void
710micro_ilt(
711   union tgsi_exec_channel *dst,
712   const union tgsi_exec_channel *src0,
713   const union tgsi_exec_channel *src1,
714   const union tgsi_exec_channel *src2,
715   const union tgsi_exec_channel *src3 )
716{
717   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
718   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
719   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
720   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
721}
722#endif
723
724#if 0
725static void
726micro_ult(
727   union tgsi_exec_channel *dst,
728   const union tgsi_exec_channel *src0,
729   const union tgsi_exec_channel *src1,
730   const union tgsi_exec_channel *src2,
731   const union tgsi_exec_channel *src3 )
732{
733   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
734   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
735   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
736   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
737}
738#endif
739
740static void
741micro_max(
742   union tgsi_exec_channel *dst,
743   const union tgsi_exec_channel *src0,
744   const union tgsi_exec_channel *src1 )
745{
746   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
747   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
748   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
749   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
750}
751
752#if 0
753static void
754micro_imax(
755   union tgsi_exec_channel *dst,
756   const union tgsi_exec_channel *src0,
757   const union tgsi_exec_channel *src1 )
758{
759   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
760   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
761   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
762   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
763}
764#endif
765
766#if 0
767static void
768micro_umax(
769   union tgsi_exec_channel *dst,
770   const union tgsi_exec_channel *src0,
771   const union tgsi_exec_channel *src1 )
772{
773   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
774   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
775   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
776   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
777}
778#endif
779
780static void
781micro_min(
782   union tgsi_exec_channel *dst,
783   const union tgsi_exec_channel *src0,
784   const union tgsi_exec_channel *src1 )
785{
786   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
787   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
788   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
789   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
790}
791
792#if 0
793static void
794micro_imin(
795   union tgsi_exec_channel *dst,
796   const union tgsi_exec_channel *src0,
797   const union tgsi_exec_channel *src1 )
798{
799   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
800   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
801   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
802   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
803}
804#endif
805
806#if 0
807static void
808micro_umin(
809   union tgsi_exec_channel *dst,
810   const union tgsi_exec_channel *src0,
811   const union tgsi_exec_channel *src1 )
812{
813   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
814   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
815   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
816   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
817}
818#endif
819
820#if 0
821static void
822micro_umod(
823   union tgsi_exec_channel *dst,
824   const union tgsi_exec_channel *src0,
825   const union tgsi_exec_channel *src1 )
826{
827   dst->u[0] = src0->u[0] % src1->u[0];
828   dst->u[1] = src0->u[1] % src1->u[1];
829   dst->u[2] = src0->u[2] % src1->u[2];
830   dst->u[3] = src0->u[3] % src1->u[3];
831}
832#endif
833
834static void
835micro_mul(
836   union tgsi_exec_channel *dst,
837   const union tgsi_exec_channel *src0,
838   const union tgsi_exec_channel *src1 )
839{
840   dst->f[0] = src0->f[0] * src1->f[0];
841   dst->f[1] = src0->f[1] * src1->f[1];
842   dst->f[2] = src0->f[2] * src1->f[2];
843   dst->f[3] = src0->f[3] * src1->f[3];
844}
845
846#if 0
847static void
848micro_imul(
849   union tgsi_exec_channel *dst,
850   const union tgsi_exec_channel *src0,
851   const union tgsi_exec_channel *src1 )
852{
853   dst->i[0] = src0->i[0] * src1->i[0];
854   dst->i[1] = src0->i[1] * src1->i[1];
855   dst->i[2] = src0->i[2] * src1->i[2];
856   dst->i[3] = src0->i[3] * src1->i[3];
857}
858#endif
859
860#if 0
861static void
862micro_imul64(
863   union tgsi_exec_channel *dst0,
864   union tgsi_exec_channel *dst1,
865   const union tgsi_exec_channel *src0,
866   const union tgsi_exec_channel *src1 )
867{
868   dst1->i[0] = src0->i[0] * src1->i[0];
869   dst1->i[1] = src0->i[1] * src1->i[1];
870   dst1->i[2] = src0->i[2] * src1->i[2];
871   dst1->i[3] = src0->i[3] * src1->i[3];
872   dst0->i[0] = 0;
873   dst0->i[1] = 0;
874   dst0->i[2] = 0;
875   dst0->i[3] = 0;
876}
877#endif
878
879#if 0
880static void
881micro_umul64(
882   union tgsi_exec_channel *dst0,
883   union tgsi_exec_channel *dst1,
884   const union tgsi_exec_channel *src0,
885   const union tgsi_exec_channel *src1 )
886{
887   dst1->u[0] = src0->u[0] * src1->u[0];
888   dst1->u[1] = src0->u[1] * src1->u[1];
889   dst1->u[2] = src0->u[2] * src1->u[2];
890   dst1->u[3] = src0->u[3] * src1->u[3];
891   dst0->u[0] = 0;
892   dst0->u[1] = 0;
893   dst0->u[2] = 0;
894   dst0->u[3] = 0;
895}
896#endif
897
898
899#if 0
900static void
901micro_movc(
902   union tgsi_exec_channel *dst,
903   const union tgsi_exec_channel *src0,
904   const union tgsi_exec_channel *src1,
905   const union tgsi_exec_channel *src2 )
906{
907   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
908   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
909   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
910   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
911}
912#endif
913
914static void
915micro_neg(
916   union tgsi_exec_channel *dst,
917   const union tgsi_exec_channel *src )
918{
919   dst->f[0] = -src->f[0];
920   dst->f[1] = -src->f[1];
921   dst->f[2] = -src->f[2];
922   dst->f[3] = -src->f[3];
923}
924
925#if 0
926static void
927micro_ineg(
928   union tgsi_exec_channel *dst,
929   const union tgsi_exec_channel *src )
930{
931   dst->i[0] = -src->i[0];
932   dst->i[1] = -src->i[1];
933   dst->i[2] = -src->i[2];
934   dst->i[3] = -src->i[3];
935}
936#endif
937
938static void
939micro_not(
940   union tgsi_exec_channel *dst,
941   const union tgsi_exec_channel *src )
942{
943   dst->u[0] = ~src->u[0];
944   dst->u[1] = ~src->u[1];
945   dst->u[2] = ~src->u[2];
946   dst->u[3] = ~src->u[3];
947}
948
949static void
950micro_or(
951   union tgsi_exec_channel *dst,
952   const union tgsi_exec_channel *src0,
953   const union tgsi_exec_channel *src1 )
954{
955   dst->u[0] = src0->u[0] | src1->u[0];
956   dst->u[1] = src0->u[1] | src1->u[1];
957   dst->u[2] = src0->u[2] | src1->u[2];
958   dst->u[3] = src0->u[3] | src1->u[3];
959}
960
961static void
962micro_pow(
963   union tgsi_exec_channel *dst,
964   const union tgsi_exec_channel *src0,
965   const union tgsi_exec_channel *src1 )
966{
967#if FAST_MATH
968   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
969   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
970   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
971   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
972#else
973   dst->f[0] = powf( src0->f[0], src1->f[0] );
974   dst->f[1] = powf( src0->f[1], src1->f[1] );
975   dst->f[2] = powf( src0->f[2], src1->f[2] );
976   dst->f[3] = powf( src0->f[3], src1->f[3] );
977#endif
978}
979
980static void
981micro_rnd(
982   union tgsi_exec_channel *dst,
983   const union tgsi_exec_channel *src )
984{
985   dst->f[0] = floorf( src->f[0] + 0.5f );
986   dst->f[1] = floorf( src->f[1] + 0.5f );
987   dst->f[2] = floorf( src->f[2] + 0.5f );
988   dst->f[3] = floorf( src->f[3] + 0.5f );
989}
990
991static void
992micro_sgn(
993   union tgsi_exec_channel *dst,
994   const union tgsi_exec_channel *src )
995{
996   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
997   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
998   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
999   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
1000}
1001
1002static void
1003micro_shl(
1004   union tgsi_exec_channel *dst,
1005   const union tgsi_exec_channel *src0,
1006   const union tgsi_exec_channel *src1 )
1007{
1008   dst->i[0] = src0->i[0] << src1->i[0];
1009   dst->i[1] = src0->i[1] << src1->i[1];
1010   dst->i[2] = src0->i[2] << src1->i[2];
1011   dst->i[3] = src0->i[3] << src1->i[3];
1012}
1013
1014static void
1015micro_ishr(
1016   union tgsi_exec_channel *dst,
1017   const union tgsi_exec_channel *src0,
1018   const union tgsi_exec_channel *src1 )
1019{
1020   dst->i[0] = src0->i[0] >> src1->i[0];
1021   dst->i[1] = src0->i[1] >> src1->i[1];
1022   dst->i[2] = src0->i[2] >> src1->i[2];
1023   dst->i[3] = src0->i[3] >> src1->i[3];
1024}
1025
1026static void
1027micro_trunc(
1028   union tgsi_exec_channel *dst,
1029   const union tgsi_exec_channel *src0 )
1030{
1031   dst->f[0] = (float) (int) src0->f[0];
1032   dst->f[1] = (float) (int) src0->f[1];
1033   dst->f[2] = (float) (int) src0->f[2];
1034   dst->f[3] = (float) (int) src0->f[3];
1035}
1036
#if 0
/**
 * Per-lane right shift on the unsigned integer view of the channel:
 * dst.u[k] = src0.u[k] >> src1.u[k].  Currently compiled out.
 */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
1050
1051static void
1052micro_sin(
1053   union tgsi_exec_channel *dst,
1054   const union tgsi_exec_channel *src )
1055{
1056   dst->f[0] = sinf( src->f[0] );
1057   dst->f[1] = sinf( src->f[1] );
1058   dst->f[2] = sinf( src->f[2] );
1059   dst->f[3] = sinf( src->f[3] );
1060}
1061
1062static void
1063micro_sqrt( union tgsi_exec_channel *dst,
1064            const union tgsi_exec_channel *src )
1065{
1066   dst->f[0] = sqrtf( src->f[0] );
1067   dst->f[1] = sqrtf( src->f[1] );
1068   dst->f[2] = sqrtf( src->f[2] );
1069   dst->f[3] = sqrtf( src->f[3] );
1070}
1071
1072static void
1073micro_sub(
1074   union tgsi_exec_channel *dst,
1075   const union tgsi_exec_channel *src0,
1076   const union tgsi_exec_channel *src1 )
1077{
1078   dst->f[0] = src0->f[0] - src1->f[0];
1079   dst->f[1] = src0->f[1] - src1->f[1];
1080   dst->f[2] = src0->f[2] - src1->f[2];
1081   dst->f[3] = src0->f[3] - src1->f[3];
1082}
1083
#if 0
/**
 * Per-lane unsigned-to-float conversion: dst.f[k] = (float) src.u[k].
 * Currently compiled out.
 */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1096
1097static void
1098micro_xor(
1099   union tgsi_exec_channel *dst,
1100   const union tgsi_exec_channel *src0,
1101   const union tgsi_exec_channel *src1 )
1102{
1103   dst->u[0] = src0->u[0] ^ src1->u[0];
1104   dst->u[1] = src0->u[1] ^ src1->u[1];
1105   dst->u[2] = src0->u[2] ^ src1->u[2];
1106   dst->u[3] = src0->u[3] ^ src1->u[3];
1107}
1108
1109static void
1110fetch_src_file_channel(
1111   const struct tgsi_exec_machine *mach,
1112   const uint file,
1113   const uint swizzle,
1114   const union tgsi_exec_channel *index,
1115   union tgsi_exec_channel *chan )
1116{
1117   switch( swizzle ) {
1118   case TGSI_EXTSWIZZLE_X:
1119   case TGSI_EXTSWIZZLE_Y:
1120   case TGSI_EXTSWIZZLE_Z:
1121   case TGSI_EXTSWIZZLE_W:
1122      switch( file ) {
1123      case TGSI_FILE_CONSTANT:
1124         assert(mach->Consts);
1125         if (index->i[0] < 0)
1126            chan->f[0] = 0.0f;
1127         else
1128            chan->f[0] = mach->Consts[index->i[0]][swizzle];
1129         if (index->i[1] < 0)
1130            chan->f[1] = 0.0f;
1131         else
1132            chan->f[1] = mach->Consts[index->i[1]][swizzle];
1133         if (index->i[2] < 0)
1134            chan->f[2] = 0.0f;
1135         else
1136            chan->f[2] = mach->Consts[index->i[2]][swizzle];
1137         if (index->i[3] < 0)
1138            chan->f[3] = 0.0f;
1139         else
1140            chan->f[3] = mach->Consts[index->i[3]][swizzle];
1141         break;
1142
1143      case TGSI_FILE_INPUT:
1144         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1145         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1146         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1147         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1148         break;
1149
1150      case TGSI_FILE_TEMPORARY:
1151         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1152         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1153         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1154         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1155         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1156         break;
1157
1158      case TGSI_FILE_IMMEDIATE:
1159         assert( index->i[0] < (int) mach->ImmLimit );
1160         chan->f[0] = mach->Imms[index->i[0]][swizzle];
1161         assert( index->i[1] < (int) mach->ImmLimit );
1162         chan->f[1] = mach->Imms[index->i[1]][swizzle];
1163         assert( index->i[2] < (int) mach->ImmLimit );
1164         chan->f[2] = mach->Imms[index->i[2]][swizzle];
1165         assert( index->i[3] < (int) mach->ImmLimit );
1166         chan->f[3] = mach->Imms[index->i[3]][swizzle];
1167         break;
1168
1169      case TGSI_FILE_ADDRESS:
1170         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1171         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1172         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1173         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1174         break;
1175
1176      case TGSI_FILE_OUTPUT:
1177         /* vertex/fragment output vars can be read too */
1178         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1179         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1180         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1181         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1182         break;
1183
1184      default:
1185         assert( 0 );
1186      }
1187      break;
1188
1189   case TGSI_EXTSWIZZLE_ZERO:
1190      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1191      break;
1192
1193   case TGSI_EXTSWIZZLE_ONE:
1194      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1195      break;
1196
1197   default:
1198      assert( 0 );
1199   }
1200}
1201
1202static void
1203fetch_source(
1204   const struct tgsi_exec_machine *mach,
1205   union tgsi_exec_channel *chan,
1206   const struct tgsi_full_src_register *reg,
1207   const uint chan_index )
1208{
1209   union tgsi_exec_channel index;
1210   uint swizzle;
1211
1212   /* We start with a direct index into a register file.
1213    *
1214    *    file[1],
1215    *    where:
1216    *       file = SrcRegister.File
1217    *       [1] = SrcRegister.Index
1218    */
1219   index.i[0] =
1220   index.i[1] =
1221   index.i[2] =
1222   index.i[3] = reg->SrcRegister.Index;
1223
1224   /* There is an extra source register that indirectly subscripts
1225    * a register file. The direct index now becomes an offset
1226    * that is being added to the indirect register.
1227    *
1228    *    file[ind[2].x+1],
1229    *    where:
1230    *       ind = SrcRegisterInd.File
1231    *       [2] = SrcRegisterInd.Index
1232    *       .x = SrcRegisterInd.SwizzleX
1233    */
1234   if (reg->SrcRegister.Indirect) {
1235      union tgsi_exec_channel index2;
1236      union tgsi_exec_channel indir_index;
1237      const uint execmask = mach->ExecMask;
1238      uint i;
1239
1240      /* which address register (always zero now) */
1241      index2.i[0] =
1242      index2.i[1] =
1243      index2.i[2] =
1244      index2.i[3] = reg->SrcRegisterInd.Index;
1245
1246      /* get current value of address register[swizzle] */
1247      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1248      fetch_src_file_channel(
1249         mach,
1250         reg->SrcRegisterInd.File,
1251         swizzle,
1252         &index2,
1253         &indir_index );
1254
1255      /* add value of address register to the offset */
1256      index.i[0] += (int) indir_index.f[0];
1257      index.i[1] += (int) indir_index.f[1];
1258      index.i[2] += (int) indir_index.f[2];
1259      index.i[3] += (int) indir_index.f[3];
1260
1261      /* for disabled execution channels, zero-out the index to
1262       * avoid using a potential garbage value.
1263       */
1264      for (i = 0; i < QUAD_SIZE; i++) {
1265         if ((execmask & (1 << i)) == 0)
1266            index.i[i] = 0;
1267      }
1268   }
1269
1270   /* There is an extra source register that is a second
1271    * subscript to a register file. Effectively it means that
1272    * the register file is actually a 2D array of registers.
1273    *
1274    *    file[1][3] == file[1*sizeof(file[1])+3],
1275    *    where:
1276    *       [3] = SrcRegisterDim.Index
1277    */
1278   if (reg->SrcRegister.Dimension) {
1279      /* The size of the first-order array depends on the register file type.
1280       * We need to multiply the index to the first array to get an effective,
1281       * "flat" index that points to the beginning of the second-order array.
1282       */
1283      switch (reg->SrcRegister.File) {
1284      case TGSI_FILE_INPUT:
1285         index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1286         index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1287         index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1288         index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1289         break;
1290      case TGSI_FILE_CONSTANT:
1291         index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1292         index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1293         index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1294         index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1295         break;
1296      default:
1297         assert( 0 );
1298      }
1299
1300      index.i[0] += reg->SrcRegisterDim.Index;
1301      index.i[1] += reg->SrcRegisterDim.Index;
1302      index.i[2] += reg->SrcRegisterDim.Index;
1303      index.i[3] += reg->SrcRegisterDim.Index;
1304
1305      /* Again, the second subscript index can be addressed indirectly
1306       * identically to the first one.
1307       * Nothing stops us from indirectly addressing the indirect register,
1308       * but there is no need for that, so we won't exercise it.
1309       *
1310       *    file[1][ind[4].y+3],
1311       *    where:
1312       *       ind = SrcRegisterDimInd.File
1313       *       [4] = SrcRegisterDimInd.Index
1314       *       .y = SrcRegisterDimInd.SwizzleX
1315       */
1316      if (reg->SrcRegisterDim.Indirect) {
1317         union tgsi_exec_channel index2;
1318         union tgsi_exec_channel indir_index;
1319         const uint execmask = mach->ExecMask;
1320         uint i;
1321
1322         index2.i[0] =
1323         index2.i[1] =
1324         index2.i[2] =
1325         index2.i[3] = reg->SrcRegisterDimInd.Index;
1326
1327         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1328         fetch_src_file_channel(
1329            mach,
1330            reg->SrcRegisterDimInd.File,
1331            swizzle,
1332            &index2,
1333            &indir_index );
1334
1335         index.i[0] += (int) indir_index.f[0];
1336         index.i[1] += (int) indir_index.f[1];
1337         index.i[2] += (int) indir_index.f[2];
1338         index.i[3] += (int) indir_index.f[3];
1339
1340         /* for disabled execution channels, zero-out the index to
1341          * avoid using a potential garbage value.
1342          */
1343         for (i = 0; i < QUAD_SIZE; i++) {
1344            if ((execmask & (1 << i)) == 0)
1345               index.i[i] = 0;
1346         }
1347      }
1348
1349      /* If by any chance there was a need for a 3D array of register
1350       * files, we would have to check whether SrcRegisterDim is followed
1351       * by a dimension register and continue the saga.
1352       */
1353   }
1354
1355   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1356   fetch_src_file_channel(
1357      mach,
1358      reg->SrcRegister.File,
1359      swizzle,
1360      &index,
1361      chan );
1362
1363   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1364   case TGSI_UTIL_SIGN_CLEAR:
1365      micro_abs( chan, chan );
1366      break;
1367
1368   case TGSI_UTIL_SIGN_SET:
1369      micro_abs( chan, chan );
1370      micro_neg( chan, chan );
1371      break;
1372
1373   case TGSI_UTIL_SIGN_TOGGLE:
1374      micro_neg( chan, chan );
1375      break;
1376
1377   case TGSI_UTIL_SIGN_KEEP:
1378      break;
1379   }
1380
1381   if (reg->SrcRegisterExtMod.Complement) {
1382      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1383   }
1384}
1385
1386static void
1387store_dest(
1388   struct tgsi_exec_machine *mach,
1389   const union tgsi_exec_channel *chan,
1390   const struct tgsi_full_dst_register *reg,
1391   const struct tgsi_full_instruction *inst,
1392   uint chan_index )
1393{
1394   uint i;
1395   union tgsi_exec_channel null;
1396   union tgsi_exec_channel *dst;
1397   uint execmask = mach->ExecMask;
1398
1399#ifdef DEBUG
1400   check_inf_or_nan(chan);
1401#endif
1402
1403   switch (reg->DstRegister.File) {
1404   case TGSI_FILE_NULL:
1405      dst = &null;
1406      break;
1407
1408   case TGSI_FILE_OUTPUT:
1409      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1410                           + reg->DstRegister.Index].xyzw[chan_index];
1411      break;
1412
1413   case TGSI_FILE_TEMPORARY:
1414      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1415      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1416      break;
1417
1418   case TGSI_FILE_ADDRESS:
1419      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1420      break;
1421
1422   default:
1423      assert( 0 );
1424      return;
1425   }
1426
1427   if (inst->InstructionExtNv.CondFlowEnable) {
1428      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1429      uint swizzle;
1430      uint shift;
1431      uint mask;
1432      uint test;
1433
1434      /* Only CC0 supported.
1435       */
1436      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1437
1438      switch (chan_index) {
1439      case CHAN_X:
1440         swizzle = inst->InstructionExtNv.CondSwizzleX;
1441         break;
1442      case CHAN_Y:
1443         swizzle = inst->InstructionExtNv.CondSwizzleY;
1444         break;
1445      case CHAN_Z:
1446         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1447         break;
1448      case CHAN_W:
1449         swizzle = inst->InstructionExtNv.CondSwizzleW;
1450         break;
1451      default:
1452         assert( 0 );
1453         return;
1454      }
1455
1456      switch (swizzle) {
1457      case TGSI_SWIZZLE_X:
1458         shift = TGSI_EXEC_CC_X_SHIFT;
1459         mask = TGSI_EXEC_CC_X_MASK;
1460         break;
1461      case TGSI_SWIZZLE_Y:
1462         shift = TGSI_EXEC_CC_Y_SHIFT;
1463         mask = TGSI_EXEC_CC_Y_MASK;
1464         break;
1465      case TGSI_SWIZZLE_Z:
1466         shift = TGSI_EXEC_CC_Z_SHIFT;
1467         mask = TGSI_EXEC_CC_Z_MASK;
1468         break;
1469      case TGSI_SWIZZLE_W:
1470         shift = TGSI_EXEC_CC_W_SHIFT;
1471         mask = TGSI_EXEC_CC_W_MASK;
1472         break;
1473      default:
1474         assert( 0 );
1475         return;
1476      }
1477
1478      switch (inst->InstructionExtNv.CondMask) {
1479      case TGSI_CC_GT:
1480         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1481         for (i = 0; i < QUAD_SIZE; i++)
1482            if (cc->u[i] & test)
1483               execmask &= ~(1 << i);
1484         break;
1485
1486      case TGSI_CC_EQ:
1487         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1488         for (i = 0; i < QUAD_SIZE; i++)
1489            if (cc->u[i] & test)
1490               execmask &= ~(1 << i);
1491         break;
1492
1493      case TGSI_CC_LT:
1494         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1495         for (i = 0; i < QUAD_SIZE; i++)
1496            if (cc->u[i] & test)
1497               execmask &= ~(1 << i);
1498         break;
1499
1500      case TGSI_CC_GE:
1501         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1502         for (i = 0; i < QUAD_SIZE; i++)
1503            if (cc->u[i] & test)
1504               execmask &= ~(1 << i);
1505         break;
1506
1507      case TGSI_CC_LE:
1508         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1509         for (i = 0; i < QUAD_SIZE; i++)
1510            if (cc->u[i] & test)
1511               execmask &= ~(1 << i);
1512         break;
1513
1514      case TGSI_CC_NE:
1515         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1516         for (i = 0; i < QUAD_SIZE; i++)
1517            if (cc->u[i] & test)
1518               execmask &= ~(1 << i);
1519         break;
1520
1521      case TGSI_CC_TR:
1522         break;
1523
1524      case TGSI_CC_FL:
1525         for (i = 0; i < QUAD_SIZE; i++)
1526            execmask &= ~(1 << i);
1527         break;
1528
1529      default:
1530         assert( 0 );
1531         return;
1532      }
1533   }
1534
1535   switch (inst->Instruction.Saturate) {
1536   case TGSI_SAT_NONE:
1537      for (i = 0; i < QUAD_SIZE; i++)
1538         if (execmask & (1 << i))
1539            dst->i[i] = chan->i[i];
1540      break;
1541
1542   case TGSI_SAT_ZERO_ONE:
1543      for (i = 0; i < QUAD_SIZE; i++)
1544         if (execmask & (1 << i)) {
1545            if (chan->f[i] < 0.0f)
1546               dst->f[i] = 0.0f;
1547            else if (chan->f[i] > 1.0f)
1548               dst->f[i] = 1.0f;
1549            else
1550               dst->i[i] = chan->i[i];
1551         }
1552      break;
1553
1554   case TGSI_SAT_MINUS_PLUS_ONE:
1555      for (i = 0; i < QUAD_SIZE; i++)
1556         if (execmask & (1 << i)) {
1557            if (chan->f[i] < -1.0f)
1558               dst->f[i] = -1.0f;
1559            else if (chan->f[i] > 1.0f)
1560               dst->f[i] = 1.0f;
1561            else
1562               dst->i[i] = chan->i[i];
1563         }
1564      break;
1565
1566   default:
1567      assert( 0 );
1568   }
1569
1570   if (inst->InstructionExtNv.CondDstUpdate) {
1571      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1572      uint shift;
1573      uint mask;
1574
1575      /* Only CC0 supported.
1576       */
1577      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1578
1579      switch (chan_index) {
1580      case CHAN_X:
1581         shift = TGSI_EXEC_CC_X_SHIFT;
1582         mask = ~TGSI_EXEC_CC_X_MASK;
1583         break;
1584      case CHAN_Y:
1585         shift = TGSI_EXEC_CC_Y_SHIFT;
1586         mask = ~TGSI_EXEC_CC_Y_MASK;
1587         break;
1588      case CHAN_Z:
1589         shift = TGSI_EXEC_CC_Z_SHIFT;
1590         mask = ~TGSI_EXEC_CC_Z_MASK;
1591         break;
1592      case CHAN_W:
1593         shift = TGSI_EXEC_CC_W_SHIFT;
1594         mask = ~TGSI_EXEC_CC_W_MASK;
1595         break;
1596      default:
1597         assert( 0 );
1598         return;
1599      }
1600
1601      for (i = 0; i < QUAD_SIZE; i++)
1602         if (execmask & (1 << i)) {
1603            cc->u[i] &= mask;
1604            if (dst->f[i] < 0.0f)
1605               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1606            else if (dst->f[i] > 0.0f)
1607               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1608            else if (dst->f[i] == 0.0f)
1609               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1610            else
1611               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1612         }
1613   }
1614}
1615
/**
 * Shorthand for fetch_source(): load channel CHAN of source operand INDEX
 * of the current instruction into VAL.
 * Expands to code that uses variables named 'mach' and 'inst', which must
 * be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/**
 * Shorthand for store_dest(): store VAL into channel CHAN of destination
 * operand INDEX of the current instruction.
 * Expands to code that uses variables named 'mach' and 'inst', which must
 * be in scope at the point of use.
 */
#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1621
1622
1623/**
1624 * Execute ARB-style KIL which is predicated by a src register.
1625 * Kill fragment if any of the four values is less than zero.
1626 */
1627static void
1628exec_kil(struct tgsi_exec_machine *mach,
1629         const struct tgsi_full_instruction *inst)
1630{
1631   uint uniquemask;
1632   uint chan_index;
1633   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1634   union tgsi_exec_channel r[1];
1635
1636   /* This mask stores component bits that were already tested. Note that
1637    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1638    * tested. */
1639   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1640
1641   for (chan_index = 0; chan_index < 4; chan_index++)
1642   {
1643      uint swizzle;
1644      uint i;
1645
1646      /* unswizzle channel */
1647      swizzle = tgsi_util_get_full_src_register_extswizzle (
1648                        &inst->FullSrcRegisters[0],
1649                        chan_index);
1650
1651      /* check if the component has not been already tested */
1652      if (uniquemask & (1 << swizzle))
1653         continue;
1654      uniquemask |= 1 << swizzle;
1655
1656      FETCH(&r[0], 0, chan_index);
1657      for (i = 0; i < 4; i++)
1658         if (r[0].f[i] < 0.0f)
1659            kilmask |= 1 << i;
1660   }
1661
1662   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1663}
1664
1665/**
1666 * Execute NVIDIA-style KIL which is predicated by a condition code.
1667 * Kill fragment if the condition code is TRUE.
1668 */
1669static void
1670exec_kilp(struct tgsi_exec_machine *mach,
1671          const struct tgsi_full_instruction *inst)
1672{
1673   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1674
1675   if (inst->InstructionExtNv.CondFlowEnable) {
1676      uint swizzle[4];
1677      uint chan_index;
1678
1679      kilmask = 0x0;
1680
1681      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1682      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1683      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1684      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1685
1686      for (chan_index = 0; chan_index < 4; chan_index++)
1687      {
1688         uint i;
1689
1690         for (i = 0; i < 4; i++) {
1691            /* TODO: evaluate the condition code */
1692            if (0)
1693               kilmask |= 1 << i;
1694         }
1695      }
1696   }
1697   else {
1698      /* "unconditional" kil */
1699      kilmask = mach->ExecMask;
1700   }
1701   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1702}
1703
1704
1705/*
1706 * Fetch a four texture samples using STR texture coordinates.
1707 */
1708static void
1709fetch_texel( struct tgsi_sampler *sampler,
1710             const union tgsi_exec_channel *s,
1711             const union tgsi_exec_channel *t,
1712             const union tgsi_exec_channel *p,
1713             float lodbias,  /* XXX should be float[4] */
1714             union tgsi_exec_channel *r,
1715             union tgsi_exec_channel *g,
1716             union tgsi_exec_channel *b,
1717             union tgsi_exec_channel *a )
1718{
1719   uint j;
1720   float rgba[NUM_CHANNELS][QUAD_SIZE];
1721
1722   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1723
1724   for (j = 0; j < 4; j++) {
1725      r->f[j] = rgba[0][j];
1726      g->f[j] = rgba[1][j];
1727      b->f[j] = rgba[2][j];
1728      a->f[j] = rgba[3][j];
1729   }
1730}
1731
1732
1733static void
1734exec_tex(struct tgsi_exec_machine *mach,
1735         const struct tgsi_full_instruction *inst,
1736         boolean biasLod,
1737         boolean projected)
1738{
1739   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1740   union tgsi_exec_channel r[4];
1741   uint chan_index;
1742   float lodBias;
1743
1744   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1745
1746   switch (inst->InstructionExtTexture.Texture) {
1747   case TGSI_TEXTURE_1D:
1748   case TGSI_TEXTURE_SHADOW1D:
1749
1750      FETCH(&r[0], 0, CHAN_X);
1751
1752      if (projected) {
1753         FETCH(&r[1], 0, CHAN_W);
1754         micro_div( &r[0], &r[0], &r[1] );
1755      }
1756
1757      if (biasLod) {
1758         FETCH(&r[1], 0, CHAN_W);
1759         lodBias = r[2].f[0];
1760      }
1761      else
1762         lodBias = 0.0;
1763
1764      fetch_texel(mach->Samplers[unit],
1765                  &r[0], &ZeroVec, &ZeroVec, lodBias,  /* S, T, P, BIAS */
1766                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1767      break;
1768
1769   case TGSI_TEXTURE_2D:
1770   case TGSI_TEXTURE_RECT:
1771   case TGSI_TEXTURE_SHADOW2D:
1772   case TGSI_TEXTURE_SHADOWRECT:
1773
1774      FETCH(&r[0], 0, CHAN_X);
1775      FETCH(&r[1], 0, CHAN_Y);
1776      FETCH(&r[2], 0, CHAN_Z);
1777
1778      if (projected) {
1779         FETCH(&r[3], 0, CHAN_W);
1780         micro_div( &r[0], &r[0], &r[3] );
1781         micro_div( &r[1], &r[1], &r[3] );
1782         micro_div( &r[2], &r[2], &r[3] );
1783      }
1784
1785      if (biasLod) {
1786         FETCH(&r[3], 0, CHAN_W);
1787         lodBias = r[3].f[0];
1788      }
1789      else
1790         lodBias = 0.0;
1791
1792      fetch_texel(mach->Samplers[unit],
1793                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1794                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1795      break;
1796
1797   case TGSI_TEXTURE_3D:
1798   case TGSI_TEXTURE_CUBE:
1799
1800      FETCH(&r[0], 0, CHAN_X);
1801      FETCH(&r[1], 0, CHAN_Y);
1802      FETCH(&r[2], 0, CHAN_Z);
1803
1804      if (projected) {
1805         FETCH(&r[3], 0, CHAN_W);
1806         micro_div( &r[0], &r[0], &r[3] );
1807         micro_div( &r[1], &r[1], &r[3] );
1808         micro_div( &r[2], &r[2], &r[3] );
1809      }
1810
1811      if (biasLod) {
1812         FETCH(&r[3], 0, CHAN_W);
1813         lodBias = r[3].f[0];
1814      }
1815      else
1816         lodBias = 0.0;
1817
1818      fetch_texel(mach->Samplers[unit],
1819                  &r[0], &r[1], &r[2], lodBias,
1820                  &r[0], &r[1], &r[2], &r[3]);
1821      break;
1822
1823   default:
1824      assert (0);
1825   }
1826
1827   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1828      STORE( &r[chan_index], 0, chan_index );
1829   }
1830}
1831
1832
1833/**
1834 * Evaluate a constant-valued coefficient at the position of the
1835 * current quad.
1836 */
1837static void
1838eval_constant_coef(
1839   struct tgsi_exec_machine *mach,
1840   unsigned attrib,
1841   unsigned chan )
1842{
1843   unsigned i;
1844
1845   for( i = 0; i < QUAD_SIZE; i++ ) {
1846      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1847   }
1848}
1849
1850/**
1851 * Evaluate a linear-valued coefficient at the position of the
1852 * current quad.
1853 */
1854static void
1855eval_linear_coef(
1856   struct tgsi_exec_machine *mach,
1857   unsigned attrib,
1858   unsigned chan )
1859{
1860   const float x = mach->QuadPos.xyzw[0].f[0];
1861   const float y = mach->QuadPos.xyzw[1].f[0];
1862   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1863   const float dady = mach->InterpCoefs[attrib].dady[chan];
1864   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1865   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1866   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1867   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1868   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1869}
1870
1871/**
1872 * Evaluate a perspective-valued coefficient at the position of the
1873 * current quad.
1874 */
1875static void
1876eval_perspective_coef(
1877   struct tgsi_exec_machine *mach,
1878   unsigned attrib,
1879   unsigned chan )
1880{
1881   const float x = mach->QuadPos.xyzw[0].f[0];
1882   const float y = mach->QuadPos.xyzw[1].f[0];
1883   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1884   const float dady = mach->InterpCoefs[attrib].dady[chan];
1885   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1886   const float *w = mach->QuadPos.xyzw[3].f;
1887   /* divide by W here */
1888   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1889   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1890   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1891   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1892}
1893
1894
/**
 * Signature shared by the eval_*_coef() functions above: evaluate one
 * channel of one input attribute for the current quad.  Lets
 * exec_declaration() pick the evaluator once per declaration.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1899
1900static void
1901exec_declaration(
1902   struct tgsi_exec_machine *mach,
1903   const struct tgsi_full_declaration *decl )
1904{
1905   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1906      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1907         unsigned first, last, mask;
1908         eval_coef_func eval;
1909
1910         first = decl->DeclarationRange.First;
1911         last = decl->DeclarationRange.Last;
1912         mask = decl->Declaration.UsageMask;
1913
1914         switch( decl->Declaration.Interpolate ) {
1915         case TGSI_INTERPOLATE_CONSTANT:
1916            eval = eval_constant_coef;
1917            break;
1918
1919         case TGSI_INTERPOLATE_LINEAR:
1920            eval = eval_linear_coef;
1921            break;
1922
1923         case TGSI_INTERPOLATE_PERSPECTIVE:
1924            eval = eval_perspective_coef;
1925            break;
1926
1927         default:
1928            eval = NULL;
1929            assert( 0 );
1930         }
1931
1932         if( mask == TGSI_WRITEMASK_XYZW ) {
1933            unsigned i, j;
1934
1935            for( i = first; i <= last; i++ ) {
1936               for( j = 0; j < NUM_CHANNELS; j++ ) {
1937                  eval( mach, i, j );
1938               }
1939            }
1940         }
1941         else {
1942            unsigned i, j;
1943
1944            for( j = 0; j < NUM_CHANNELS; j++ ) {
1945               if( mask & (1 << j) ) {
1946                  for( i = first; i <= last; i++ ) {
1947                     eval( mach, i, j );
1948                  }
1949               }
1950            }
1951         }
1952      }
1953   }
1954}
1955
1956static void
1957exec_instruction(
1958   struct tgsi_exec_machine *mach,
1959   const struct tgsi_full_instruction *inst,
1960   int *pc )
1961{
1962   uint chan_index;
1963   union tgsi_exec_channel r[10];
1964
1965   (*pc)++;
1966
1967   switch (inst->Instruction.Opcode) {
1968   case TGSI_OPCODE_ARL:
1969   case TGSI_OPCODE_FLOOR:
1970   /* TGSI_OPCODE_FLR */
1971      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1972         FETCH( &r[0], 0, chan_index );
1973         micro_flr( &r[0], &r[0] );
1974         STORE( &r[0], 0, chan_index );
1975      }
1976      break;
1977
1978   case TGSI_OPCODE_MOV:
1979   case TGSI_OPCODE_SWZ:
1980      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1981         FETCH( &r[0], 0, chan_index );
1982         STORE( &r[0], 0, chan_index );
1983      }
1984      break;
1985
1986   case TGSI_OPCODE_LIT:
1987      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1988         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1989      }
1990
1991      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1992         FETCH( &r[0], 0, CHAN_X );
1993         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1994            micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1995            STORE( &r[0], 0, CHAN_Y );
1996         }
1997
1998         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1999            FETCH( &r[1], 0, CHAN_Y );
2000            micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2001
2002            FETCH( &r[2], 0, CHAN_W );
2003            micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2004            micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2005            micro_pow( &r[1], &r[1], &r[2] );
2006            micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2007            STORE( &r[0], 0, CHAN_Z );
2008         }
2009      }
2010
2011      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2012         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2013      }
2014      break;
2015
2016   case TGSI_OPCODE_RCP:
2017   /* TGSI_OPCODE_RECIP */
2018      FETCH( &r[0], 0, CHAN_X );
2019      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2020      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2021         STORE( &r[0], 0, chan_index );
2022      }
2023      break;
2024
2025   case TGSI_OPCODE_RSQ:
2026   /* TGSI_OPCODE_RECIPSQRT */
2027      FETCH( &r[0], 0, CHAN_X );
2028      micro_abs( &r[0], &r[0] );
2029      micro_sqrt( &r[0], &r[0] );
2030      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2031      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2032         STORE( &r[0], 0, chan_index );
2033      }
2034      break;
2035
2036   case TGSI_OPCODE_EXP:
2037      FETCH( &r[0], 0, CHAN_X );
2038      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
2039      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2040         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
2041         STORE( &r[2], 0, CHAN_X );        /* store r2 */
2042      }
2043      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2044         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2045         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
2046      }
2047      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2048         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
2049         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
2050      }
2051      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2052         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2053      }
2054      break;
2055
2056   case TGSI_OPCODE_LOG:
2057      FETCH( &r[0], 0, CHAN_X );
2058      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
2059      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
2060      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
2061      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2062         STORE( &r[0], 0, CHAN_X );
2063      }
2064      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2065         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
2066         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2067         STORE( &r[0], 0, CHAN_Y );
2068      }
2069      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2070         STORE( &r[1], 0, CHAN_Z );
2071      }
2072      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2073         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2074      }
2075      break;
2076
2077   case TGSI_OPCODE_MUL:
2078      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
2079      {
2080         FETCH(&r[0], 0, chan_index);
2081         FETCH(&r[1], 1, chan_index);
2082
2083         micro_mul( &r[0], &r[0], &r[1] );
2084
2085         STORE(&r[0], 0, chan_index);
2086      }
2087      break;
2088
2089   case TGSI_OPCODE_ADD:
2090      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2091         FETCH( &r[0], 0, chan_index );
2092         FETCH( &r[1], 1, chan_index );
2093         micro_add( &r[0], &r[0], &r[1] );
2094         STORE( &r[0], 0, chan_index );
2095      }
2096      break;
2097
2098   case TGSI_OPCODE_DP3:
2099   /* TGSI_OPCODE_DOT3 */
2100      FETCH( &r[0], 0, CHAN_X );
2101      FETCH( &r[1], 1, CHAN_X );
2102      micro_mul( &r[0], &r[0], &r[1] );
2103
2104      FETCH( &r[1], 0, CHAN_Y );
2105      FETCH( &r[2], 1, CHAN_Y );
2106      micro_mul( &r[1], &r[1], &r[2] );
2107      micro_add( &r[0], &r[0], &r[1] );
2108
2109      FETCH( &r[1], 0, CHAN_Z );
2110      FETCH( &r[2], 1, CHAN_Z );
2111      micro_mul( &r[1], &r[1], &r[2] );
2112      micro_add( &r[0], &r[0], &r[1] );
2113
2114      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2115         STORE( &r[0], 0, chan_index );
2116      }
2117      break;
2118
2119    case TGSI_OPCODE_DP4:
2120    /* TGSI_OPCODE_DOT4 */
2121       FETCH(&r[0], 0, CHAN_X);
2122       FETCH(&r[1], 1, CHAN_X);
2123
2124       micro_mul( &r[0], &r[0], &r[1] );
2125
2126       FETCH(&r[1], 0, CHAN_Y);
2127       FETCH(&r[2], 1, CHAN_Y);
2128
2129       micro_mul( &r[1], &r[1], &r[2] );
2130       micro_add( &r[0], &r[0], &r[1] );
2131
2132       FETCH(&r[1], 0, CHAN_Z);
2133       FETCH(&r[2], 1, CHAN_Z);
2134
2135       micro_mul( &r[1], &r[1], &r[2] );
2136       micro_add( &r[0], &r[0], &r[1] );
2137
2138       FETCH(&r[1], 0, CHAN_W);
2139       FETCH(&r[2], 1, CHAN_W);
2140
2141       micro_mul( &r[1], &r[1], &r[2] );
2142       micro_add( &r[0], &r[0], &r[1] );
2143
2144      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2145         STORE( &r[0], 0, chan_index );
2146      }
2147      break;
2148
2149   case TGSI_OPCODE_DST:
2150      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2151         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2152      }
2153
2154      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2155         FETCH( &r[0], 0, CHAN_Y );
2156         FETCH( &r[1], 1, CHAN_Y);
2157         micro_mul( &r[0], &r[0], &r[1] );
2158         STORE( &r[0], 0, CHAN_Y );
2159      }
2160
2161      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2162         FETCH( &r[0], 0, CHAN_Z );
2163         STORE( &r[0], 0, CHAN_Z );
2164      }
2165
2166      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2167         FETCH( &r[0], 1, CHAN_W );
2168         STORE( &r[0], 0, CHAN_W );
2169      }
2170      break;
2171
2172   case TGSI_OPCODE_MIN:
2173      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2174         FETCH(&r[0], 0, chan_index);
2175         FETCH(&r[1], 1, chan_index);
2176
2177         /* XXX use micro_min()?? */
2178         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2179
2180         STORE(&r[0], 0, chan_index);
2181      }
2182      break;
2183
2184   case TGSI_OPCODE_MAX:
2185      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2186         FETCH(&r[0], 0, chan_index);
2187         FETCH(&r[1], 1, chan_index);
2188
2189         /* XXX use micro_max()?? */
2190         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2191
2192         STORE(&r[0], 0, chan_index );
2193      }
2194      break;
2195
2196   case TGSI_OPCODE_SLT:
2197   /* TGSI_OPCODE_SETLT */
2198      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2199         FETCH( &r[0], 0, chan_index );
2200         FETCH( &r[1], 1, chan_index );
2201         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2202         STORE( &r[0], 0, chan_index );
2203      }
2204      break;
2205
2206   case TGSI_OPCODE_SGE:
2207   /* TGSI_OPCODE_SETGE */
2208      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2209         FETCH( &r[0], 0, chan_index );
2210         FETCH( &r[1], 1, chan_index );
2211         micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2212         STORE( &r[0], 0, chan_index );
2213      }
2214      break;
2215
2216   case TGSI_OPCODE_MAD:
2217   /* TGSI_OPCODE_MADD */
2218      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2219         FETCH( &r[0], 0, chan_index );
2220         FETCH( &r[1], 1, chan_index );
2221         micro_mul( &r[0], &r[0], &r[1] );
2222         FETCH( &r[1], 2, chan_index );
2223         micro_add( &r[0], &r[0], &r[1] );
2224         STORE( &r[0], 0, chan_index );
2225      }
2226      break;
2227
2228   case TGSI_OPCODE_SUB:
2229      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2230         FETCH(&r[0], 0, chan_index);
2231         FETCH(&r[1], 1, chan_index);
2232
2233         micro_sub( &r[0], &r[0], &r[1] );
2234
2235         STORE(&r[0], 0, chan_index);
2236      }
2237      break;
2238
2239   case TGSI_OPCODE_LERP:
2240   /* TGSI_OPCODE_LRP */
2241      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2242         FETCH(&r[0], 0, chan_index);
2243         FETCH(&r[1], 1, chan_index);
2244         FETCH(&r[2], 2, chan_index);
2245
2246         micro_sub( &r[1], &r[1], &r[2] );
2247         micro_mul( &r[0], &r[0], &r[1] );
2248         micro_add( &r[0], &r[0], &r[2] );
2249
2250         STORE(&r[0], 0, chan_index);
2251      }
2252      break;
2253
2254   case TGSI_OPCODE_CND:
2255      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2256         FETCH(&r[0], 0, chan_index);
2257         FETCH(&r[1], 1, chan_index);
2258         FETCH(&r[2], 2, chan_index);
2259         micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2260         STORE(&r[0], 0, chan_index);
2261      }
2262      break;
2263
2264   case TGSI_OPCODE_CND0:
2265      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2266         FETCH(&r[0], 0, chan_index);
2267         FETCH(&r[1], 1, chan_index);
2268         FETCH(&r[2], 2, chan_index);
2269         micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2270         STORE(&r[0], 0, chan_index);
2271      }
2272      break;
2273
2274   case TGSI_OPCODE_DOT2ADD:
2275   /* TGSI_OPCODE_DP2A */
2276      FETCH( &r[0], 0, CHAN_X );
2277      FETCH( &r[1], 1, CHAN_X );
2278      micro_mul( &r[0], &r[0], &r[1] );
2279
2280      FETCH( &r[1], 0, CHAN_Y );
2281      FETCH( &r[2], 1, CHAN_Y );
2282      micro_mul( &r[1], &r[1], &r[2] );
2283      micro_add( &r[0], &r[0], &r[1] );
2284
2285      FETCH( &r[2], 2, CHAN_X );
2286      micro_add( &r[0], &r[0], &r[2] );
2287
2288      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2289         STORE( &r[0], 0, chan_index );
2290      }
2291      break;
2292
2293   case TGSI_OPCODE_INDEX:
2294      /* XXX: considered for removal */
2295      assert (0);
2296      break;
2297
2298   case TGSI_OPCODE_NEGATE:
2299      /* XXX: considered for removal */
2300      assert (0);
2301      break;
2302
2303   case TGSI_OPCODE_FRAC:
2304   /* TGSI_OPCODE_FRC */
2305      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2306         FETCH( &r[0], 0, chan_index );
2307         micro_frc( &r[0], &r[0] );
2308         STORE( &r[0], 0, chan_index );
2309      }
2310      break;
2311
2312   case TGSI_OPCODE_CLAMP:
2313      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2314         FETCH(&r[0], 0, chan_index);
2315         FETCH(&r[1], 1, chan_index);
2316         micro_max(&r[0], &r[0], &r[1]);
2317         FETCH(&r[1], 2, chan_index);
2318         micro_min(&r[0], &r[0], &r[1]);
2319         STORE(&r[0], 0, chan_index);
2320      }
2321      break;
2322
2323   case TGSI_OPCODE_ROUND:
2324   case TGSI_OPCODE_ARR:
2325      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2326         FETCH( &r[0], 0, chan_index );
2327         micro_rnd( &r[0], &r[0] );
2328         STORE( &r[0], 0, chan_index );
2329      }
2330      break;
2331
2332   case TGSI_OPCODE_EXPBASE2:
2333   /* TGSI_OPCODE_EX2 */
2334      FETCH(&r[0], 0, CHAN_X);
2335
2336#if FAST_MATH
2337      micro_exp2( &r[0], &r[0] );
2338#else
2339      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2340#endif
2341
2342      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2343         STORE( &r[0], 0, chan_index );
2344      }
2345      break;
2346
2347   case TGSI_OPCODE_LOGBASE2:
2348   /* TGSI_OPCODE_LG2 */
2349      FETCH( &r[0], 0, CHAN_X );
2350      micro_lg2( &r[0], &r[0] );
2351      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2352         STORE( &r[0], 0, chan_index );
2353      }
2354      break;
2355
2356   case TGSI_OPCODE_POWER:
2357   /* TGSI_OPCODE_POW */
2358      FETCH(&r[0], 0, CHAN_X);
2359      FETCH(&r[1], 1, CHAN_X);
2360
2361      micro_pow( &r[0], &r[0], &r[1] );
2362
2363      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2364         STORE( &r[0], 0, chan_index );
2365      }
2366      break;
2367
2368   case TGSI_OPCODE_CROSSPRODUCT:
2369   /* TGSI_OPCODE_XPD */
2370      FETCH(&r[0], 0, CHAN_Y);
2371      FETCH(&r[1], 1, CHAN_Z);
2372
2373      micro_mul( &r[2], &r[0], &r[1] );
2374
2375      FETCH(&r[3], 0, CHAN_Z);
2376      FETCH(&r[4], 1, CHAN_Y);
2377
2378      micro_mul( &r[5], &r[3], &r[4] );
2379      micro_sub( &r[2], &r[2], &r[5] );
2380
2381      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2382         STORE( &r[2], 0, CHAN_X );
2383      }
2384
2385      FETCH(&r[2], 1, CHAN_X);
2386
2387      micro_mul( &r[3], &r[3], &r[2] );
2388
2389      FETCH(&r[5], 0, CHAN_X);
2390
2391      micro_mul( &r[1], &r[1], &r[5] );
2392      micro_sub( &r[3], &r[3], &r[1] );
2393
2394      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2395         STORE( &r[3], 0, CHAN_Y );
2396      }
2397
2398      micro_mul( &r[5], &r[5], &r[4] );
2399      micro_mul( &r[0], &r[0], &r[2] );
2400      micro_sub( &r[5], &r[5], &r[0] );
2401
2402      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2403         STORE( &r[5], 0, CHAN_Z );
2404      }
2405
2406      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2407         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2408      }
2409      break;
2410
2411    case TGSI_OPCODE_MULTIPLYMATRIX:
2412       /* XXX: considered for removal */
2413       assert (0);
2414       break;
2415
2416    case TGSI_OPCODE_ABS:
2417       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2418          FETCH(&r[0], 0, chan_index);
2419
2420          micro_abs( &r[0], &r[0] );
2421
2422          STORE(&r[0], 0, chan_index);
2423       }
2424       break;
2425
2426   case TGSI_OPCODE_RCC:
2427      FETCH(&r[0], 0, CHAN_X);
2428      micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2429      micro_float_clamp(&r[0], &r[0]);
2430      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2431         STORE(&r[0], 0, chan_index);
2432      }
2433      break;
2434
2435   case TGSI_OPCODE_DPH:
2436      FETCH(&r[0], 0, CHAN_X);
2437      FETCH(&r[1], 1, CHAN_X);
2438
2439      micro_mul( &r[0], &r[0], &r[1] );
2440
2441      FETCH(&r[1], 0, CHAN_Y);
2442      FETCH(&r[2], 1, CHAN_Y);
2443
2444      micro_mul( &r[1], &r[1], &r[2] );
2445      micro_add( &r[0], &r[0], &r[1] );
2446
2447      FETCH(&r[1], 0, CHAN_Z);
2448      FETCH(&r[2], 1, CHAN_Z);
2449
2450      micro_mul( &r[1], &r[1], &r[2] );
2451      micro_add( &r[0], &r[0], &r[1] );
2452
2453      FETCH(&r[1], 1, CHAN_W);
2454
2455      micro_add( &r[0], &r[0], &r[1] );
2456
2457      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2458         STORE( &r[0], 0, chan_index );
2459      }
2460      break;
2461
2462   case TGSI_OPCODE_COS:
2463      FETCH(&r[0], 0, CHAN_X);
2464
2465      micro_cos( &r[0], &r[0] );
2466
2467      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2468         STORE( &r[0], 0, chan_index );
2469      }
2470      break;
2471
2472   case TGSI_OPCODE_DDX:
2473      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2474         FETCH( &r[0], 0, chan_index );
2475         micro_ddx( &r[0], &r[0] );
2476         STORE( &r[0], 0, chan_index );
2477      }
2478      break;
2479
2480   case TGSI_OPCODE_DDY:
2481      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2482         FETCH( &r[0], 0, chan_index );
2483         micro_ddy( &r[0], &r[0] );
2484         STORE( &r[0], 0, chan_index );
2485      }
2486      break;
2487
2488   case TGSI_OPCODE_KILP:
2489      exec_kilp (mach, inst);
2490      break;
2491
2492   case TGSI_OPCODE_KIL:
2493      exec_kil (mach, inst);
2494      break;
2495
2496   case TGSI_OPCODE_PK2H:
2497      assert (0);
2498      break;
2499
2500   case TGSI_OPCODE_PK2US:
2501      assert (0);
2502      break;
2503
2504   case TGSI_OPCODE_PK4B:
2505      assert (0);
2506      break;
2507
2508   case TGSI_OPCODE_PK4UB:
2509      assert (0);
2510      break;
2511
2512   case TGSI_OPCODE_RFL:
2513      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2514          IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2515          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2516         /* r0 = dp3(src0, src0) */
2517         FETCH(&r[2], 0, CHAN_X);
2518         micro_mul(&r[0], &r[2], &r[2]);
2519         FETCH(&r[4], 0, CHAN_Y);
2520         micro_mul(&r[8], &r[4], &r[4]);
2521         micro_add(&r[0], &r[0], &r[8]);
2522         FETCH(&r[6], 0, CHAN_Z);
2523         micro_mul(&r[8], &r[6], &r[6]);
2524         micro_add(&r[0], &r[0], &r[8]);
2525
2526         /* r1 = dp3(src0, src1) */
2527         FETCH(&r[3], 1, CHAN_X);
2528         micro_mul(&r[1], &r[2], &r[3]);
2529         FETCH(&r[5], 1, CHAN_Y);
2530         micro_mul(&r[8], &r[4], &r[5]);
2531         micro_add(&r[1], &r[1], &r[8]);
2532         FETCH(&r[7], 1, CHAN_Z);
2533         micro_mul(&r[8], &r[6], &r[7]);
2534         micro_add(&r[1], &r[1], &r[8]);
2535
2536         /* r1 = 2 * r1 / r0 */
2537         micro_add(&r[1], &r[1], &r[1]);
2538         micro_div(&r[1], &r[1], &r[0]);
2539
2540         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2541            micro_mul(&r[2], &r[2], &r[1]);
2542            micro_sub(&r[2], &r[2], &r[3]);
2543            STORE(&r[2], 0, CHAN_X);
2544         }
2545         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2546            micro_mul(&r[4], &r[4], &r[1]);
2547            micro_sub(&r[4], &r[4], &r[5]);
2548            STORE(&r[4], 0, CHAN_Y);
2549         }
2550         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2551            micro_mul(&r[6], &r[6], &r[1]);
2552            micro_sub(&r[6], &r[6], &r[7]);
2553            STORE(&r[6], 0, CHAN_Z);
2554         }
2555      }
2556      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2557         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2558      }
2559      break;
2560
2561   case TGSI_OPCODE_SEQ:
2562      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2563         FETCH( &r[0], 0, chan_index );
2564         FETCH( &r[1], 1, chan_index );
2565         micro_eq( &r[0], &r[0], &r[1],
2566                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2567                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2568         STORE( &r[0], 0, chan_index );
2569      }
2570      break;
2571
2572   case TGSI_OPCODE_SFL:
2573      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2574         STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2575      }
2576      break;
2577
2578   case TGSI_OPCODE_SGT:
2579      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2580         FETCH( &r[0], 0, chan_index );
2581         FETCH( &r[1], 1, chan_index );
2582         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2583         STORE( &r[0], 0, chan_index );
2584      }
2585      break;
2586
2587   case TGSI_OPCODE_SIN:
2588      FETCH( &r[0], 0, CHAN_X );
2589      micro_sin( &r[0], &r[0] );
2590      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2591         STORE( &r[0], 0, chan_index );
2592      }
2593      break;
2594
2595   case TGSI_OPCODE_SLE:
2596      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2597         FETCH( &r[0], 0, chan_index );
2598         FETCH( &r[1], 1, chan_index );
2599         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2600         STORE( &r[0], 0, chan_index );
2601      }
2602      break;
2603
2604   case TGSI_OPCODE_SNE:
2605      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2606         FETCH( &r[0], 0, chan_index );
2607         FETCH( &r[1], 1, chan_index );
2608         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2609         STORE( &r[0], 0, chan_index );
2610      }
2611      break;
2612
2613   case TGSI_OPCODE_STR:
2614      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2615         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2616      }
2617      break;
2618
2619   case TGSI_OPCODE_TEX:
2620      /* simple texture lookup */
2621      /* src[0] = texcoord */
2622      /* src[1] = sampler unit */
2623      exec_tex(mach, inst, FALSE, FALSE);
2624      break;
2625
2626   case TGSI_OPCODE_TXB:
2627      /* Texture lookup with lod bias */
2628      /* src[0] = texcoord (src[0].w = LOD bias) */
2629      /* src[1] = sampler unit */
2630      exec_tex(mach, inst, TRUE, FALSE);
2631      break;
2632
2633   case TGSI_OPCODE_TXD:
2634      /* Texture lookup with explicit partial derivatives */
2635      /* src[0] = texcoord */
2636      /* src[1] = d[strq]/dx */
2637      /* src[2] = d[strq]/dy */
2638      /* src[3] = sampler unit */
2639      assert (0);
2640      break;
2641
2642   case TGSI_OPCODE_TXL:
2643      /* Texture lookup with explicit LOD */
2644      /* src[0] = texcoord (src[0].w = LOD) */
2645      /* src[1] = sampler unit */
2646      exec_tex(mach, inst, TRUE, FALSE);
2647      break;
2648
2649   case TGSI_OPCODE_TXP:
2650      /* Texture lookup with projection */
2651      /* src[0] = texcoord (src[0].w = projection) */
2652      /* src[1] = sampler unit */
2653      exec_tex(mach, inst, FALSE, TRUE);
2654      break;
2655
2656   case TGSI_OPCODE_UP2H:
2657      assert (0);
2658      break;
2659
2660   case TGSI_OPCODE_UP2US:
2661      assert (0);
2662      break;
2663
2664   case TGSI_OPCODE_UP4B:
2665      assert (0);
2666      break;
2667
2668   case TGSI_OPCODE_UP4UB:
2669      assert (0);
2670      break;
2671
2672   case TGSI_OPCODE_X2D:
2673      FETCH(&r[0], 1, CHAN_X);
2674      FETCH(&r[1], 1, CHAN_Y);
2675      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2676          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2677         FETCH(&r[2], 2, CHAN_X);
2678         micro_mul(&r[2], &r[2], &r[0]);
2679         FETCH(&r[3], 2, CHAN_Y);
2680         micro_mul(&r[3], &r[3], &r[1]);
2681         micro_add(&r[2], &r[2], &r[3]);
2682         FETCH(&r[3], 0, CHAN_X);
2683         micro_add(&r[2], &r[2], &r[3]);
2684         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2685            STORE(&r[2], 0, CHAN_X);
2686         }
2687         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2688            STORE(&r[2], 0, CHAN_Z);
2689         }
2690      }
2691      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2692          IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2693         FETCH(&r[2], 2, CHAN_Z);
2694         micro_mul(&r[2], &r[2], &r[0]);
2695         FETCH(&r[3], 2, CHAN_W);
2696         micro_mul(&r[3], &r[3], &r[1]);
2697         micro_add(&r[2], &r[2], &r[3]);
2698         FETCH(&r[3], 0, CHAN_Y);
2699         micro_add(&r[2], &r[2], &r[3]);
2700         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2701            STORE(&r[2], 0, CHAN_Y);
2702         }
2703         if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2704            STORE(&r[2], 0, CHAN_W);
2705         }
2706      }
2707      break;
2708
2709   case TGSI_OPCODE_ARA:
2710      assert (0);
2711      break;
2712
2713   case TGSI_OPCODE_BRA:
2714      assert (0);
2715      break;
2716
2717   case TGSI_OPCODE_CAL:
2718      /* skip the call if no execution channels are enabled */
2719      if (mach->ExecMask) {
2720         /* do the call */
2721
2722         /* push the Cond, Loop, Cont stacks */
2723         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2724         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2725         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2726         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2727         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2728         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2729
2730         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2731         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2732
2733         /* note that PC was already incremented above */
2734         mach->CallStack[mach->CallStackTop++] = *pc;
2735         *pc = inst->InstructionExtLabel.Label;
2736      }
2737      break;
2738
2739   case TGSI_OPCODE_RET:
2740      mach->FuncMask &= ~mach->ExecMask;
2741      UPDATE_EXEC_MASK(mach);
2742
2743      if (mach->FuncMask == 0x0) {
2744         /* really return now (otherwise, keep executing) */
2745
2746         if (mach->CallStackTop == 0) {
2747            /* returning from main() */
2748            *pc = -1;
2749            return;
2750         }
2751         *pc = mach->CallStack[--mach->CallStackTop];
2752
2753         /* pop the Cond, Loop, Cont stacks */
2754         assert(mach->CondStackTop > 0);
2755         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2756         assert(mach->LoopStackTop > 0);
2757         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2758         assert(mach->ContStackTop > 0);
2759         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2760         assert(mach->FuncStackTop > 0);
2761         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2762
2763         UPDATE_EXEC_MASK(mach);
2764      }
2765      break;
2766
2767   case TGSI_OPCODE_SSG:
2768   /* TGSI_OPCODE_SGN */
2769      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2770         FETCH( &r[0], 0, chan_index );
2771         micro_sgn( &r[0], &r[0] );
2772         STORE( &r[0], 0, chan_index );
2773      }
2774      break;
2775
2776   case TGSI_OPCODE_CMP:
2777      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2778         FETCH(&r[0], 0, chan_index);
2779         FETCH(&r[1], 1, chan_index);
2780         FETCH(&r[2], 2, chan_index);
2781
2782         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2783
2784         STORE(&r[0], 0, chan_index);
2785      }
2786      break;
2787
2788   case TGSI_OPCODE_SCS:
2789      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2790         FETCH( &r[0], 0, CHAN_X );
2791         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2792            micro_cos(&r[1], &r[0]);
2793            STORE(&r[1], 0, CHAN_X);
2794         }
2795         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2796            micro_sin(&r[1], &r[0]);
2797            STORE(&r[1], 0, CHAN_Y);
2798         }
2799      }
2800      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2801         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2802      }
2803      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2804         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2805      }
2806      break;
2807
2808   case TGSI_OPCODE_NRM:
2809      /* 3-component vector normalize */
2810      if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2811         IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2812         IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2813         /* r3 = sqrt(dp3(src0, src0)) */
2814         FETCH(&r[0], 0, CHAN_X);
2815         micro_mul(&r[3], &r[0], &r[0]);
2816         FETCH(&r[1], 0, CHAN_Y);
2817         micro_mul(&r[4], &r[1], &r[1]);
2818         micro_add(&r[3], &r[3], &r[4]);
2819         FETCH(&r[2], 0, CHAN_Z);
2820         micro_mul(&r[4], &r[2], &r[2]);
2821         micro_add(&r[3], &r[3], &r[4]);
2822         micro_sqrt(&r[3], &r[3]);
2823
2824         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2825            micro_div(&r[0], &r[0], &r[3]);
2826            STORE(&r[0], 0, CHAN_X);
2827         }
2828         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2829            micro_div(&r[1], &r[1], &r[3]);
2830            STORE(&r[1], 0, CHAN_Y);
2831         }
2832         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2833            micro_div(&r[2], &r[2], &r[3]);
2834            STORE(&r[2], 0, CHAN_Z);
2835         }
2836      }
2837      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2838         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2839      }
2840      break;
2841
2842   case TGSI_OPCODE_NRM4:
2843      /* 4-component vector normalize */
2844      {
2845         union tgsi_exec_channel tmp, dot;
2846
2847         /* tmp = dp4(src0, src0): */
2848         FETCH( &r[0], 0, CHAN_X );
2849         micro_mul( &tmp, &r[0], &r[0] );
2850
2851         FETCH( &r[1], 0, CHAN_Y );
2852         micro_mul( &dot, &r[1], &r[1] );
2853         micro_add( &tmp, &tmp, &dot );
2854
2855         FETCH( &r[2], 0, CHAN_Z );
2856         micro_mul( &dot, &r[2], &r[2] );
2857         micro_add( &tmp, &tmp, &dot );
2858
2859         FETCH( &r[3], 0, CHAN_W );
2860         micro_mul( &dot, &r[3], &r[3] );
2861         micro_add( &tmp, &tmp, &dot );
2862
2863         /* tmp = 1 / sqrt(tmp) */
2864         micro_sqrt( &tmp, &tmp );
2865         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2866
2867         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2868            /* chan = chan * tmp */
2869            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2870            STORE( &r[chan_index], 0, chan_index );
2871         }
2872      }
2873      break;
2874
2875   case TGSI_OPCODE_DIV:
2876      assert( 0 );
2877      break;
2878
2879   case TGSI_OPCODE_DP2:
2880      FETCH( &r[0], 0, CHAN_X );
2881      FETCH( &r[1], 1, CHAN_X );
2882      micro_mul( &r[0], &r[0], &r[1] );
2883
2884      FETCH( &r[1], 0, CHAN_Y );
2885      FETCH( &r[2], 1, CHAN_Y );
2886      micro_mul( &r[1], &r[1], &r[2] );
2887      micro_add( &r[0], &r[0], &r[1] );
2888
2889      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2890         STORE( &r[0], 0, chan_index );
2891      }
2892      break;
2893
2894   case TGSI_OPCODE_IF:
2895      /* push CondMask */
2896      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2897      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2898      FETCH( &r[0], 0, CHAN_X );
2899      /* update CondMask */
2900      if( ! r[0].u[0] ) {
2901         mach->CondMask &= ~0x1;
2902      }
2903      if( ! r[0].u[1] ) {
2904         mach->CondMask &= ~0x2;
2905      }
2906      if( ! r[0].u[2] ) {
2907         mach->CondMask &= ~0x4;
2908      }
2909      if( ! r[0].u[3] ) {
2910         mach->CondMask &= ~0x8;
2911      }
2912      UPDATE_EXEC_MASK(mach);
2913      /* Todo: If CondMask==0, jump to ELSE */
2914      break;
2915
2916   case TGSI_OPCODE_ELSE:
2917      /* invert CondMask wrt previous mask */
2918      {
2919         uint prevMask;
2920         assert(mach->CondStackTop > 0);
2921         prevMask = mach->CondStack[mach->CondStackTop - 1];
2922         mach->CondMask = ~mach->CondMask & prevMask;
2923         UPDATE_EXEC_MASK(mach);
2924         /* Todo: If CondMask==0, jump to ENDIF */
2925      }
2926      break;
2927
2928   case TGSI_OPCODE_ENDIF:
2929      /* pop CondMask */
2930      assert(mach->CondStackTop > 0);
2931      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2932      UPDATE_EXEC_MASK(mach);
2933      break;
2934
2935   case TGSI_OPCODE_END:
2936      /* halt execution */
2937      *pc = -1;
2938      break;
2939
2940   case TGSI_OPCODE_REP:
2941      assert (0);
2942      break;
2943
2944   case TGSI_OPCODE_ENDREP:
2945       assert (0);
2946       break;
2947
2948   case TGSI_OPCODE_PUSHA:
2949      assert (0);
2950      break;
2951
2952   case TGSI_OPCODE_POPA:
2953      assert (0);
2954      break;
2955
2956   case TGSI_OPCODE_CEIL:
2957      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2958         FETCH( &r[0], 0, chan_index );
2959         micro_ceil( &r[0], &r[0] );
2960         STORE( &r[0], 0, chan_index );
2961      }
2962      break;
2963
2964   case TGSI_OPCODE_I2F:
2965      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2966         FETCH( &r[0], 0, chan_index );
2967         micro_i2f( &r[0], &r[0] );
2968         STORE( &r[0], 0, chan_index );
2969      }
2970      break;
2971
2972   case TGSI_OPCODE_NOT:
2973      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2974         FETCH( &r[0], 0, chan_index );
2975         micro_not( &r[0], &r[0] );
2976         STORE( &r[0], 0, chan_index );
2977      }
2978      break;
2979
2980   case TGSI_OPCODE_TRUNC:
2981      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2982         FETCH( &r[0], 0, chan_index );
2983         micro_trunc( &r[0], &r[0] );
2984         STORE( &r[0], 0, chan_index );
2985      }
2986      break;
2987
2988   case TGSI_OPCODE_SHL:
2989      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2990         FETCH( &r[0], 0, chan_index );
2991         FETCH( &r[1], 1, chan_index );
2992         micro_shl( &r[0], &r[0], &r[1] );
2993         STORE( &r[0], 0, chan_index );
2994      }
2995      break;
2996
2997   case TGSI_OPCODE_SHR:
2998      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2999         FETCH( &r[0], 0, chan_index );
3000         FETCH( &r[1], 1, chan_index );
3001         micro_ishr( &r[0], &r[0], &r[1] );
3002         STORE( &r[0], 0, chan_index );
3003      }
3004      break;
3005
3006   case TGSI_OPCODE_AND:
3007      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3008         FETCH( &r[0], 0, chan_index );
3009         FETCH( &r[1], 1, chan_index );
3010         micro_and( &r[0], &r[0], &r[1] );
3011         STORE( &r[0], 0, chan_index );
3012      }
3013      break;
3014
3015   case TGSI_OPCODE_OR:
3016      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3017         FETCH( &r[0], 0, chan_index );
3018         FETCH( &r[1], 1, chan_index );
3019         micro_or( &r[0], &r[0], &r[1] );
3020         STORE( &r[0], 0, chan_index );
3021      }
3022      break;
3023
3024   case TGSI_OPCODE_MOD:
3025      assert (0);
3026      break;
3027
3028   case TGSI_OPCODE_XOR:
3029      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3030         FETCH( &r[0], 0, chan_index );
3031         FETCH( &r[1], 1, chan_index );
3032         micro_xor( &r[0], &r[0], &r[1] );
3033         STORE( &r[0], 0, chan_index );
3034      }
3035      break;
3036
3037   case TGSI_OPCODE_SAD:
3038      assert (0);
3039      break;
3040
3041   case TGSI_OPCODE_TXF:
3042      assert (0);
3043      break;
3044
3045   case TGSI_OPCODE_TXQ:
3046      assert (0);
3047      break;
3048
3049   case TGSI_OPCODE_EMIT:
3050      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
3051      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
3052      break;
3053
3054   case TGSI_OPCODE_ENDPRIM:
3055      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
3056      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
3057      break;
3058
3059   case TGSI_OPCODE_LOOP:
3060      /* fall-through (for now) */
3061   case TGSI_OPCODE_BGNLOOP2:
3062      /* push LoopMask and ContMasks */
3063      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3064      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3065      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3066      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3067      break;
3068
3069   case TGSI_OPCODE_ENDLOOP:
3070      /* fall-through (for now at least) */
3071   case TGSI_OPCODE_ENDLOOP2:
3072      /* Restore ContMask, but don't pop */
3073      assert(mach->ContStackTop > 0);
3074      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3075      UPDATE_EXEC_MASK(mach);
3076      if (mach->ExecMask) {
3077         /* repeat loop: jump to instruction just past BGNLOOP */
3078         *pc = inst->InstructionExtLabel.Label + 1;
3079      }
3080      else {
3081         /* exit loop: pop LoopMask */
3082         assert(mach->LoopStackTop > 0);
3083         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3084         /* pop ContMask */
3085         assert(mach->ContStackTop > 0);
3086         mach->ContMask = mach->ContStack[--mach->ContStackTop];
3087      }
3088      UPDATE_EXEC_MASK(mach);
3089      break;
3090
3091   case TGSI_OPCODE_BRK:
3092      /* turn off loop channels for each enabled exec channel */
3093      mach->LoopMask &= ~mach->ExecMask;
3094      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3095      UPDATE_EXEC_MASK(mach);
3096      break;
3097
3098   case TGSI_OPCODE_CONT:
3099      /* turn off cont channels for each enabled exec channel */
3100      mach->ContMask &= ~mach->ExecMask;
3101      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3102      UPDATE_EXEC_MASK(mach);
3103      break;
3104
3105   case TGSI_OPCODE_BGNSUB:
3106      /* no-op */
3107      break;
3108
3109   case TGSI_OPCODE_ENDSUB:
3110      /* no-op */
3111      break;
3112
3113   case TGSI_OPCODE_NOISE1:
3114      assert( 0 );
3115      break;
3116
3117   case TGSI_OPCODE_NOISE2:
3118      assert( 0 );
3119      break;
3120
3121   case TGSI_OPCODE_NOISE3:
3122      assert( 0 );
3123      break;
3124
3125   case TGSI_OPCODE_NOISE4:
3126      assert( 0 );
3127      break;
3128
3129   case TGSI_OPCODE_NOP:
3130      break;
3131
3132   default:
3133      assert( 0 );
3134   }
3135}
3136
3137
3138/**
3139 * Run TGSI interpreter.
3140 * \return bitmask of "alive" quad components
3141 */
3142uint
3143tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3144{
3145   uint i;
3146   int pc = 0;
3147
3148   mach->CondMask = 0xf;
3149   mach->LoopMask = 0xf;
3150   mach->ContMask = 0xf;
3151   mach->FuncMask = 0xf;
3152   mach->ExecMask = 0xf;
3153
3154   mach->CondStackTop = 0; /* temporarily subvert this assertion */
3155   assert(mach->CondStackTop == 0);
3156   assert(mach->LoopStackTop == 0);
3157   assert(mach->ContStackTop == 0);
3158   assert(mach->CallStackTop == 0);
3159
3160   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3161   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3162
3163   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3164      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3165      mach->Primitives[0] = 0;
3166   }
3167
3168   for (i = 0; i < QUAD_SIZE; i++) {
3169      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3170         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3171         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3172         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3173         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3174   }
3175
3176   /* execute declarations (interpolants) */
3177   for (i = 0; i < mach->NumDeclarations; i++) {
3178      exec_declaration( mach, mach->Declarations+i );
3179   }
3180
3181   /* execute instructions, until pc is set to -1 */
3182   while (pc != -1) {
3183      assert(pc < (int) mach->NumInstructions);
3184      exec_instruction( mach, mach->Instructions + pc, &pc );
3185   }
3186
3187#if 0
3188   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3189   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3190      /*
3191       * Scale back depth component.
3192       */
3193      for (i = 0; i < 4; i++)
3194         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3195   }
3196#endif
3197
3198   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3199}
3200