tgsi_exec.c revision ede9f3b52ecb27ada81fee06a943bb595c60eaee
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers.  This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by the
 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 *   Michal Krol
50 *   Brian Paul
51 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_shader_tokens.h"
56#include "tgsi/tgsi_dump.h"
57#include "tgsi/tgsi_parse.h"
58#include "tgsi/tgsi_util.h"
59#include "tgsi_exec.h"
60#include "util/u_memory.h"
61#include "util/u_math.h"
62
/* Nonzero selects the util_fast_* approximations in exp2/lg2/pow. */
#define FAST_MATH 1

/* Lane index of each pixel within a 2x2 quad. */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Channel indices within a register. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3

/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
 */

/* Bit-pattern constants. */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C

/* Floating-point constants. */
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C

/* Machine bookkeeping registers. */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C

/* More floating-point constants. */
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C

/* First general-purpose scratch register. */
#define TEMP_R0            TGSI_EXEC_TEMP_R0
107
/**
 * Nonzero if channel CHAN is enabled in the write mask of the
 * instruction's first destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test as IS_CHANNEL_ENABLED, for the second destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel that
 * is enabled in the first destination register's write mask.
 * Expands to an unbraced for + if, so an 'else' placed directly after the
 * loop body would bind to the hidden 'if'.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED( INST, CHAN ))

/** As FOR_EACH_ENABLED_CHANNEL, for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (IS_CHANNEL_ENABLED2( INST, CHAN ))


/**
 * Recompute the execution mask as the intersection of the condition,
 * loop, continue and function masks.
 * MACH and the whole expansion are parenthesized so that any pointer
 * expression can be passed and the result composes safely.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
126
127
128static const union tgsi_exec_channel ZeroVec =
129   { { 0.0, 0.0, 0.0, 0.0 } };
130
131
#ifdef DEBUG
/**
 * Debug check: assert that none of the four float lanes of chan is
 * infinite or NaN.
 */
static void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      assert(!util_is_inf_or_nan(chan->f[lane]));
   }
}
#endif
142
143
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of a channel on one line,
 * prefixed with msg.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   /* Floats are promoted to double when passed through varargs anyway. */
   const double lane0 = chan->f[0];
   const double lane1 = chan->f[1];
   const double lane2 = chan->f[2];
   const double lane3 = chan->f[3];

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, lane0, lane1, lane2, lane3);
}
#endif
152
153
#ifdef DEBUG
/**
 * Debug helper: dump temporary register 'index' of the machine, one line
 * per channel (X, Y, Z, W), each showing the four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chanNames[4] = { 'X', 'Y', 'Z', 'W' };
   int chan = 0;

   debug_printf("Temp[%u] =\n", index);

   while (chan < 4) {
      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   chanNames[chan],
                   mach->Temps[index].xyzw[chan].f[0],
                   mach->Temps[index].xyzw[chan].f[1],
                   mach->Temps[index].xyzw[chan].f[2],
                   mach->Temps[index].xyzw[chan].f[3]);
      chan++;
   }
}
#endif
171
172
173/**
174 * Check if there's a potential src/dst register data dependency when
175 * using SOA execution.
176 * Example:
177 *   MOV T, T.yxwz;
178 * This would expand into:
179 *   MOV t0, t1;
180 *   MOV t1, t0;
181 *   MOV t2, t3;
182 *   MOV t3, t2;
183 * The second instruction will have the wrong value for t0 if executed as-is.
184 */
185static boolean
186tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
187{
188   uint i, chan;
189
190   uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
191   if (writemask == TGSI_WRITEMASK_X ||
192       writemask == TGSI_WRITEMASK_Y ||
193       writemask == TGSI_WRITEMASK_Z ||
194       writemask == TGSI_WRITEMASK_W ||
195       writemask == TGSI_WRITEMASK_NONE) {
196      /* no chance of data dependency */
197      return FALSE;
198   }
199
200   /* loop over src regs */
201   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
202      if ((inst->FullSrcRegisters[i].SrcRegister.File ==
203           inst->FullDstRegisters[0].DstRegister.File) &&
204          (inst->FullSrcRegisters[i].SrcRegister.Index ==
205           inst->FullDstRegisters[0].DstRegister.Index)) {
206         /* loop over dest channels */
207         uint channelsWritten = 0x0;
208         FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
209            /* check if we're reading a channel that's been written */
210            uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan);
211            if (swizzle <= TGSI_SWIZZLE_W &&
212                (channelsWritten & (1 << swizzle))) {
213               return TRUE;
214            }
215
216            channelsWritten |= (1 << chan);
217         }
218      }
219   }
220   return FALSE;
221}
222
223
224/**
225 * Initialize machine state by expanding tokens to full instructions,
226 * allocating temporary storage, setting up constants, etc.
227 * After this, we can call tgsi_exec_machine_run() many times.
228 */
229void
230tgsi_exec_machine_bind_shader(
231   struct tgsi_exec_machine *mach,
232   const struct tgsi_token *tokens,
233   uint numSamplers,
234   struct tgsi_sampler **samplers)
235{
236   uint k;
237   struct tgsi_parse_context parse;
238   struct tgsi_exec_labels *labels = &mach->Labels;
239   struct tgsi_full_instruction *instructions;
240   struct tgsi_full_declaration *declarations;
241   uint maxInstructions = 10, numInstructions = 0;
242   uint maxDeclarations = 10, numDeclarations = 0;
243   uint instno = 0;
244
245#if 0
246   tgsi_dump(tokens, 0);
247#endif
248
249   util_init_math();
250
251   mach->Tokens = tokens;
252   mach->Samplers = samplers;
253
254   k = tgsi_parse_init (&parse, mach->Tokens);
255   if (k != TGSI_PARSE_OK) {
256      debug_printf( "Problem parsing!\n" );
257      return;
258   }
259
260   mach->Processor = parse.FullHeader.Processor.Processor;
261   mach->ImmLimit = 0;
262   labels->count = 0;
263
264   declarations = (struct tgsi_full_declaration *)
265      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
266
267   if (!declarations) {
268      return;
269   }
270
271   instructions = (struct tgsi_full_instruction *)
272      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
273
274   if (!instructions) {
275      FREE( declarations );
276      return;
277   }
278
279   while( !tgsi_parse_end_of_tokens( &parse ) ) {
280      uint pointer = parse.Position;
281      uint i;
282
283      tgsi_parse_token( &parse );
284      switch( parse.FullToken.Token.Type ) {
285      case TGSI_TOKEN_TYPE_DECLARATION:
286         /* save expanded declaration */
287         if (numDeclarations == maxDeclarations) {
288            declarations = REALLOC(declarations,
289                                   maxDeclarations
290                                   * sizeof(struct tgsi_full_declaration),
291                                   (maxDeclarations + 10)
292                                   * sizeof(struct tgsi_full_declaration));
293            maxDeclarations += 10;
294         }
295         memcpy(declarations + numDeclarations,
296                &parse.FullToken.FullDeclaration,
297                sizeof(declarations[0]));
298         numDeclarations++;
299         break;
300
301      case TGSI_TOKEN_TYPE_IMMEDIATE:
302         {
303            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
304            assert( size <= 4 );
305            assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
306
307            for( i = 0; i < size; i++ ) {
308               mach->Imms[mach->ImmLimit][i] =
309		  parse.FullToken.FullImmediate.u[i].Float;
310            }
311            mach->ImmLimit += 1;
312         }
313         break;
314
315      case TGSI_TOKEN_TYPE_INSTRUCTION:
316         assert( labels->count < MAX_LABELS );
317
318         labels->labels[labels->count][0] = instno;
319         labels->labels[labels->count][1] = pointer;
320         labels->count++;
321
322         /* save expanded instruction */
323         if (numInstructions == maxInstructions) {
324            instructions = REALLOC(instructions,
325                                   maxInstructions
326                                   * sizeof(struct tgsi_full_instruction),
327                                   (maxInstructions + 10)
328                                   * sizeof(struct tgsi_full_instruction));
329            maxInstructions += 10;
330         }
331         memcpy(instructions + numInstructions,
332                &parse.FullToken.FullInstruction,
333                sizeof(instructions[0]));
334
335#if 0
336         if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
337            debug_printf("SOA dependency in instruction:\n");
338            tgsi_dump_instruction(&parse.FullToken.FullInstruction,
339                                  numInstructions);
340         }
341#else
342         (void) tgsi_check_soa_dependencies;
343#endif
344
345         numInstructions++;
346         break;
347
348      default:
349         assert( 0 );
350      }
351   }
352   tgsi_parse_free (&parse);
353
354   if (mach->Declarations) {
355      FREE( mach->Declarations );
356   }
357   mach->Declarations = declarations;
358   mach->NumDeclarations = numDeclarations;
359
360   if (mach->Instructions) {
361      FREE( mach->Instructions );
362   }
363   mach->Instructions = instructions;
364   mach->NumInstructions = numInstructions;
365}
366
367
368struct tgsi_exec_machine *
369tgsi_exec_machine_create( void )
370{
371   struct tgsi_exec_machine *mach;
372   uint i;
373
374   mach = align_malloc( sizeof *mach, 16 );
375   if (!mach)
376      goto fail;
377
378   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
379
380   mach->Samplers = NULL;
381   mach->Consts = NULL;
382   mach->Tokens = NULL;
383   mach->Primitives = NULL;
384   mach->InterpCoefs = NULL;
385   mach->Instructions = NULL;
386   mach->Declarations = NULL;
387
388   /* Setup constants. */
389   for( i = 0; i < 4; i++ ) {
390      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
391      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
392      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
393      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
394      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
395      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
396      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
397      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
398      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
399      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
400   }
401
402#ifdef DEBUG
403   /* silence warnings */
404   (void) print_chan;
405   (void) print_temp;
406#endif
407
408   return mach;
409
410fail:
411   align_free(mach);
412   return NULL;
413}
414
415
416void
417tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
418{
419   if (mach) {
420      FREE(mach->Instructions);
421      FREE(mach->Declarations);
422   }
423
424   align_free(mach);
425}
426
427
428static void
429micro_abs(
430   union tgsi_exec_channel *dst,
431   const union tgsi_exec_channel *src )
432{
433   dst->f[0] = fabsf( src->f[0] );
434   dst->f[1] = fabsf( src->f[1] );
435   dst->f[2] = fabsf( src->f[2] );
436   dst->f[3] = fabsf( src->f[3] );
437}
438
439static void
440micro_add(
441   union tgsi_exec_channel *dst,
442   const union tgsi_exec_channel *src0,
443   const union tgsi_exec_channel *src1 )
444{
445   dst->f[0] = src0->f[0] + src1->f[0];
446   dst->f[1] = src0->f[1] + src1->f[1];
447   dst->f[2] = src0->f[2] + src1->f[2];
448   dst->f[3] = src0->f[3] + src1->f[3];
449}
450
#if 0
/** Per-lane signed integer addition: dst = src0 + src1. (disabled) */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
464
465static void
466micro_and(
467   union tgsi_exec_channel *dst,
468   const union tgsi_exec_channel *src0,
469   const union tgsi_exec_channel *src1 )
470{
471   dst->u[0] = src0->u[0] & src1->u[0];
472   dst->u[1] = src0->u[1] & src1->u[1];
473   dst->u[2] = src0->u[2] & src1->u[2];
474   dst->u[3] = src0->u[3] & src1->u[3];
475}
476
477static void
478micro_ceil(
479   union tgsi_exec_channel *dst,
480   const union tgsi_exec_channel *src )
481{
482   dst->f[0] = ceilf( src->f[0] );
483   dst->f[1] = ceilf( src->f[1] );
484   dst->f[2] = ceilf( src->f[2] );
485   dst->f[3] = ceilf( src->f[3] );
486}
487
488static void
489micro_cos(
490   union tgsi_exec_channel *dst,
491   const union tgsi_exec_channel *src )
492{
493   dst->f[0] = cosf( src->f[0] );
494   dst->f[1] = cosf( src->f[1] );
495   dst->f[2] = cosf( src->f[2] );
496   dst->f[3] = cosf( src->f[3] );
497}
498
499static void
500micro_ddx(
501   union tgsi_exec_channel *dst,
502   const union tgsi_exec_channel *src )
503{
504   dst->f[0] =
505   dst->f[1] =
506   dst->f[2] =
507   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
508}
509
510static void
511micro_ddy(
512   union tgsi_exec_channel *dst,
513   const union tgsi_exec_channel *src )
514{
515   dst->f[0] =
516   dst->f[1] =
517   dst->f[2] =
518   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
519}
520
521static void
522micro_div(
523   union tgsi_exec_channel *dst,
524   const union tgsi_exec_channel *src0,
525   const union tgsi_exec_channel *src1 )
526{
527   if (src1->f[0] != 0) {
528      dst->f[0] = src0->f[0] / src1->f[0];
529   }
530   if (src1->f[1] != 0) {
531      dst->f[1] = src0->f[1] / src1->f[1];
532   }
533   if (src1->f[2] != 0) {
534      dst->f[2] = src0->f[2] / src1->f[2];
535   }
536   if (src1->f[3] != 0) {
537      dst->f[3] = src0->f[3] / src1->f[3];
538   }
539}
540
#if 0
/** Per-lane unsigned division: dst = src0 / src1. (disabled) */
static void
micro_udiv(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
554
555static void
556micro_eq(
557   union tgsi_exec_channel *dst,
558   const union tgsi_exec_channel *src0,
559   const union tgsi_exec_channel *src1,
560   const union tgsi_exec_channel *src2,
561   const union tgsi_exec_channel *src3 )
562{
563   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
564   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
565   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
566   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
567}
568
#if 0
/** Per-lane signed integer select: dst = (src0 == src1) ? src2 : src3. (disabled) */
static void
micro_ieq(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
584
585static void
586micro_exp2(
587   union tgsi_exec_channel *dst,
588   const union tgsi_exec_channel *src)
589{
590#if FAST_MATH
591   dst->f[0] = util_fast_exp2( src->f[0] );
592   dst->f[1] = util_fast_exp2( src->f[1] );
593   dst->f[2] = util_fast_exp2( src->f[2] );
594   dst->f[3] = util_fast_exp2( src->f[3] );
595#else
596   dst->f[0] = powf( 2.0f, src->f[0] );
597   dst->f[1] = powf( 2.0f, src->f[1] );
598   dst->f[2] = powf( 2.0f, src->f[2] );
599   dst->f[3] = powf( 2.0f, src->f[3] );
600#endif
601}
602
#if 0
/** Per-lane float to unsigned conversion via a C cast. (disabled) */
static void
micro_f2ut(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
615
616static void
617micro_float_clamp(union tgsi_exec_channel *dst,
618                  const union tgsi_exec_channel *src)
619{
620   uint i;
621
622   for (i = 0; i < 4; i++) {
623      if (src->f[i] > 0.0f) {
624         if (src->f[i] > 1.884467e+019f)
625            dst->f[i] = 1.884467e+019f;
626         else if (src->f[i] < 5.42101e-020f)
627            dst->f[i] = 5.42101e-020f;
628         else
629            dst->f[i] = src->f[i];
630      }
631      else {
632         if (src->f[i] < -1.884467e+019f)
633            dst->f[i] = -1.884467e+019f;
634         else if (src->f[i] > -5.42101e-020f)
635            dst->f[i] = -5.42101e-020f;
636         else
637            dst->f[i] = src->f[i];
638      }
639   }
640}
641
642static void
643micro_flr(
644   union tgsi_exec_channel *dst,
645   const union tgsi_exec_channel *src )
646{
647   dst->f[0] = floorf( src->f[0] );
648   dst->f[1] = floorf( src->f[1] );
649   dst->f[2] = floorf( src->f[2] );
650   dst->f[3] = floorf( src->f[3] );
651}
652
653static void
654micro_frc(
655   union tgsi_exec_channel *dst,
656   const union tgsi_exec_channel *src )
657{
658   dst->f[0] = src->f[0] - floorf( src->f[0] );
659   dst->f[1] = src->f[1] - floorf( src->f[1] );
660   dst->f[2] = src->f[2] - floorf( src->f[2] );
661   dst->f[3] = src->f[3] - floorf( src->f[3] );
662}
663
664static void
665micro_i2f(
666   union tgsi_exec_channel *dst,
667   const union tgsi_exec_channel *src )
668{
669   dst->f[0] = (float) src->i[0];
670   dst->f[1] = (float) src->i[1];
671   dst->f[2] = (float) src->i[2];
672   dst->f[3] = (float) src->i[3];
673}
674
675static void
676micro_lg2(
677   union tgsi_exec_channel *dst,
678   const union tgsi_exec_channel *src )
679{
680#if FAST_MATH
681   dst->f[0] = util_fast_log2( src->f[0] );
682   dst->f[1] = util_fast_log2( src->f[1] );
683   dst->f[2] = util_fast_log2( src->f[2] );
684   dst->f[3] = util_fast_log2( src->f[3] );
685#else
686   dst->f[0] = logf( src->f[0] ) * 1.442695f;
687   dst->f[1] = logf( src->f[1] ) * 1.442695f;
688   dst->f[2] = logf( src->f[2] ) * 1.442695f;
689   dst->f[3] = logf( src->f[3] ) * 1.442695f;
690#endif
691}
692
693static void
694micro_le(
695   union tgsi_exec_channel *dst,
696   const union tgsi_exec_channel *src0,
697   const union tgsi_exec_channel *src1,
698   const union tgsi_exec_channel *src2,
699   const union tgsi_exec_channel *src3 )
700{
701   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
702   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
703   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
704   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
705}
706
707static void
708micro_lt(
709   union tgsi_exec_channel *dst,
710   const union tgsi_exec_channel *src0,
711   const union tgsi_exec_channel *src1,
712   const union tgsi_exec_channel *src2,
713   const union tgsi_exec_channel *src3 )
714{
715   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
716   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
717   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
718   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
719}
720
#if 0
/** Per-lane signed integer select: dst = (src0 < src1) ? src2 : src3. (disabled) */
static void
micro_ilt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
736
#if 0
/** Per-lane unsigned select: dst = (src0 < src1) ? src2 : src3. (disabled) */
static void
micro_ult(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
752
753static void
754micro_max(
755   union tgsi_exec_channel *dst,
756   const union tgsi_exec_channel *src0,
757   const union tgsi_exec_channel *src1 )
758{
759   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
760   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
761   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
762   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
763}
764
#if 0
/** Per-lane signed integer maximum. (disabled) */
static void
micro_imax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const int a = src0->i[lane];
      const int b = src1->i[lane];

      if (a > b) {
         dst->i[lane] = a;
      }
      else {
         dst->i[lane] = b;
      }
   }
}
#endif
778
#if 0
/** Per-lane unsigned maximum. (disabled) */
static void
micro_umax(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
792
793static void
794micro_min(
795   union tgsi_exec_channel *dst,
796   const union tgsi_exec_channel *src0,
797   const union tgsi_exec_channel *src1 )
798{
799   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
800   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
801   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
802   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
803}
804
#if 0
/** Per-lane signed integer minimum. (disabled) */
static void
micro_imin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      const int a = src0->i[lane];
      const int b = src1->i[lane];

      if (a < b) {
         dst->i[lane] = a;
      }
      else {
         dst->i[lane] = b;
      }
   }
}
#endif
818
#if 0
/** Per-lane unsigned minimum. (disabled) */
static void
micro_umin(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
832
#if 0
/** Per-lane unsigned remainder: dst = src0 % src1. (disabled) */
static void
micro_umod(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
846
847static void
848micro_mul(
849   union tgsi_exec_channel *dst,
850   const union tgsi_exec_channel *src0,
851   const union tgsi_exec_channel *src1 )
852{
853   dst->f[0] = src0->f[0] * src1->f[0];
854   dst->f[1] = src0->f[1] * src1->f[1];
855   dst->f[2] = src0->f[2] * src1->f[2];
856   dst->f[3] = src0->f[3] * src1->f[3];
857}
858
#if 0
/** Per-lane signed integer multiplication: dst = src0 * src1. (disabled) */
static void
micro_imul(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] * src1->i[lane];
   }
}
#endif
872
#if 0
/**
 * Per-lane signed multiply with two destinations: dst1 receives the
 * int-width product and dst0 is set to zero. (disabled)
 */
static void
micro_imul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* All products are written before dst0 is cleared. */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
891
#if 0
/**
 * Per-lane unsigned multiply with two destinations: dst1 receives the
 * unsigned-width product and dst0 is set to zero. (disabled)
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   /* All products are written before dst0 is cleared. */
   for (lane = 0; lane < 4; lane++) {
      dst1->u[lane] = src0->u[lane] * src1->u[lane];
   }
   for (lane = 0; lane < 4; lane++) {
      dst0->u[lane] = 0;
   }
}
#endif
910
911
#if 0
/** Per-lane conditional move: dst = src0 (nonzero bits) ? src1 : src2. (disabled) */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane]) {
         dst->u[lane] = src1->u[lane];
      }
      else {
         dst->u[lane] = src2->u[lane];
      }
   }
}
#endif
926
927static void
928micro_neg(
929   union tgsi_exec_channel *dst,
930   const union tgsi_exec_channel *src )
931{
932   dst->f[0] = -src->f[0];
933   dst->f[1] = -src->f[1];
934   dst->f[2] = -src->f[2];
935   dst->f[3] = -src->f[3];
936}
937
#if 0
/** Per-lane signed integer negation: dst = -src. (disabled) */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = -src->i[lane];
   }
}
#endif
950
951static void
952micro_not(
953   union tgsi_exec_channel *dst,
954   const union tgsi_exec_channel *src )
955{
956   dst->u[0] = ~src->u[0];
957   dst->u[1] = ~src->u[1];
958   dst->u[2] = ~src->u[2];
959   dst->u[3] = ~src->u[3];
960}
961
962static void
963micro_or(
964   union tgsi_exec_channel *dst,
965   const union tgsi_exec_channel *src0,
966   const union tgsi_exec_channel *src1 )
967{
968   dst->u[0] = src0->u[0] | src1->u[0];
969   dst->u[1] = src0->u[1] | src1->u[1];
970   dst->u[2] = src0->u[2] | src1->u[2];
971   dst->u[3] = src0->u[3] | src1->u[3];
972}
973
974static void
975micro_pow(
976   union tgsi_exec_channel *dst,
977   const union tgsi_exec_channel *src0,
978   const union tgsi_exec_channel *src1 )
979{
980#if FAST_MATH
981   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
982   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
983   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
984   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
985#else
986   dst->f[0] = powf( src0->f[0], src1->f[0] );
987   dst->f[1] = powf( src0->f[1], src1->f[1] );
988   dst->f[2] = powf( src0->f[2], src1->f[2] );
989   dst->f[3] = powf( src0->f[3], src1->f[3] );
990#endif
991}
992
993static void
994micro_rnd(
995   union tgsi_exec_channel *dst,
996   const union tgsi_exec_channel *src )
997{
998   dst->f[0] = floorf( src->f[0] + 0.5f );
999   dst->f[1] = floorf( src->f[1] + 0.5f );
1000   dst->f[2] = floorf( src->f[2] + 0.5f );
1001   dst->f[3] = floorf( src->f[3] + 0.5f );
1002}
1003
1004static void
1005micro_sgn(
1006   union tgsi_exec_channel *dst,
1007   const union tgsi_exec_channel *src )
1008{
1009   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
1010   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
1011   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
1012   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
1013}
1014
1015static void
1016micro_shl(
1017   union tgsi_exec_channel *dst,
1018   const union tgsi_exec_channel *src0,
1019   const union tgsi_exec_channel *src1 )
1020{
1021   dst->i[0] = src0->i[0] << src1->i[0];
1022   dst->i[1] = src0->i[1] << src1->i[1];
1023   dst->i[2] = src0->i[2] << src1->i[2];
1024   dst->i[3] = src0->i[3] << src1->i[3];
1025}
1026
1027static void
1028micro_ishr(
1029   union tgsi_exec_channel *dst,
1030   const union tgsi_exec_channel *src0,
1031   const union tgsi_exec_channel *src1 )
1032{
1033   dst->i[0] = src0->i[0] >> src1->i[0];
1034   dst->i[1] = src0->i[1] >> src1->i[1];
1035   dst->i[2] = src0->i[2] >> src1->i[2];
1036   dst->i[3] = src0->i[3] >> src1->i[3];
1037}
1038
1039static void
1040micro_trunc(
1041   union tgsi_exec_channel *dst,
1042   const union tgsi_exec_channel *src0 )
1043{
1044   dst->f[0] = (float) (int) src0->f[0];
1045   dst->f[1] = (float) (int) src0->f[1];
1046   dst->f[2] = (float) (int) src0->f[2];
1047   dst->f[3] = (float) (int) src0->f[3];
1048}
1049
#if 0
/**
 * Per-lane unsigned shift right: dst = src0 >> src1.
 * Currently compiled out.
 */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] >> src1->u[lane];
   }
}
#endif
1063
1064static void
1065micro_sin(
1066   union tgsi_exec_channel *dst,
1067   const union tgsi_exec_channel *src )
1068{
1069   dst->f[0] = sinf( src->f[0] );
1070   dst->f[1] = sinf( src->f[1] );
1071   dst->f[2] = sinf( src->f[2] );
1072   dst->f[3] = sinf( src->f[3] );
1073}
1074
1075static void
1076micro_sqrt( union tgsi_exec_channel *dst,
1077            const union tgsi_exec_channel *src )
1078{
1079   dst->f[0] = sqrtf( src->f[0] );
1080   dst->f[1] = sqrtf( src->f[1] );
1081   dst->f[2] = sqrtf( src->f[2] );
1082   dst->f[3] = sqrtf( src->f[3] );
1083}
1084
1085static void
1086micro_sub(
1087   union tgsi_exec_channel *dst,
1088   const union tgsi_exec_channel *src0,
1089   const union tgsi_exec_channel *src1 )
1090{
1091   dst->f[0] = src0->f[0] - src1->f[0];
1092   dst->f[1] = src0->f[1] - src1->f[1];
1093   dst->f[2] = src0->f[2] - src1->f[2];
1094   dst->f[3] = src0->f[3] - src1->f[3];
1095}
1096
#if 0
/**
 * Per-lane conversion from unsigned integer to float.
 * Currently compiled out.
 */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->f[lane] = (float) src->u[lane];
   }
}
#endif
1109
1110static void
1111micro_xor(
1112   union tgsi_exec_channel *dst,
1113   const union tgsi_exec_channel *src0,
1114   const union tgsi_exec_channel *src1 )
1115{
1116   dst->u[0] = src0->u[0] ^ src1->u[0];
1117   dst->u[1] = src0->u[1] ^ src1->u[1];
1118   dst->u[2] = src0->u[2] ^ src1->u[2];
1119   dst->u[3] = src0->u[3] ^ src1->u[3];
1120}
1121
1122static void
1123fetch_src_file_channel(
1124   const struct tgsi_exec_machine *mach,
1125   const uint file,
1126   const uint swizzle,
1127   const union tgsi_exec_channel *index,
1128   union tgsi_exec_channel *chan )
1129{
1130   switch( swizzle ) {
1131   case TGSI_EXTSWIZZLE_X:
1132   case TGSI_EXTSWIZZLE_Y:
1133   case TGSI_EXTSWIZZLE_Z:
1134   case TGSI_EXTSWIZZLE_W:
1135      switch( file ) {
1136      case TGSI_FILE_CONSTANT:
1137         assert(mach->Consts);
1138         if (index->i[0] < 0)
1139            chan->f[0] = 0.0f;
1140         else
1141            chan->f[0] = mach->Consts[index->i[0]][swizzle];
1142         if (index->i[1] < 0)
1143            chan->f[1] = 0.0f;
1144         else
1145            chan->f[1] = mach->Consts[index->i[1]][swizzle];
1146         if (index->i[2] < 0)
1147            chan->f[2] = 0.0f;
1148         else
1149            chan->f[2] = mach->Consts[index->i[2]][swizzle];
1150         if (index->i[3] < 0)
1151            chan->f[3] = 0.0f;
1152         else
1153            chan->f[3] = mach->Consts[index->i[3]][swizzle];
1154         break;
1155
1156      case TGSI_FILE_INPUT:
1157         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1158         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1159         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1160         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1161         break;
1162
1163      case TGSI_FILE_TEMPORARY:
1164         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1165         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1166         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1167         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1168         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1169         break;
1170
1171      case TGSI_FILE_IMMEDIATE:
1172         assert( index->i[0] < (int) mach->ImmLimit );
1173         chan->f[0] = mach->Imms[index->i[0]][swizzle];
1174         assert( index->i[1] < (int) mach->ImmLimit );
1175         chan->f[1] = mach->Imms[index->i[1]][swizzle];
1176         assert( index->i[2] < (int) mach->ImmLimit );
1177         chan->f[2] = mach->Imms[index->i[2]][swizzle];
1178         assert( index->i[3] < (int) mach->ImmLimit );
1179         chan->f[3] = mach->Imms[index->i[3]][swizzle];
1180         break;
1181
1182      case TGSI_FILE_ADDRESS:
1183         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1184         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1185         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1186         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1187         break;
1188
1189      case TGSI_FILE_OUTPUT:
1190         /* vertex/fragment output vars can be read too */
1191         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1192         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1193         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1194         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1195         break;
1196
1197      default:
1198         assert( 0 );
1199      }
1200      break;
1201
1202   case TGSI_EXTSWIZZLE_ZERO:
1203      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1204      break;
1205
1206   case TGSI_EXTSWIZZLE_ONE:
1207      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1208      break;
1209
1210   default:
1211      assert( 0 );
1212   }
1213}
1214
1215static void
1216fetch_source(
1217   const struct tgsi_exec_machine *mach,
1218   union tgsi_exec_channel *chan,
1219   const struct tgsi_full_src_register *reg,
1220   const uint chan_index )
1221{
1222   union tgsi_exec_channel index;
1223   uint swizzle;
1224
1225   /* We start with a direct index into a register file.
1226    *
1227    *    file[1],
1228    *    where:
1229    *       file = SrcRegister.File
1230    *       [1] = SrcRegister.Index
1231    */
1232   index.i[0] =
1233   index.i[1] =
1234   index.i[2] =
1235   index.i[3] = reg->SrcRegister.Index;
1236
1237   /* There is an extra source register that indirectly subscripts
1238    * a register file. The direct index now becomes an offset
1239    * that is being added to the indirect register.
1240    *
1241    *    file[ind[2].x+1],
1242    *    where:
1243    *       ind = SrcRegisterInd.File
1244    *       [2] = SrcRegisterInd.Index
1245    *       .x = SrcRegisterInd.SwizzleX
1246    */
1247   if (reg->SrcRegister.Indirect) {
1248      union tgsi_exec_channel index2;
1249      union tgsi_exec_channel indir_index;
1250      const uint execmask = mach->ExecMask;
1251      uint i;
1252
1253      /* which address register (always zero now) */
1254      index2.i[0] =
1255      index2.i[1] =
1256      index2.i[2] =
1257      index2.i[3] = reg->SrcRegisterInd.Index;
1258
1259      /* get current value of address register[swizzle] */
1260      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1261      fetch_src_file_channel(
1262         mach,
1263         reg->SrcRegisterInd.File,
1264         swizzle,
1265         &index2,
1266         &indir_index );
1267
1268      /* add value of address register to the offset */
1269      index.i[0] += (int) indir_index.f[0];
1270      index.i[1] += (int) indir_index.f[1];
1271      index.i[2] += (int) indir_index.f[2];
1272      index.i[3] += (int) indir_index.f[3];
1273
1274      /* for disabled execution channels, zero-out the index to
1275       * avoid using a potential garbage value.
1276       */
1277      for (i = 0; i < QUAD_SIZE; i++) {
1278         if ((execmask & (1 << i)) == 0)
1279            index.i[i] = 0;
1280      }
1281   }
1282
1283   /* There is an extra source register that is a second
1284    * subscript to a register file. Effectively it means that
1285    * the register file is actually a 2D array of registers.
1286    *
1287    *    file[1][3] == file[1*sizeof(file[1])+3],
1288    *    where:
1289    *       [3] = SrcRegisterDim.Index
1290    */
1291   if (reg->SrcRegister.Dimension) {
1292      /* The size of the first-order array depends on the register file type.
1293       * We need to multiply the index to the first array to get an effective,
1294       * "flat" index that points to the beginning of the second-order array.
1295       */
1296      switch (reg->SrcRegister.File) {
1297      case TGSI_FILE_INPUT:
1298         index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1299         index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1300         index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1301         index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1302         break;
1303      case TGSI_FILE_CONSTANT:
1304         index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1305         index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1306         index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1307         index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1308         break;
1309      default:
1310         assert( 0 );
1311      }
1312
1313      index.i[0] += reg->SrcRegisterDim.Index;
1314      index.i[1] += reg->SrcRegisterDim.Index;
1315      index.i[2] += reg->SrcRegisterDim.Index;
1316      index.i[3] += reg->SrcRegisterDim.Index;
1317
1318      /* Again, the second subscript index can be addressed indirectly
1319       * identically to the first one.
1320       * Nothing stops us from indirectly addressing the indirect register,
1321       * but there is no need for that, so we won't exercise it.
1322       *
1323       *    file[1][ind[4].y+3],
1324       *    where:
1325       *       ind = SrcRegisterDimInd.File
1326       *       [4] = SrcRegisterDimInd.Index
1327       *       .y = SrcRegisterDimInd.SwizzleX
1328       */
1329      if (reg->SrcRegisterDim.Indirect) {
1330         union tgsi_exec_channel index2;
1331         union tgsi_exec_channel indir_index;
1332         const uint execmask = mach->ExecMask;
1333         uint i;
1334
1335         index2.i[0] =
1336         index2.i[1] =
1337         index2.i[2] =
1338         index2.i[3] = reg->SrcRegisterDimInd.Index;
1339
1340         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1341         fetch_src_file_channel(
1342            mach,
1343            reg->SrcRegisterDimInd.File,
1344            swizzle,
1345            &index2,
1346            &indir_index );
1347
1348         index.i[0] += (int) indir_index.f[0];
1349         index.i[1] += (int) indir_index.f[1];
1350         index.i[2] += (int) indir_index.f[2];
1351         index.i[3] += (int) indir_index.f[3];
1352
1353         /* for disabled execution channels, zero-out the index to
1354          * avoid using a potential garbage value.
1355          */
1356         for (i = 0; i < QUAD_SIZE; i++) {
1357            if ((execmask & (1 << i)) == 0)
1358               index.i[i] = 0;
1359         }
1360      }
1361
1362      /* If by any chance there was a need for a 3D array of register
1363       * files, we would have to check whether SrcRegisterDim is followed
1364       * by a dimension register and continue the saga.
1365       */
1366   }
1367
1368   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1369   fetch_src_file_channel(
1370      mach,
1371      reg->SrcRegister.File,
1372      swizzle,
1373      &index,
1374      chan );
1375
1376   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1377   case TGSI_UTIL_SIGN_CLEAR:
1378      micro_abs( chan, chan );
1379      break;
1380
1381   case TGSI_UTIL_SIGN_SET:
1382      micro_abs( chan, chan );
1383      micro_neg( chan, chan );
1384      break;
1385
1386   case TGSI_UTIL_SIGN_TOGGLE:
1387      micro_neg( chan, chan );
1388      break;
1389
1390   case TGSI_UTIL_SIGN_KEEP:
1391      break;
1392   }
1393
1394   if (reg->SrcRegisterExtMod.Complement) {
1395      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1396   }
1397}
1398
1399static void
1400store_dest(
1401   struct tgsi_exec_machine *mach,
1402   const union tgsi_exec_channel *chan,
1403   const struct tgsi_full_dst_register *reg,
1404   const struct tgsi_full_instruction *inst,
1405   uint chan_index )
1406{
1407   uint i;
1408   union tgsi_exec_channel null;
1409   union tgsi_exec_channel *dst;
1410   uint execmask = mach->ExecMask;
1411   int offset = 0;  /* indirection offset */
1412   int index;
1413
1414#ifdef DEBUG
1415   check_inf_or_nan(chan);
1416#endif
1417
1418   /* There is an extra source register that indirectly subscripts
1419    * a register file. The direct index now becomes an offset
1420    * that is being added to the indirect register.
1421    *
1422    *    file[ind[2].x+1],
1423    *    where:
1424    *       ind = DstRegisterInd.File
1425    *       [2] = DstRegisterInd.Index
1426    *       .x = DstRegisterInd.SwizzleX
1427    */
1428   if (reg->DstRegister.Indirect) {
1429      union tgsi_exec_channel index;
1430      union tgsi_exec_channel indir_index;
1431      uint swizzle;
1432
1433      /* which address register (always zero for now) */
1434      index.i[0] =
1435      index.i[1] =
1436      index.i[2] =
1437      index.i[3] = reg->DstRegisterInd.Index;
1438
1439      /* get current value of address register[swizzle] */
1440      swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
1441
1442      /* fetch values from the address/indirection register */
1443      fetch_src_file_channel(
1444         mach,
1445         reg->DstRegisterInd.File,
1446         swizzle,
1447         &index,
1448         &indir_index );
1449
1450      /* save indirection offset */
1451      offset = (int) indir_index.f[0];
1452   }
1453
1454   switch (reg->DstRegister.File) {
1455   case TGSI_FILE_NULL:
1456      dst = &null;
1457      break;
1458
1459   case TGSI_FILE_OUTPUT:
1460      index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1461         + reg->DstRegister.Index;
1462      dst = &mach->Outputs[offset + index].xyzw[chan_index];
1463      break;
1464
1465   case TGSI_FILE_TEMPORARY:
1466      index = reg->DstRegister.Index;
1467      assert( index < TGSI_EXEC_NUM_TEMPS );
1468      dst = &mach->Temps[offset + index].xyzw[chan_index];
1469      break;
1470
1471   case TGSI_FILE_ADDRESS:
1472      index = reg->DstRegister.Index;
1473      dst = &mach->Addrs[index].xyzw[chan_index];
1474      break;
1475
1476   default:
1477      assert( 0 );
1478      return;
1479   }
1480
1481   if (inst->InstructionExtNv.CondFlowEnable) {
1482      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1483      uint swizzle;
1484      uint shift;
1485      uint mask;
1486      uint test;
1487
1488      /* Only CC0 supported.
1489       */
1490      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1491
1492      switch (chan_index) {
1493      case CHAN_X:
1494         swizzle = inst->InstructionExtNv.CondSwizzleX;
1495         break;
1496      case CHAN_Y:
1497         swizzle = inst->InstructionExtNv.CondSwizzleY;
1498         break;
1499      case CHAN_Z:
1500         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1501         break;
1502      case CHAN_W:
1503         swizzle = inst->InstructionExtNv.CondSwizzleW;
1504         break;
1505      default:
1506         assert( 0 );
1507         return;
1508      }
1509
1510      switch (swizzle) {
1511      case TGSI_SWIZZLE_X:
1512         shift = TGSI_EXEC_CC_X_SHIFT;
1513         mask = TGSI_EXEC_CC_X_MASK;
1514         break;
1515      case TGSI_SWIZZLE_Y:
1516         shift = TGSI_EXEC_CC_Y_SHIFT;
1517         mask = TGSI_EXEC_CC_Y_MASK;
1518         break;
1519      case TGSI_SWIZZLE_Z:
1520         shift = TGSI_EXEC_CC_Z_SHIFT;
1521         mask = TGSI_EXEC_CC_Z_MASK;
1522         break;
1523      case TGSI_SWIZZLE_W:
1524         shift = TGSI_EXEC_CC_W_SHIFT;
1525         mask = TGSI_EXEC_CC_W_MASK;
1526         break;
1527      default:
1528         assert( 0 );
1529         return;
1530      }
1531
1532      switch (inst->InstructionExtNv.CondMask) {
1533      case TGSI_CC_GT:
1534         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1535         for (i = 0; i < QUAD_SIZE; i++)
1536            if (cc->u[i] & test)
1537               execmask &= ~(1 << i);
1538         break;
1539
1540      case TGSI_CC_EQ:
1541         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1542         for (i = 0; i < QUAD_SIZE; i++)
1543            if (cc->u[i] & test)
1544               execmask &= ~(1 << i);
1545         break;
1546
1547      case TGSI_CC_LT:
1548         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1549         for (i = 0; i < QUAD_SIZE; i++)
1550            if (cc->u[i] & test)
1551               execmask &= ~(1 << i);
1552         break;
1553
1554      case TGSI_CC_GE:
1555         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1556         for (i = 0; i < QUAD_SIZE; i++)
1557            if (cc->u[i] & test)
1558               execmask &= ~(1 << i);
1559         break;
1560
1561      case TGSI_CC_LE:
1562         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1563         for (i = 0; i < QUAD_SIZE; i++)
1564            if (cc->u[i] & test)
1565               execmask &= ~(1 << i);
1566         break;
1567
1568      case TGSI_CC_NE:
1569         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1570         for (i = 0; i < QUAD_SIZE; i++)
1571            if (cc->u[i] & test)
1572               execmask &= ~(1 << i);
1573         break;
1574
1575      case TGSI_CC_TR:
1576         break;
1577
1578      case TGSI_CC_FL:
1579         for (i = 0; i < QUAD_SIZE; i++)
1580            execmask &= ~(1 << i);
1581         break;
1582
1583      default:
1584         assert( 0 );
1585         return;
1586      }
1587   }
1588
1589   switch (inst->Instruction.Saturate) {
1590   case TGSI_SAT_NONE:
1591      for (i = 0; i < QUAD_SIZE; i++)
1592         if (execmask & (1 << i))
1593            dst->i[i] = chan->i[i];
1594      break;
1595
1596   case TGSI_SAT_ZERO_ONE:
1597      for (i = 0; i < QUAD_SIZE; i++)
1598         if (execmask & (1 << i)) {
1599            if (chan->f[i] < 0.0f)
1600               dst->f[i] = 0.0f;
1601            else if (chan->f[i] > 1.0f)
1602               dst->f[i] = 1.0f;
1603            else
1604               dst->i[i] = chan->i[i];
1605         }
1606      break;
1607
1608   case TGSI_SAT_MINUS_PLUS_ONE:
1609      for (i = 0; i < QUAD_SIZE; i++)
1610         if (execmask & (1 << i)) {
1611            if (chan->f[i] < -1.0f)
1612               dst->f[i] = -1.0f;
1613            else if (chan->f[i] > 1.0f)
1614               dst->f[i] = 1.0f;
1615            else
1616               dst->i[i] = chan->i[i];
1617         }
1618      break;
1619
1620   default:
1621      assert( 0 );
1622   }
1623
1624   if (inst->InstructionExtNv.CondDstUpdate) {
1625      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1626      uint shift;
1627      uint mask;
1628
1629      /* Only CC0 supported.
1630       */
1631      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1632
1633      switch (chan_index) {
1634      case CHAN_X:
1635         shift = TGSI_EXEC_CC_X_SHIFT;
1636         mask = ~TGSI_EXEC_CC_X_MASK;
1637         break;
1638      case CHAN_Y:
1639         shift = TGSI_EXEC_CC_Y_SHIFT;
1640         mask = ~TGSI_EXEC_CC_Y_MASK;
1641         break;
1642      case CHAN_Z:
1643         shift = TGSI_EXEC_CC_Z_SHIFT;
1644         mask = ~TGSI_EXEC_CC_Z_MASK;
1645         break;
1646      case CHAN_W:
1647         shift = TGSI_EXEC_CC_W_SHIFT;
1648         mask = ~TGSI_EXEC_CC_W_MASK;
1649         break;
1650      default:
1651         assert( 0 );
1652         return;
1653      }
1654
1655      for (i = 0; i < QUAD_SIZE; i++)
1656         if (execmask & (1 << i)) {
1657            cc->u[i] &= mask;
1658            if (dst->f[i] < 0.0f)
1659               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1660            else if (dst->f[i] > 0.0f)
1661               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1662            else if (dst->f[i] == 0.0f)
1663               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1664            else
1665               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1666         }
1667   }
1668}
1669
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL.  Expects variables named 'mach' and 'inst' in the calling
 * scope.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)

/* Store VAL into channel CHAN of destination operand INDEX of the current
 * instruction.  Expects variables named 'mach' and 'inst' in the calling
 * scope.
 */
#define STORE(VAL,INDEX,CHAN)\
    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
1675
1676
1677/**
1678 * Execute ARB-style KIL which is predicated by a src register.
1679 * Kill fragment if any of the four values is less than zero.
1680 */
1681static void
1682exec_kil(struct tgsi_exec_machine *mach,
1683         const struct tgsi_full_instruction *inst)
1684{
1685   uint uniquemask;
1686   uint chan_index;
1687   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1688   union tgsi_exec_channel r[1];
1689
1690   /* This mask stores component bits that were already tested. Note that
1691    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1692    * tested. */
1693   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1694
1695   for (chan_index = 0; chan_index < 4; chan_index++)
1696   {
1697      uint swizzle;
1698      uint i;
1699
1700      /* unswizzle channel */
1701      swizzle = tgsi_util_get_full_src_register_extswizzle (
1702                        &inst->FullSrcRegisters[0],
1703                        chan_index);
1704
1705      /* check if the component has not been already tested */
1706      if (uniquemask & (1 << swizzle))
1707         continue;
1708      uniquemask |= 1 << swizzle;
1709
1710      FETCH(&r[0], 0, chan_index);
1711      for (i = 0; i < 4; i++)
1712         if (r[0].f[i] < 0.0f)
1713            kilmask |= 1 << i;
1714   }
1715
1716   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1717}
1718
1719/**
1720 * Execute NVIDIA-style KIL which is predicated by a condition code.
1721 * Kill fragment if the condition code is TRUE.
1722 */
1723static void
1724exec_kilp(struct tgsi_exec_machine *mach,
1725          const struct tgsi_full_instruction *inst)
1726{
1727   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1728
1729   if (inst->InstructionExtNv.CondFlowEnable) {
1730      uint swizzle[4];
1731      uint chan_index;
1732
1733      kilmask = 0x0;
1734
1735      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1736      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1737      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1738      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1739
1740      for (chan_index = 0; chan_index < 4; chan_index++)
1741      {
1742         uint i;
1743
1744         for (i = 0; i < 4; i++) {
1745            /* TODO: evaluate the condition code */
1746            if (0)
1747               kilmask |= 1 << i;
1748         }
1749      }
1750   }
1751   else {
1752      /* "unconditional" kil */
1753      kilmask = mach->ExecMask;
1754   }
1755   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1756}
1757
1758
1759/*
1760 * Fetch a four texture samples using STR texture coordinates.
1761 */
1762static void
1763fetch_texel( struct tgsi_sampler *sampler,
1764             const union tgsi_exec_channel *s,
1765             const union tgsi_exec_channel *t,
1766             const union tgsi_exec_channel *p,
1767             float lodbias,  /* XXX should be float[4] */
1768             union tgsi_exec_channel *r,
1769             union tgsi_exec_channel *g,
1770             union tgsi_exec_channel *b,
1771             union tgsi_exec_channel *a )
1772{
1773   uint j;
1774   float rgba[NUM_CHANNELS][QUAD_SIZE];
1775
1776   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1777
1778   for (j = 0; j < 4; j++) {
1779      r->f[j] = rgba[0][j];
1780      g->f[j] = rgba[1][j];
1781      b->f[j] = rgba[2][j];
1782      a->f[j] = rgba[3][j];
1783   }
1784}
1785
1786
1787static void
1788exec_tex(struct tgsi_exec_machine *mach,
1789         const struct tgsi_full_instruction *inst,
1790         boolean biasLod,
1791         boolean projected)
1792{
1793   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1794   union tgsi_exec_channel r[4];
1795   uint chan_index;
1796   float lodBias;
1797
1798   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1799
1800   switch (inst->InstructionExtTexture.Texture) {
1801   case TGSI_TEXTURE_1D:
1802   case TGSI_TEXTURE_SHADOW1D:
1803
1804      FETCH(&r[0], 0, CHAN_X);
1805
1806      if (projected) {
1807         FETCH(&r[1], 0, CHAN_W);
1808         micro_div( &r[0], &r[0], &r[1] );
1809      }
1810
1811      if (biasLod) {
1812         FETCH(&r[1], 0, CHAN_W);
1813         lodBias = r[2].f[0];
1814      }
1815      else
1816         lodBias = 0.0;
1817
1818      fetch_texel(mach->Samplers[unit],
1819                  &r[0], &ZeroVec, &ZeroVec, lodBias,  /* S, T, P, BIAS */
1820                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1821      break;
1822
1823   case TGSI_TEXTURE_2D:
1824   case TGSI_TEXTURE_RECT:
1825   case TGSI_TEXTURE_SHADOW2D:
1826   case TGSI_TEXTURE_SHADOWRECT:
1827
1828      FETCH(&r[0], 0, CHAN_X);
1829      FETCH(&r[1], 0, CHAN_Y);
1830      FETCH(&r[2], 0, CHAN_Z);
1831
1832      if (projected) {
1833         FETCH(&r[3], 0, CHAN_W);
1834         micro_div( &r[0], &r[0], &r[3] );
1835         micro_div( &r[1], &r[1], &r[3] );
1836         micro_div( &r[2], &r[2], &r[3] );
1837      }
1838
1839      if (biasLod) {
1840         FETCH(&r[3], 0, CHAN_W);
1841         lodBias = r[3].f[0];
1842      }
1843      else
1844         lodBias = 0.0;
1845
1846      fetch_texel(mach->Samplers[unit],
1847                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1848                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1849      break;
1850
1851   case TGSI_TEXTURE_3D:
1852   case TGSI_TEXTURE_CUBE:
1853
1854      FETCH(&r[0], 0, CHAN_X);
1855      FETCH(&r[1], 0, CHAN_Y);
1856      FETCH(&r[2], 0, CHAN_Z);
1857
1858      if (projected) {
1859         FETCH(&r[3], 0, CHAN_W);
1860         micro_div( &r[0], &r[0], &r[3] );
1861         micro_div( &r[1], &r[1], &r[3] );
1862         micro_div( &r[2], &r[2], &r[3] );
1863      }
1864
1865      if (biasLod) {
1866         FETCH(&r[3], 0, CHAN_W);
1867         lodBias = r[3].f[0];
1868      }
1869      else
1870         lodBias = 0.0;
1871
1872      fetch_texel(mach->Samplers[unit],
1873                  &r[0], &r[1], &r[2], lodBias,
1874                  &r[0], &r[1], &r[2], &r[3]);
1875      break;
1876
1877   default:
1878      assert (0);
1879   }
1880
1881   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1882      STORE( &r[chan_index], 0, chan_index );
1883   }
1884}
1885
1886
1887/**
1888 * Evaluate a constant-valued coefficient at the position of the
1889 * current quad.
1890 */
1891static void
1892eval_constant_coef(
1893   struct tgsi_exec_machine *mach,
1894   unsigned attrib,
1895   unsigned chan )
1896{
1897   unsigned i;
1898
1899   for( i = 0; i < QUAD_SIZE; i++ ) {
1900      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1901   }
1902}
1903
1904/**
1905 * Evaluate a linear-valued coefficient at the position of the
1906 * current quad.
1907 */
1908static void
1909eval_linear_coef(
1910   struct tgsi_exec_machine *mach,
1911   unsigned attrib,
1912   unsigned chan )
1913{
1914   const float x = mach->QuadPos.xyzw[0].f[0];
1915   const float y = mach->QuadPos.xyzw[1].f[0];
1916   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1917   const float dady = mach->InterpCoefs[attrib].dady[chan];
1918   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1919   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1920   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1921   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1922   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1923}
1924
1925/**
1926 * Evaluate a perspective-valued coefficient at the position of the
1927 * current quad.
1928 */
1929static void
1930eval_perspective_coef(
1931   struct tgsi_exec_machine *mach,
1932   unsigned attrib,
1933   unsigned chan )
1934{
1935   const float x = mach->QuadPos.xyzw[0].f[0];
1936   const float y = mach->QuadPos.xyzw[1].f[0];
1937   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1938   const float dady = mach->InterpCoefs[attrib].dady[chan];
1939   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1940   const float *w = mach->QuadPos.xyzw[3].f;
1941   /* divide by W here */
1942   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1943   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1944   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1945   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1946}
1947
1948
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad and store the
 * result in the machine's input registers.
 */
typedef void (* eval_coef_func)(
   struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan );
1953
1954static void
1955exec_declaration(
1956   struct tgsi_exec_machine *mach,
1957   const struct tgsi_full_declaration *decl )
1958{
1959   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1960      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1961         unsigned first, last, mask;
1962         eval_coef_func eval;
1963
1964         first = decl->DeclarationRange.First;
1965         last = decl->DeclarationRange.Last;
1966         mask = decl->Declaration.UsageMask;
1967
1968         switch( decl->Declaration.Interpolate ) {
1969         case TGSI_INTERPOLATE_CONSTANT:
1970            eval = eval_constant_coef;
1971            break;
1972
1973         case TGSI_INTERPOLATE_LINEAR:
1974            eval = eval_linear_coef;
1975            break;
1976
1977         case TGSI_INTERPOLATE_PERSPECTIVE:
1978            eval = eval_perspective_coef;
1979            break;
1980
1981         default:
1982            eval = NULL;
1983            assert( 0 );
1984         }
1985
1986         if( mask == TGSI_WRITEMASK_XYZW ) {
1987            unsigned i, j;
1988
1989            for( i = first; i <= last; i++ ) {
1990               for( j = 0; j < NUM_CHANNELS; j++ ) {
1991                  eval( mach, i, j );
1992               }
1993            }
1994         }
1995         else {
1996            unsigned i, j;
1997
1998            for( j = 0; j < NUM_CHANNELS; j++ ) {
1999               if( mask & (1 << j) ) {
2000                  for( i = first; i <= last; i++ ) {
2001                     eval( mach, i, j );
2002                  }
2003               }
2004            }
2005         }
2006      }
2007   }
2008}
2009
2010static void
2011exec_instruction(
2012   struct tgsi_exec_machine *mach,
2013   const struct tgsi_full_instruction *inst,
2014   int *pc )
2015{
2016   uint chan_index;
2017   union tgsi_exec_channel r[10];
2018
2019   (*pc)++;
2020
2021   switch (inst->Instruction.Opcode) {
2022   case TGSI_OPCODE_ARL:
2023   case TGSI_OPCODE_FLR:
2024      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2025         FETCH( &r[0], 0, chan_index );
2026         micro_flr( &r[0], &r[0] );
2027         STORE( &r[0], 0, chan_index );
2028      }
2029      break;
2030
2031   case TGSI_OPCODE_MOV:
2032   case TGSI_OPCODE_SWZ:
2033      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2034         FETCH( &r[0], 0, chan_index );
2035         STORE( &r[0], 0, chan_index );
2036      }
2037      break;
2038
2039   case TGSI_OPCODE_LIT:
2040      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2041         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2042      }
2043
2044      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2045         FETCH( &r[0], 0, CHAN_X );
2046         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2047            micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2048            STORE( &r[0], 0, CHAN_Y );
2049         }
2050
2051         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2052            FETCH( &r[1], 0, CHAN_Y );
2053            micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2054
2055            FETCH( &r[2], 0, CHAN_W );
2056            micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
2057            micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
2058            micro_pow( &r[1], &r[1], &r[2] );
2059            micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2060            STORE( &r[0], 0, CHAN_Z );
2061         }
2062      }
2063
2064      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2065         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2066      }
2067      break;
2068
2069   case TGSI_OPCODE_RCP:
2070   /* TGSI_OPCODE_RECIP */
2071      FETCH( &r[0], 0, CHAN_X );
2072      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2073      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2074         STORE( &r[0], 0, chan_index );
2075      }
2076      break;
2077
2078   case TGSI_OPCODE_RSQ:
2079   /* TGSI_OPCODE_RECIPSQRT */
2080      FETCH( &r[0], 0, CHAN_X );
2081      micro_abs( &r[0], &r[0] );
2082      micro_sqrt( &r[0], &r[0] );
2083      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
2084      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2085         STORE( &r[0], 0, chan_index );
2086      }
2087      break;
2088
2089   case TGSI_OPCODE_EXP:
2090      FETCH( &r[0], 0, CHAN_X );
2091      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
2092      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2093         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
2094         STORE( &r[2], 0, CHAN_X );        /* store r2 */
2095      }
2096      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2097         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
2098         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
2099      }
2100      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2101         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
2102         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
2103      }
2104      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2105         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2106      }
2107      break;
2108
2109   case TGSI_OPCODE_LOG:
2110      FETCH( &r[0], 0, CHAN_X );
2111      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
2112      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
2113      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
2114      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2115         STORE( &r[0], 0, CHAN_X );
2116      }
2117      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2118         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
2119         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
2120         STORE( &r[0], 0, CHAN_Y );
2121      }
2122      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2123         STORE( &r[1], 0, CHAN_Z );
2124      }
2125      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2126         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2127      }
2128      break;
2129
2130   case TGSI_OPCODE_MUL:
2131      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
2132      {
2133         FETCH(&r[0], 0, chan_index);
2134         FETCH(&r[1], 1, chan_index);
2135
2136         micro_mul( &r[0], &r[0], &r[1] );
2137
2138         STORE(&r[0], 0, chan_index);
2139      }
2140      break;
2141
2142   case TGSI_OPCODE_ADD:
2143      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2144         FETCH( &r[0], 0, chan_index );
2145         FETCH( &r[1], 1, chan_index );
2146         micro_add( &r[0], &r[0], &r[1] );
2147         STORE( &r[0], 0, chan_index );
2148      }
2149      break;
2150
2151   case TGSI_OPCODE_DP3:
2152   /* TGSI_OPCODE_DOT3 */
2153      FETCH( &r[0], 0, CHAN_X );
2154      FETCH( &r[1], 1, CHAN_X );
2155      micro_mul( &r[0], &r[0], &r[1] );
2156
2157      FETCH( &r[1], 0, CHAN_Y );
2158      FETCH( &r[2], 1, CHAN_Y );
2159      micro_mul( &r[1], &r[1], &r[2] );
2160      micro_add( &r[0], &r[0], &r[1] );
2161
2162      FETCH( &r[1], 0, CHAN_Z );
2163      FETCH( &r[2], 1, CHAN_Z );
2164      micro_mul( &r[1], &r[1], &r[2] );
2165      micro_add( &r[0], &r[0], &r[1] );
2166
2167      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2168         STORE( &r[0], 0, chan_index );
2169      }
2170      break;
2171
2172    case TGSI_OPCODE_DP4:
2173    /* TGSI_OPCODE_DOT4 */
2174       FETCH(&r[0], 0, CHAN_X);
2175       FETCH(&r[1], 1, CHAN_X);
2176
2177       micro_mul( &r[0], &r[0], &r[1] );
2178
2179       FETCH(&r[1], 0, CHAN_Y);
2180       FETCH(&r[2], 1, CHAN_Y);
2181
2182       micro_mul( &r[1], &r[1], &r[2] );
2183       micro_add( &r[0], &r[0], &r[1] );
2184
2185       FETCH(&r[1], 0, CHAN_Z);
2186       FETCH(&r[2], 1, CHAN_Z);
2187
2188       micro_mul( &r[1], &r[1], &r[2] );
2189       micro_add( &r[0], &r[0], &r[1] );
2190
2191       FETCH(&r[1], 0, CHAN_W);
2192       FETCH(&r[2], 1, CHAN_W);
2193
2194       micro_mul( &r[1], &r[1], &r[2] );
2195       micro_add( &r[0], &r[0], &r[1] );
2196
2197      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2198         STORE( &r[0], 0, chan_index );
2199      }
2200      break;
2201
2202   case TGSI_OPCODE_DST:
2203      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2204         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2205      }
2206
2207      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2208         FETCH( &r[0], 0, CHAN_Y );
2209         FETCH( &r[1], 1, CHAN_Y);
2210         micro_mul( &r[0], &r[0], &r[1] );
2211         STORE( &r[0], 0, CHAN_Y );
2212      }
2213
2214      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2215         FETCH( &r[0], 0, CHAN_Z );
2216         STORE( &r[0], 0, CHAN_Z );
2217      }
2218
2219      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2220         FETCH( &r[0], 1, CHAN_W );
2221         STORE( &r[0], 0, CHAN_W );
2222      }
2223      break;
2224
2225   case TGSI_OPCODE_MIN:
2226      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2227         FETCH(&r[0], 0, chan_index);
2228         FETCH(&r[1], 1, chan_index);
2229
2230         /* XXX use micro_min()?? */
2231         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2232
2233         STORE(&r[0], 0, chan_index);
2234      }
2235      break;
2236
2237   case TGSI_OPCODE_MAX:
2238      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2239         FETCH(&r[0], 0, chan_index);
2240         FETCH(&r[1], 1, chan_index);
2241
2242         /* XXX use micro_max()?? */
2243         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2244
2245         STORE(&r[0], 0, chan_index );
2246      }
2247      break;
2248
2249   case TGSI_OPCODE_SLT:
2250   /* TGSI_OPCODE_SETLT */
2251      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2252         FETCH( &r[0], 0, chan_index );
2253         FETCH( &r[1], 1, chan_index );
2254         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2255         STORE( &r[0], 0, chan_index );
2256      }
2257      break;
2258
2259   case TGSI_OPCODE_SGE:
2260   /* TGSI_OPCODE_SETGE */
2261      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2262         FETCH( &r[0], 0, chan_index );
2263         FETCH( &r[1], 1, chan_index );
2264         micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2265         STORE( &r[0], 0, chan_index );
2266      }
2267      break;
2268
2269   case TGSI_OPCODE_MAD:
2270   /* TGSI_OPCODE_MADD */
2271      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272         FETCH( &r[0], 0, chan_index );
2273         FETCH( &r[1], 1, chan_index );
2274         micro_mul( &r[0], &r[0], &r[1] );
2275         FETCH( &r[1], 2, chan_index );
2276         micro_add( &r[0], &r[0], &r[1] );
2277         STORE( &r[0], 0, chan_index );
2278      }
2279      break;
2280
2281   case TGSI_OPCODE_SUB:
2282      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2283         FETCH(&r[0], 0, chan_index);
2284         FETCH(&r[1], 1, chan_index);
2285
2286         micro_sub( &r[0], &r[0], &r[1] );
2287
2288         STORE(&r[0], 0, chan_index);
2289      }
2290      break;
2291
2292   case TGSI_OPCODE_LRP:
2293      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2294         FETCH(&r[0], 0, chan_index);
2295         FETCH(&r[1], 1, chan_index);
2296         FETCH(&r[2], 2, chan_index);
2297
2298         micro_sub( &r[1], &r[1], &r[2] );
2299         micro_mul( &r[0], &r[0], &r[1] );
2300         micro_add( &r[0], &r[0], &r[2] );
2301
2302         STORE(&r[0], 0, chan_index);
2303      }
2304      break;
2305
2306   case TGSI_OPCODE_CND:
2307      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2308         FETCH(&r[0], 0, chan_index);
2309         FETCH(&r[1], 1, chan_index);
2310         FETCH(&r[2], 2, chan_index);
2311         micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2312         STORE(&r[0], 0, chan_index);
2313      }
2314      break;
2315
2316   case TGSI_OPCODE_CND0:
2317      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2318         FETCH(&r[0], 0, chan_index);
2319         FETCH(&r[1], 1, chan_index);
2320         FETCH(&r[2], 2, chan_index);
2321         micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2322         STORE(&r[0], 0, chan_index);
2323      }
2324      break;
2325
2326   case TGSI_OPCODE_DP2A:
2327      FETCH( &r[0], 0, CHAN_X );
2328      FETCH( &r[1], 1, CHAN_X );
2329      micro_mul( &r[0], &r[0], &r[1] );
2330
2331      FETCH( &r[1], 0, CHAN_Y );
2332      FETCH( &r[2], 1, CHAN_Y );
2333      micro_mul( &r[1], &r[1], &r[2] );
2334      micro_add( &r[0], &r[0], &r[1] );
2335
2336      FETCH( &r[2], 2, CHAN_X );
2337      micro_add( &r[0], &r[0], &r[2] );
2338
2339      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2340         STORE( &r[0], 0, chan_index );
2341      }
2342      break;
2343
2344   case TGSI_OPCODE_INDEX:
2345      /* XXX: considered for removal */
2346      assert (0);
2347      break;
2348
2349   case TGSI_OPCODE_NEGATE:
2350      /* XXX: considered for removal */
2351      assert (0);
2352      break;
2353
2354   case TGSI_OPCODE_FRC:
2355      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2356         FETCH( &r[0], 0, chan_index );
2357         micro_frc( &r[0], &r[0] );
2358         STORE( &r[0], 0, chan_index );
2359      }
2360      break;
2361
2362   case TGSI_OPCODE_CLAMP:
2363      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2364         FETCH(&r[0], 0, chan_index);
2365         FETCH(&r[1], 1, chan_index);
2366         micro_max(&r[0], &r[0], &r[1]);
2367         FETCH(&r[1], 2, chan_index);
2368         micro_min(&r[0], &r[0], &r[1]);
2369         STORE(&r[0], 0, chan_index);
2370      }
2371      break;
2372
2373   case TGSI_OPCODE_ROUND:
2374   case TGSI_OPCODE_ARR:
2375      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2376         FETCH( &r[0], 0, chan_index );
2377         micro_rnd( &r[0], &r[0] );
2378         STORE( &r[0], 0, chan_index );
2379      }
2380      break;
2381
2382   case TGSI_OPCODE_EX2:
2383      FETCH(&r[0], 0, CHAN_X);
2384
2385#if FAST_MATH
2386      micro_exp2( &r[0], &r[0] );
2387#else
2388      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2389#endif
2390
2391      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2392         STORE( &r[0], 0, chan_index );
2393      }
2394      break;
2395
2396   case TGSI_OPCODE_LG2:
2397      FETCH( &r[0], 0, CHAN_X );
2398      micro_lg2( &r[0], &r[0] );
2399      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2400         STORE( &r[0], 0, chan_index );
2401      }
2402      break;
2403
2404   case TGSI_OPCODE_POW:
2405      FETCH(&r[0], 0, CHAN_X);
2406      FETCH(&r[1], 1, CHAN_X);
2407
2408      micro_pow( &r[0], &r[0], &r[1] );
2409
2410      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2411         STORE( &r[0], 0, chan_index );
2412      }
2413      break;
2414
2415   case TGSI_OPCODE_XPD:
2416      FETCH(&r[0], 0, CHAN_Y);
2417      FETCH(&r[1], 1, CHAN_Z);
2418
2419      micro_mul( &r[2], &r[0], &r[1] );
2420
2421      FETCH(&r[3], 0, CHAN_Z);
2422      FETCH(&r[4], 1, CHAN_Y);
2423
2424      micro_mul( &r[5], &r[3], &r[4] );
2425      micro_sub( &r[2], &r[2], &r[5] );
2426
2427      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2428         STORE( &r[2], 0, CHAN_X );
2429      }
2430
2431      FETCH(&r[2], 1, CHAN_X);
2432
2433      micro_mul( &r[3], &r[3], &r[2] );
2434
2435      FETCH(&r[5], 0, CHAN_X);
2436
2437      micro_mul( &r[1], &r[1], &r[5] );
2438      micro_sub( &r[3], &r[3], &r[1] );
2439
2440      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2441         STORE( &r[3], 0, CHAN_Y );
2442      }
2443
2444      micro_mul( &r[5], &r[5], &r[4] );
2445      micro_mul( &r[0], &r[0], &r[2] );
2446      micro_sub( &r[5], &r[5], &r[0] );
2447
2448      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2449         STORE( &r[5], 0, CHAN_Z );
2450      }
2451
2452      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2453         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2454      }
2455      break;
2456
2457    case TGSI_OPCODE_MULTIPLYMATRIX:
2458       /* XXX: considered for removal */
2459       assert (0);
2460       break;
2461
2462    case TGSI_OPCODE_ABS:
2463       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2464          FETCH(&r[0], 0, chan_index);
2465
2466          micro_abs( &r[0], &r[0] );
2467
2468          STORE(&r[0], 0, chan_index);
2469       }
2470       break;
2471
2472   case TGSI_OPCODE_RCC:
2473      FETCH(&r[0], 0, CHAN_X);
2474      micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2475      micro_float_clamp(&r[0], &r[0]);
2476      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2477         STORE(&r[0], 0, chan_index);
2478      }
2479      break;
2480
2481   case TGSI_OPCODE_DPH:
2482      FETCH(&r[0], 0, CHAN_X);
2483      FETCH(&r[1], 1, CHAN_X);
2484
2485      micro_mul( &r[0], &r[0], &r[1] );
2486
2487      FETCH(&r[1], 0, CHAN_Y);
2488      FETCH(&r[2], 1, CHAN_Y);
2489
2490      micro_mul( &r[1], &r[1], &r[2] );
2491      micro_add( &r[0], &r[0], &r[1] );
2492
2493      FETCH(&r[1], 0, CHAN_Z);
2494      FETCH(&r[2], 1, CHAN_Z);
2495
2496      micro_mul( &r[1], &r[1], &r[2] );
2497      micro_add( &r[0], &r[0], &r[1] );
2498
2499      FETCH(&r[1], 1, CHAN_W);
2500
2501      micro_add( &r[0], &r[0], &r[1] );
2502
2503      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2504         STORE( &r[0], 0, chan_index );
2505      }
2506      break;
2507
2508   case TGSI_OPCODE_COS:
2509      FETCH(&r[0], 0, CHAN_X);
2510
2511      micro_cos( &r[0], &r[0] );
2512
2513      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2514         STORE( &r[0], 0, chan_index );
2515      }
2516      break;
2517
2518   case TGSI_OPCODE_DDX:
2519      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2520         FETCH( &r[0], 0, chan_index );
2521         micro_ddx( &r[0], &r[0] );
2522         STORE( &r[0], 0, chan_index );
2523      }
2524      break;
2525
2526   case TGSI_OPCODE_DDY:
2527      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2528         FETCH( &r[0], 0, chan_index );
2529         micro_ddy( &r[0], &r[0] );
2530         STORE( &r[0], 0, chan_index );
2531      }
2532      break;
2533
2534   case TGSI_OPCODE_KILP:
2535      exec_kilp (mach, inst);
2536      break;
2537
2538   case TGSI_OPCODE_KIL:
2539      exec_kil (mach, inst);
2540      break;
2541
2542   case TGSI_OPCODE_PK2H:
2543      assert (0);
2544      break;
2545
2546   case TGSI_OPCODE_PK2US:
2547      assert (0);
2548      break;
2549
2550   case TGSI_OPCODE_PK4B:
2551      assert (0);
2552      break;
2553
2554   case TGSI_OPCODE_PK4UB:
2555      assert (0);
2556      break;
2557
2558   case TGSI_OPCODE_RFL:
2559      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2560          IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2561          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2562         /* r0 = dp3(src0, src0) */
2563         FETCH(&r[2], 0, CHAN_X);
2564         micro_mul(&r[0], &r[2], &r[2]);
2565         FETCH(&r[4], 0, CHAN_Y);
2566         micro_mul(&r[8], &r[4], &r[4]);
2567         micro_add(&r[0], &r[0], &r[8]);
2568         FETCH(&r[6], 0, CHAN_Z);
2569         micro_mul(&r[8], &r[6], &r[6]);
2570         micro_add(&r[0], &r[0], &r[8]);
2571
2572         /* r1 = dp3(src0, src1) */
2573         FETCH(&r[3], 1, CHAN_X);
2574         micro_mul(&r[1], &r[2], &r[3]);
2575         FETCH(&r[5], 1, CHAN_Y);
2576         micro_mul(&r[8], &r[4], &r[5]);
2577         micro_add(&r[1], &r[1], &r[8]);
2578         FETCH(&r[7], 1, CHAN_Z);
2579         micro_mul(&r[8], &r[6], &r[7]);
2580         micro_add(&r[1], &r[1], &r[8]);
2581
2582         /* r1 = 2 * r1 / r0 */
2583         micro_add(&r[1], &r[1], &r[1]);
2584         micro_div(&r[1], &r[1], &r[0]);
2585
2586         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2587            micro_mul(&r[2], &r[2], &r[1]);
2588            micro_sub(&r[2], &r[2], &r[3]);
2589            STORE(&r[2], 0, CHAN_X);
2590         }
2591         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2592            micro_mul(&r[4], &r[4], &r[1]);
2593            micro_sub(&r[4], &r[4], &r[5]);
2594            STORE(&r[4], 0, CHAN_Y);
2595         }
2596         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2597            micro_mul(&r[6], &r[6], &r[1]);
2598            micro_sub(&r[6], &r[6], &r[7]);
2599            STORE(&r[6], 0, CHAN_Z);
2600         }
2601      }
2602      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2603         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2604      }
2605      break;
2606
2607   case TGSI_OPCODE_SEQ:
2608      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2609         FETCH( &r[0], 0, chan_index );
2610         FETCH( &r[1], 1, chan_index );
2611         micro_eq( &r[0], &r[0], &r[1],
2612                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2613                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2614         STORE( &r[0], 0, chan_index );
2615      }
2616      break;
2617
2618   case TGSI_OPCODE_SFL:
2619      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2620         STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2621      }
2622      break;
2623
2624   case TGSI_OPCODE_SGT:
2625      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2626         FETCH( &r[0], 0, chan_index );
2627         FETCH( &r[1], 1, chan_index );
2628         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2629         STORE( &r[0], 0, chan_index );
2630      }
2631      break;
2632
2633   case TGSI_OPCODE_SIN:
2634      FETCH( &r[0], 0, CHAN_X );
2635      micro_sin( &r[0], &r[0] );
2636      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2637         STORE( &r[0], 0, chan_index );
2638      }
2639      break;
2640
2641   case TGSI_OPCODE_SLE:
2642      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2643         FETCH( &r[0], 0, chan_index );
2644         FETCH( &r[1], 1, chan_index );
2645         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2646         STORE( &r[0], 0, chan_index );
2647      }
2648      break;
2649
2650   case TGSI_OPCODE_SNE:
2651      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2652         FETCH( &r[0], 0, chan_index );
2653         FETCH( &r[1], 1, chan_index );
2654         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2655         STORE( &r[0], 0, chan_index );
2656      }
2657      break;
2658
2659   case TGSI_OPCODE_STR:
2660      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2661         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2662      }
2663      break;
2664
2665   case TGSI_OPCODE_TEX:
2666      /* simple texture lookup */
2667      /* src[0] = texcoord */
2668      /* src[1] = sampler unit */
2669      exec_tex(mach, inst, FALSE, FALSE);
2670      break;
2671
2672   case TGSI_OPCODE_TXB:
2673      /* Texture lookup with lod bias */
2674      /* src[0] = texcoord (src[0].w = LOD bias) */
2675      /* src[1] = sampler unit */
2676      exec_tex(mach, inst, TRUE, FALSE);
2677      break;
2678
2679   case TGSI_OPCODE_TXD:
2680      /* Texture lookup with explicit partial derivatives */
2681      /* src[0] = texcoord */
2682      /* src[1] = d[strq]/dx */
2683      /* src[2] = d[strq]/dy */
2684      /* src[3] = sampler unit */
2685      assert (0);
2686      break;
2687
2688   case TGSI_OPCODE_TXL:
2689      /* Texture lookup with explicit LOD */
2690      /* src[0] = texcoord (src[0].w = LOD) */
2691      /* src[1] = sampler unit */
2692      exec_tex(mach, inst, TRUE, FALSE);
2693      break;
2694
2695   case TGSI_OPCODE_TXP:
2696      /* Texture lookup with projection */
2697      /* src[0] = texcoord (src[0].w = projection) */
2698      /* src[1] = sampler unit */
2699      exec_tex(mach, inst, FALSE, TRUE);
2700      break;
2701
2702   case TGSI_OPCODE_UP2H:
2703      assert (0);
2704      break;
2705
2706   case TGSI_OPCODE_UP2US:
2707      assert (0);
2708      break;
2709
2710   case TGSI_OPCODE_UP4B:
2711      assert (0);
2712      break;
2713
2714   case TGSI_OPCODE_UP4UB:
2715      assert (0);
2716      break;
2717
2718   case TGSI_OPCODE_X2D:
2719      FETCH(&r[0], 1, CHAN_X);
2720      FETCH(&r[1], 1, CHAN_Y);
2721      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2722          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2723         FETCH(&r[2], 2, CHAN_X);
2724         micro_mul(&r[2], &r[2], &r[0]);
2725         FETCH(&r[3], 2, CHAN_Y);
2726         micro_mul(&r[3], &r[3], &r[1]);
2727         micro_add(&r[2], &r[2], &r[3]);
2728         FETCH(&r[3], 0, CHAN_X);
2729         micro_add(&r[2], &r[2], &r[3]);
2730         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2731            STORE(&r[2], 0, CHAN_X);
2732         }
2733         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2734            STORE(&r[2], 0, CHAN_Z);
2735         }
2736      }
2737      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2738          IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2739         FETCH(&r[2], 2, CHAN_Z);
2740         micro_mul(&r[2], &r[2], &r[0]);
2741         FETCH(&r[3], 2, CHAN_W);
2742         micro_mul(&r[3], &r[3], &r[1]);
2743         micro_add(&r[2], &r[2], &r[3]);
2744         FETCH(&r[3], 0, CHAN_Y);
2745         micro_add(&r[2], &r[2], &r[3]);
2746         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2747            STORE(&r[2], 0, CHAN_Y);
2748         }
2749         if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2750            STORE(&r[2], 0, CHAN_W);
2751         }
2752      }
2753      break;
2754
2755   case TGSI_OPCODE_ARA:
2756      assert (0);
2757      break;
2758
2759   case TGSI_OPCODE_BRA:
2760      assert (0);
2761      break;
2762
2763   case TGSI_OPCODE_CAL:
2764      /* skip the call if no execution channels are enabled */
2765      if (mach->ExecMask) {
2766         /* do the call */
2767
2768         /* push the Cond, Loop, Cont stacks */
2769         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2770         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2771         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2772         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2773         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2774         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2775
2776         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2777         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2778
2779         /* note that PC was already incremented above */
2780         mach->CallStack[mach->CallStackTop++] = *pc;
2781         *pc = inst->InstructionExtLabel.Label;
2782      }
2783      break;
2784
2785   case TGSI_OPCODE_RET:
2786      mach->FuncMask &= ~mach->ExecMask;
2787      UPDATE_EXEC_MASK(mach);
2788
2789      if (mach->FuncMask == 0x0) {
2790         /* really return now (otherwise, keep executing) */
2791
2792         if (mach->CallStackTop == 0) {
2793            /* returning from main() */
2794            *pc = -1;
2795            return;
2796         }
2797         *pc = mach->CallStack[--mach->CallStackTop];
2798
2799         /* pop the Cond, Loop, Cont stacks */
2800         assert(mach->CondStackTop > 0);
2801         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2802         assert(mach->LoopStackTop > 0);
2803         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2804         assert(mach->ContStackTop > 0);
2805         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2806         assert(mach->FuncStackTop > 0);
2807         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2808
2809         UPDATE_EXEC_MASK(mach);
2810      }
2811      break;
2812
2813   case TGSI_OPCODE_SSG:
2814   /* TGSI_OPCODE_SGN */
2815      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2816         FETCH( &r[0], 0, chan_index );
2817         micro_sgn( &r[0], &r[0] );
2818         STORE( &r[0], 0, chan_index );
2819      }
2820      break;
2821
2822   case TGSI_OPCODE_CMP:
2823      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2824         FETCH(&r[0], 0, chan_index);
2825         FETCH(&r[1], 1, chan_index);
2826         FETCH(&r[2], 2, chan_index);
2827
2828         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2829
2830         STORE(&r[0], 0, chan_index);
2831      }
2832      break;
2833
2834   case TGSI_OPCODE_SCS:
2835      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2836         FETCH( &r[0], 0, CHAN_X );
2837         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2838            micro_cos(&r[1], &r[0]);
2839            STORE(&r[1], 0, CHAN_X);
2840         }
2841         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2842            micro_sin(&r[1], &r[0]);
2843            STORE(&r[1], 0, CHAN_Y);
2844         }
2845      }
2846      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2847         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2848      }
2849      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2850         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2851      }
2852      break;
2853
2854   case TGSI_OPCODE_NRM:
2855      /* 3-component vector normalize */
2856      if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2857         IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2858         IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2859         /* r3 = sqrt(dp3(src0, src0)) */
2860         FETCH(&r[0], 0, CHAN_X);
2861         micro_mul(&r[3], &r[0], &r[0]);
2862         FETCH(&r[1], 0, CHAN_Y);
2863         micro_mul(&r[4], &r[1], &r[1]);
2864         micro_add(&r[3], &r[3], &r[4]);
2865         FETCH(&r[2], 0, CHAN_Z);
2866         micro_mul(&r[4], &r[2], &r[2]);
2867         micro_add(&r[3], &r[3], &r[4]);
2868         micro_sqrt(&r[3], &r[3]);
2869
2870         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2871            micro_div(&r[0], &r[0], &r[3]);
2872            STORE(&r[0], 0, CHAN_X);
2873         }
2874         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2875            micro_div(&r[1], &r[1], &r[3]);
2876            STORE(&r[1], 0, CHAN_Y);
2877         }
2878         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2879            micro_div(&r[2], &r[2], &r[3]);
2880            STORE(&r[2], 0, CHAN_Z);
2881         }
2882      }
2883      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2884         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2885      }
2886      break;
2887
2888   case TGSI_OPCODE_NRM4:
2889      /* 4-component vector normalize */
2890      {
2891         union tgsi_exec_channel tmp, dot;
2892
2893         /* tmp = dp4(src0, src0): */
2894         FETCH( &r[0], 0, CHAN_X );
2895         micro_mul( &tmp, &r[0], &r[0] );
2896
2897         FETCH( &r[1], 0, CHAN_Y );
2898         micro_mul( &dot, &r[1], &r[1] );
2899         micro_add( &tmp, &tmp, &dot );
2900
2901         FETCH( &r[2], 0, CHAN_Z );
2902         micro_mul( &dot, &r[2], &r[2] );
2903         micro_add( &tmp, &tmp, &dot );
2904
2905         FETCH( &r[3], 0, CHAN_W );
2906         micro_mul( &dot, &r[3], &r[3] );
2907         micro_add( &tmp, &tmp, &dot );
2908
2909         /* tmp = 1 / sqrt(tmp) */
2910         micro_sqrt( &tmp, &tmp );
2911         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2912
2913         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2914            /* chan = chan * tmp */
2915            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2916            STORE( &r[chan_index], 0, chan_index );
2917         }
2918      }
2919      break;
2920
2921   case TGSI_OPCODE_DIV:
2922      assert( 0 );
2923      break;
2924
2925   case TGSI_OPCODE_DP2:
2926      FETCH( &r[0], 0, CHAN_X );
2927      FETCH( &r[1], 1, CHAN_X );
2928      micro_mul( &r[0], &r[0], &r[1] );
2929
2930      FETCH( &r[1], 0, CHAN_Y );
2931      FETCH( &r[2], 1, CHAN_Y );
2932      micro_mul( &r[1], &r[1], &r[2] );
2933      micro_add( &r[0], &r[0], &r[1] );
2934
2935      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2936         STORE( &r[0], 0, chan_index );
2937      }
2938      break;
2939
2940   case TGSI_OPCODE_IF:
2941      /* push CondMask */
2942      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2943      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2944      FETCH( &r[0], 0, CHAN_X );
2945      /* update CondMask */
2946      if( ! r[0].u[0] ) {
2947         mach->CondMask &= ~0x1;
2948      }
2949      if( ! r[0].u[1] ) {
2950         mach->CondMask &= ~0x2;
2951      }
2952      if( ! r[0].u[2] ) {
2953         mach->CondMask &= ~0x4;
2954      }
2955      if( ! r[0].u[3] ) {
2956         mach->CondMask &= ~0x8;
2957      }
2958      UPDATE_EXEC_MASK(mach);
2959      /* Todo: If CondMask==0, jump to ELSE */
2960      break;
2961
2962   case TGSI_OPCODE_ELSE:
2963      /* invert CondMask wrt previous mask */
2964      {
2965         uint prevMask;
2966         assert(mach->CondStackTop > 0);
2967         prevMask = mach->CondStack[mach->CondStackTop - 1];
2968         mach->CondMask = ~mach->CondMask & prevMask;
2969         UPDATE_EXEC_MASK(mach);
2970         /* Todo: If CondMask==0, jump to ENDIF */
2971      }
2972      break;
2973
2974   case TGSI_OPCODE_ENDIF:
2975      /* pop CondMask */
2976      assert(mach->CondStackTop > 0);
2977      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2978      UPDATE_EXEC_MASK(mach);
2979      break;
2980
2981   case TGSI_OPCODE_END:
2982      /* halt execution */
2983      *pc = -1;
2984      break;
2985
2986   case TGSI_OPCODE_REP:
2987      assert (0);
2988      break;
2989
2990   case TGSI_OPCODE_ENDREP:
2991       assert (0);
2992       break;
2993
2994   case TGSI_OPCODE_PUSHA:
2995      assert (0);
2996      break;
2997
2998   case TGSI_OPCODE_POPA:
2999      assert (0);
3000      break;
3001
3002   case TGSI_OPCODE_CEIL:
3003      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3004         FETCH( &r[0], 0, chan_index );
3005         micro_ceil( &r[0], &r[0] );
3006         STORE( &r[0], 0, chan_index );
3007      }
3008      break;
3009
3010   case TGSI_OPCODE_I2F:
3011      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3012         FETCH( &r[0], 0, chan_index );
3013         micro_i2f( &r[0], &r[0] );
3014         STORE( &r[0], 0, chan_index );
3015      }
3016      break;
3017
3018   case TGSI_OPCODE_NOT:
3019      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3020         FETCH( &r[0], 0, chan_index );
3021         micro_not( &r[0], &r[0] );
3022         STORE( &r[0], 0, chan_index );
3023      }
3024      break;
3025
3026   case TGSI_OPCODE_TRUNC:
3027      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3028         FETCH( &r[0], 0, chan_index );
3029         micro_trunc( &r[0], &r[0] );
3030         STORE( &r[0], 0, chan_index );
3031      }
3032      break;
3033
3034   case TGSI_OPCODE_SHL:
3035      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3036         FETCH( &r[0], 0, chan_index );
3037         FETCH( &r[1], 1, chan_index );
3038         micro_shl( &r[0], &r[0], &r[1] );
3039         STORE( &r[0], 0, chan_index );
3040      }
3041      break;
3042
3043   case TGSI_OPCODE_SHR:
3044      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3045         FETCH( &r[0], 0, chan_index );
3046         FETCH( &r[1], 1, chan_index );
3047         micro_ishr( &r[0], &r[0], &r[1] );
3048         STORE( &r[0], 0, chan_index );
3049      }
3050      break;
3051
3052   case TGSI_OPCODE_AND:
3053      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3054         FETCH( &r[0], 0, chan_index );
3055         FETCH( &r[1], 1, chan_index );
3056         micro_and( &r[0], &r[0], &r[1] );
3057         STORE( &r[0], 0, chan_index );
3058      }
3059      break;
3060
3061   case TGSI_OPCODE_OR:
3062      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3063         FETCH( &r[0], 0, chan_index );
3064         FETCH( &r[1], 1, chan_index );
3065         micro_or( &r[0], &r[0], &r[1] );
3066         STORE( &r[0], 0, chan_index );
3067      }
3068      break;
3069
3070   case TGSI_OPCODE_MOD:
3071      assert (0);
3072      break;
3073
3074   case TGSI_OPCODE_XOR:
3075      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
3076         FETCH( &r[0], 0, chan_index );
3077         FETCH( &r[1], 1, chan_index );
3078         micro_xor( &r[0], &r[0], &r[1] );
3079         STORE( &r[0], 0, chan_index );
3080      }
3081      break;
3082
3083   case TGSI_OPCODE_SAD:
3084      assert (0);
3085      break;
3086
3087   case TGSI_OPCODE_TXF:
3088      assert (0);
3089      break;
3090
3091   case TGSI_OPCODE_TXQ:
3092      assert (0);
3093      break;
3094
3095   case TGSI_OPCODE_EMIT:
3096      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
3097      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
3098      break;
3099
3100   case TGSI_OPCODE_ENDPRIM:
3101      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
3102      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
3103      break;
3104
3105   case TGSI_OPCODE_LOOP:
3106      /* fall-through (for now) */
3107   case TGSI_OPCODE_BGNLOOP2:
3108      /* push LoopMask and ContMasks */
3109      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3110      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3111      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3112      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3113      break;
3114
3115   case TGSI_OPCODE_ENDLOOP:
3116      /* fall-through (for now at least) */
3117   case TGSI_OPCODE_ENDLOOP2:
3118      /* Restore ContMask, but don't pop */
3119      assert(mach->ContStackTop > 0);
3120      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
3121      UPDATE_EXEC_MASK(mach);
3122      if (mach->ExecMask) {
3123         /* repeat loop: jump to instruction just past BGNLOOP */
3124         *pc = inst->InstructionExtLabel.Label + 1;
3125      }
3126      else {
3127         /* exit loop: pop LoopMask */
3128         assert(mach->LoopStackTop > 0);
3129         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
3130         /* pop ContMask */
3131         assert(mach->ContStackTop > 0);
3132         mach->ContMask = mach->ContStack[--mach->ContStackTop];
3133      }
3134      UPDATE_EXEC_MASK(mach);
3135      break;
3136
3137   case TGSI_OPCODE_BRK:
3138      /* turn off loop channels for each enabled exec channel */
3139      mach->LoopMask &= ~mach->ExecMask;
3140      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3141      UPDATE_EXEC_MASK(mach);
3142      break;
3143
3144   case TGSI_OPCODE_CONT:
3145      /* turn off cont channels for each enabled exec channel */
3146      mach->ContMask &= ~mach->ExecMask;
3147      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3148      UPDATE_EXEC_MASK(mach);
3149      break;
3150
3151   case TGSI_OPCODE_BGNSUB:
3152      /* no-op */
3153      break;
3154
3155   case TGSI_OPCODE_ENDSUB:
3156      /* no-op */
3157      break;
3158
3159   case TGSI_OPCODE_NOISE1:
3160      assert( 0 );
3161      break;
3162
3163   case TGSI_OPCODE_NOISE2:
3164      assert( 0 );
3165      break;
3166
3167   case TGSI_OPCODE_NOISE3:
3168      assert( 0 );
3169      break;
3170
3171   case TGSI_OPCODE_NOISE4:
3172      assert( 0 );
3173      break;
3174
3175   case TGSI_OPCODE_NOP:
3176      break;
3177
3178   default:
3179      assert( 0 );
3180   }
3181}
3182
3183
3184/**
3185 * Run TGSI interpreter.
3186 * \return bitmask of "alive" quad components
3187 */
3188uint
3189tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3190{
3191   uint i;
3192   int pc = 0;
3193
3194   mach->CondMask = 0xf;
3195   mach->LoopMask = 0xf;
3196   mach->ContMask = 0xf;
3197   mach->FuncMask = 0xf;
3198   mach->ExecMask = 0xf;
3199
3200   mach->CondStackTop = 0; /* temporarily subvert this assertion */
3201   assert(mach->CondStackTop == 0);
3202   assert(mach->LoopStackTop == 0);
3203   assert(mach->ContStackTop == 0);
3204   assert(mach->CallStackTop == 0);
3205
3206   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3207   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3208
3209   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3210      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3211      mach->Primitives[0] = 0;
3212   }
3213
3214   for (i = 0; i < QUAD_SIZE; i++) {
3215      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3216         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3217         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3218         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3219         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3220   }
3221
3222   /* execute declarations (interpolants) */
3223   for (i = 0; i < mach->NumDeclarations; i++) {
3224      exec_declaration( mach, mach->Declarations+i );
3225   }
3226
3227   /* execute instructions, until pc is set to -1 */
3228   while (pc != -1) {
3229      assert(pc < (int) mach->NumInstructions);
3230      exec_instruction( mach, mach->Instructions + pc, &pc );
3231   }
3232
3233#if 0
3234   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3235   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3236      /*
3237       * Scale back depth component.
3238       */
3239      for (i = 0; i < 4; i++)
3240         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3241   }
3242#endif
3243
3244   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3245}
3246