tgsi_exec.c revision 50f78fcc2e3da24fa6dc076f0985355b3f64e9fd
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers.  This is the ExecMask.
41 * See store_dest().
42 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by the
 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46 *
47 *
48 * Authors:
49 *   Michal Krol
50 *   Brian Paul
51 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_shader_tokens.h"
56#include "tgsi/tgsi_parse.h"
57#include "tgsi/tgsi_util.h"
58#include "tgsi_exec.h"
59#include "util/u_memory.h"
60#include "util/u_math.h"
61
/* Non-zero selects the util_fast_* routines in micro_exp2/lg2/pow. */
#define FAST_MATH 1

/* Lane indices inside one channel, named by quad position (see micro_ddx/ddy). */
#define TILE_TOP_LEFT      0
#define TILE_TOP_RIGHT     1
#define TILE_BOTTOM_LEFT   2
#define TILE_BOTTOM_RIGHT  3

/* Channel (component) indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Short aliases for the utility registers declared by the TGSI_EXEC_TEMP_*
 * names.  A trailing _I is the register index, a trailing _C the channel
 * within that register.
 */

/* Bit-pattern constants. */
#define TEMP_0_I          TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C          TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I         TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C         TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I         TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C         TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I         TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C         TGSI_EXEC_TEMP_FFFFFFFF_C

/* Floating-point constants. */
#define TEMP_1_I          TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C          TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I          TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C          TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I        TGSI_EXEC_TEMP_128_I
#define TEMP_128_C        TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I       TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C       TGSI_EXEC_TEMP_MINUS_128_C

/* Execution-state registers. */
#define TEMP_KILMASK_I    TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C    TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I     TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C     TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I  TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C  TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I         TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C         TGSI_EXEC_TEMP_CC_C

/* More floating-point constants. */
#define TEMP_3_I          TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C          TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I       TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C       TGSI_EXEC_TEMP_HALF_C

#define TEMP_R0           TGSI_EXEC_TEMP_R0
106
/** Non-zero when channel CHAN is set in the write mask of INST's first destination. */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test against the write mask of INST's second destination. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel enabled
 * in the first destination's write mask, with CHAN holding the channel.
 *
 * The "if (!...) { } else" form gives the macro's own `if` an `else`, so an
 * `else` written after the loop body binds to the caller's enclosing `if`
 * rather than to the test hidden inside the macro.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) { } else

/** As FOR_EACH_ENABLED_CHANNEL, but for the second destination register. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) { } else
120
121
/**
 * Recompute the execution mask as the AND of the condition, loop, continue
 * and function masks.  MACH is a pointer expression; it is parenthesized so
 * arguments such as &machine expand correctly.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
125
126/**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131void
132tgsi_exec_machine_bind_shader(
133   struct tgsi_exec_machine *mach,
134   const struct tgsi_token *tokens,
135   uint numSamplers,
136   struct tgsi_sampler *samplers)
137{
138   uint k;
139   struct tgsi_parse_context parse;
140   struct tgsi_exec_labels *labels = &mach->Labels;
141   struct tgsi_full_instruction *instructions;
142   struct tgsi_full_declaration *declarations;
143   uint maxInstructions = 10, numInstructions = 0;
144   uint maxDeclarations = 10, numDeclarations = 0;
145   uint instno = 0;
146
147#if 0
148   tgsi_dump(tokens, 0);
149#endif
150
151   util_init_math();
152
153   mach->Tokens = tokens;
154   mach->Samplers = samplers;
155
156   k = tgsi_parse_init (&parse, mach->Tokens);
157   if (k != TGSI_PARSE_OK) {
158      debug_printf( "Problem parsing!\n" );
159      return;
160   }
161
162   mach->Processor = parse.FullHeader.Processor.Processor;
163   mach->ImmLimit = 0;
164   labels->count = 0;
165
166   declarations = (struct tgsi_full_declaration *)
167      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169   if (!declarations) {
170      return;
171   }
172
173   instructions = (struct tgsi_full_instruction *)
174      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176   if (!instructions) {
177      FREE( declarations );
178      return;
179   }
180
181   while( !tgsi_parse_end_of_tokens( &parse ) ) {
182      uint pointer = parse.Position;
183      uint i;
184
185      tgsi_parse_token( &parse );
186      switch( parse.FullToken.Token.Type ) {
187      case TGSI_TOKEN_TYPE_DECLARATION:
188         /* save expanded declaration */
189         if (numDeclarations == maxDeclarations) {
190            declarations = REALLOC(declarations,
191                                   maxDeclarations
192                                   * sizeof(struct tgsi_full_declaration),
193                                   (maxDeclarations + 10)
194                                   * sizeof(struct tgsi_full_declaration));
195            maxDeclarations += 10;
196         }
197         memcpy(declarations + numDeclarations,
198                &parse.FullToken.FullDeclaration,
199                sizeof(declarations[0]));
200         numDeclarations++;
201         break;
202
203      case TGSI_TOKEN_TYPE_IMMEDIATE:
204         {
205            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206            assert( size % 4 == 0 );
207            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209            for( i = 0; i < size; i++ ) {
210               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212            }
213            mach->ImmLimit += size / 4;
214         }
215         break;
216
217      case TGSI_TOKEN_TYPE_INSTRUCTION:
218         assert( labels->count < MAX_LABELS );
219
220         labels->labels[labels->count][0] = instno;
221         labels->labels[labels->count][1] = pointer;
222         labels->count++;
223
224         /* save expanded instruction */
225         if (numInstructions == maxInstructions) {
226            instructions = REALLOC(instructions,
227                                   maxInstructions
228                                   * sizeof(struct tgsi_full_instruction),
229                                   (maxInstructions + 10)
230                                   * sizeof(struct tgsi_full_instruction));
231            maxInstructions += 10;
232         }
233         memcpy(instructions + numInstructions,
234                &parse.FullToken.FullInstruction,
235                sizeof(instructions[0]));
236         numInstructions++;
237         break;
238
239      default:
240         assert( 0 );
241      }
242   }
243   tgsi_parse_free (&parse);
244
245   if (mach->Declarations) {
246      FREE( mach->Declarations );
247   }
248   mach->Declarations = declarations;
249   mach->NumDeclarations = numDeclarations;
250
251   if (mach->Instructions) {
252      FREE( mach->Instructions );
253   }
254   mach->Instructions = instructions;
255   mach->NumInstructions = numInstructions;
256}
257
258
259void
260tgsi_exec_machine_init(
261   struct tgsi_exec_machine *mach )
262{
263   uint i;
264
265   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268   /* Setup constants. */
269   for( i = 0; i < 4; i++ ) {
270      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280   }
281}
282
283
284void
285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286{
287   if (mach->Instructions) {
288      FREE(mach->Instructions);
289      mach->Instructions = NULL;
290      mach->NumInstructions = 0;
291   }
292   if (mach->Declarations) {
293      FREE(mach->Declarations);
294      mach->Declarations = NULL;
295      mach->NumDeclarations = 0;
296   }
297}
298
299
300static void
301micro_abs(
302   union tgsi_exec_channel *dst,
303   const union tgsi_exec_channel *src )
304{
305   dst->f[0] = fabsf( src->f[0] );
306   dst->f[1] = fabsf( src->f[1] );
307   dst->f[2] = fabsf( src->f[2] );
308   dst->f[3] = fabsf( src->f[3] );
309}
310
311static void
312micro_add(
313   union tgsi_exec_channel *dst,
314   const union tgsi_exec_channel *src0,
315   const union tgsi_exec_channel *src1 )
316{
317   dst->f[0] = src0->f[0] + src1->f[0];
318   dst->f[1] = src0->f[1] + src1->f[1];
319   dst->f[2] = src0->f[2] + src1->f[2];
320   dst->f[3] = src0->f[3] + src1->f[3];
321}
322
323static void
324micro_iadd(
325   union tgsi_exec_channel *dst,
326   const union tgsi_exec_channel *src0,
327   const union tgsi_exec_channel *src1 )
328{
329   dst->i[0] = src0->i[0] + src1->i[0];
330   dst->i[1] = src0->i[1] + src1->i[1];
331   dst->i[2] = src0->i[2] + src1->i[2];
332   dst->i[3] = src0->i[3] + src1->i[3];
333}
334
335static void
336micro_and(
337   union tgsi_exec_channel *dst,
338   const union tgsi_exec_channel *src0,
339   const union tgsi_exec_channel *src1 )
340{
341   dst->u[0] = src0->u[0] & src1->u[0];
342   dst->u[1] = src0->u[1] & src1->u[1];
343   dst->u[2] = src0->u[2] & src1->u[2];
344   dst->u[3] = src0->u[3] & src1->u[3];
345}
346
347static void
348micro_ceil(
349   union tgsi_exec_channel *dst,
350   const union tgsi_exec_channel *src )
351{
352   dst->f[0] = ceilf( src->f[0] );
353   dst->f[1] = ceilf( src->f[1] );
354   dst->f[2] = ceilf( src->f[2] );
355   dst->f[3] = ceilf( src->f[3] );
356}
357
358static void
359micro_cos(
360   union tgsi_exec_channel *dst,
361   const union tgsi_exec_channel *src )
362{
363   dst->f[0] = cosf( src->f[0] );
364   dst->f[1] = cosf( src->f[1] );
365   dst->f[2] = cosf( src->f[2] );
366   dst->f[3] = cosf( src->f[3] );
367}
368
369static void
370micro_ddx(
371   union tgsi_exec_channel *dst,
372   const union tgsi_exec_channel *src )
373{
374   dst->f[0] =
375   dst->f[1] =
376   dst->f[2] =
377   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378}
379
380static void
381micro_ddy(
382   union tgsi_exec_channel *dst,
383   const union tgsi_exec_channel *src )
384{
385   dst->f[0] =
386   dst->f[1] =
387   dst->f[2] =
388   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389}
390
391static void
392micro_div(
393   union tgsi_exec_channel *dst,
394   const union tgsi_exec_channel *src0,
395   const union tgsi_exec_channel *src1 )
396{
397   if (src1->f[0] != 0) {
398      dst->f[0] = src0->f[0] / src1->f[0];
399   }
400   if (src1->f[1] != 0) {
401      dst->f[1] = src0->f[1] / src1->f[1];
402   }
403   if (src1->f[2] != 0) {
404      dst->f[2] = src0->f[2] / src1->f[2];
405   }
406   if (src1->f[3] != 0) {
407      dst->f[3] = src0->f[3] / src1->f[3];
408   }
409}
410
411static void
412micro_udiv(
413   union tgsi_exec_channel *dst,
414   const union tgsi_exec_channel *src0,
415   const union tgsi_exec_channel *src1 )
416{
417   dst->u[0] = src0->u[0] / src1->u[0];
418   dst->u[1] = src0->u[1] / src1->u[1];
419   dst->u[2] = src0->u[2] / src1->u[2];
420   dst->u[3] = src0->u[3] / src1->u[3];
421}
422
423static void
424micro_eq(
425   union tgsi_exec_channel *dst,
426   const union tgsi_exec_channel *src0,
427   const union tgsi_exec_channel *src1,
428   const union tgsi_exec_channel *src2,
429   const union tgsi_exec_channel *src3 )
430{
431   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435}
436
437static void
438micro_ieq(
439   union tgsi_exec_channel *dst,
440   const union tgsi_exec_channel *src0,
441   const union tgsi_exec_channel *src1,
442   const union tgsi_exec_channel *src2,
443   const union tgsi_exec_channel *src3 )
444{
445   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449}
450
451static void
452micro_exp2(
453   union tgsi_exec_channel *dst,
454   const union tgsi_exec_channel *src)
455{
456#if FAST_MATH
457   dst->f[0] = util_fast_exp2( src->f[0] );
458   dst->f[1] = util_fast_exp2( src->f[1] );
459   dst->f[2] = util_fast_exp2( src->f[2] );
460   dst->f[3] = util_fast_exp2( src->f[3] );
461#else
462   dst->f[0] = powf( 2.0f, src->f[0] );
463   dst->f[1] = powf( 2.0f, src->f[1] );
464   dst->f[2] = powf( 2.0f, src->f[2] );
465   dst->f[3] = powf( 2.0f, src->f[3] );
466#endif
467}
468
469static void
470micro_f2it(
471   union tgsi_exec_channel *dst,
472   const union tgsi_exec_channel *src )
473{
474   dst->i[0] = (int) src->f[0];
475   dst->i[1] = (int) src->f[1];
476   dst->i[2] = (int) src->f[2];
477   dst->i[3] = (int) src->f[3];
478}
479
480static void
481micro_f2ut(
482   union tgsi_exec_channel *dst,
483   const union tgsi_exec_channel *src )
484{
485   dst->u[0] = (uint) src->f[0];
486   dst->u[1] = (uint) src->f[1];
487   dst->u[2] = (uint) src->f[2];
488   dst->u[3] = (uint) src->f[3];
489}
490
491static void
492micro_flr(
493   union tgsi_exec_channel *dst,
494   const union tgsi_exec_channel *src )
495{
496   dst->f[0] = floorf( src->f[0] );
497   dst->f[1] = floorf( src->f[1] );
498   dst->f[2] = floorf( src->f[2] );
499   dst->f[3] = floorf( src->f[3] );
500}
501
502static void
503micro_frc(
504   union tgsi_exec_channel *dst,
505   const union tgsi_exec_channel *src )
506{
507   dst->f[0] = src->f[0] - floorf( src->f[0] );
508   dst->f[1] = src->f[1] - floorf( src->f[1] );
509   dst->f[2] = src->f[2] - floorf( src->f[2] );
510   dst->f[3] = src->f[3] - floorf( src->f[3] );
511}
512
513static void
514micro_ge(
515   union tgsi_exec_channel *dst,
516   const union tgsi_exec_channel *src0,
517   const union tgsi_exec_channel *src1,
518   const union tgsi_exec_channel *src2,
519   const union tgsi_exec_channel *src3 )
520{
521   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
522   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
523   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
524   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
525}
526
527static void
528micro_i2f(
529   union tgsi_exec_channel *dst,
530   const union tgsi_exec_channel *src )
531{
532   dst->f[0] = (float) src->i[0];
533   dst->f[1] = (float) src->i[1];
534   dst->f[2] = (float) src->i[2];
535   dst->f[3] = (float) src->i[3];
536}
537
538static void
539micro_lg2(
540   union tgsi_exec_channel *dst,
541   const union tgsi_exec_channel *src )
542{
543#if FAST_MATH
544   dst->f[0] = util_fast_log2( src->f[0] );
545   dst->f[1] = util_fast_log2( src->f[1] );
546   dst->f[2] = util_fast_log2( src->f[2] );
547   dst->f[3] = util_fast_log2( src->f[3] );
548#else
549   dst->f[0] = logf( src->f[0] ) * 1.442695f;
550   dst->f[1] = logf( src->f[1] ) * 1.442695f;
551   dst->f[2] = logf( src->f[2] ) * 1.442695f;
552   dst->f[3] = logf( src->f[3] ) * 1.442695f;
553#endif
554}
555
556static void
557micro_le(
558   union tgsi_exec_channel *dst,
559   const union tgsi_exec_channel *src0,
560   const union tgsi_exec_channel *src1,
561   const union tgsi_exec_channel *src2,
562   const union tgsi_exec_channel *src3 )
563{
564   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
565   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
566   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
567   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
568}
569
570static void
571micro_lt(
572   union tgsi_exec_channel *dst,
573   const union tgsi_exec_channel *src0,
574   const union tgsi_exec_channel *src1,
575   const union tgsi_exec_channel *src2,
576   const union tgsi_exec_channel *src3 )
577{
578   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
579   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
580   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
581   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
582}
583
584static void
585micro_ilt(
586   union tgsi_exec_channel *dst,
587   const union tgsi_exec_channel *src0,
588   const union tgsi_exec_channel *src1,
589   const union tgsi_exec_channel *src2,
590   const union tgsi_exec_channel *src3 )
591{
592   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
593   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
594   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
595   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
596}
597
598static void
599micro_ult(
600   union tgsi_exec_channel *dst,
601   const union tgsi_exec_channel *src0,
602   const union tgsi_exec_channel *src1,
603   const union tgsi_exec_channel *src2,
604   const union tgsi_exec_channel *src3 )
605{
606   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
607   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
608   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
609   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
610}
611
612static void
613micro_max(
614   union tgsi_exec_channel *dst,
615   const union tgsi_exec_channel *src0,
616   const union tgsi_exec_channel *src1 )
617{
618   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
619   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
620   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
621   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
622}
623
624static void
625micro_imax(
626   union tgsi_exec_channel *dst,
627   const union tgsi_exec_channel *src0,
628   const union tgsi_exec_channel *src1 )
629{
630   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
631   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
632   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
633   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
634}
635
636static void
637micro_umax(
638   union tgsi_exec_channel *dst,
639   const union tgsi_exec_channel *src0,
640   const union tgsi_exec_channel *src1 )
641{
642   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
643   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
644   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
645   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
646}
647
648static void
649micro_min(
650   union tgsi_exec_channel *dst,
651   const union tgsi_exec_channel *src0,
652   const union tgsi_exec_channel *src1 )
653{
654   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
655   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
656   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
657   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
658}
659
660static void
661micro_imin(
662   union tgsi_exec_channel *dst,
663   const union tgsi_exec_channel *src0,
664   const union tgsi_exec_channel *src1 )
665{
666   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
667   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
668   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
669   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
670}
671
672static void
673micro_umin(
674   union tgsi_exec_channel *dst,
675   const union tgsi_exec_channel *src0,
676   const union tgsi_exec_channel *src1 )
677{
678   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
679   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
680   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
681   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
682}
683
684static void
685micro_umod(
686   union tgsi_exec_channel *dst,
687   const union tgsi_exec_channel *src0,
688   const union tgsi_exec_channel *src1 )
689{
690   dst->u[0] = src0->u[0] % src1->u[0];
691   dst->u[1] = src0->u[1] % src1->u[1];
692   dst->u[2] = src0->u[2] % src1->u[2];
693   dst->u[3] = src0->u[3] % src1->u[3];
694}
695
696static void
697micro_mul(
698   union tgsi_exec_channel *dst,
699   const union tgsi_exec_channel *src0,
700   const union tgsi_exec_channel *src1 )
701{
702   dst->f[0] = src0->f[0] * src1->f[0];
703   dst->f[1] = src0->f[1] * src1->f[1];
704   dst->f[2] = src0->f[2] * src1->f[2];
705   dst->f[3] = src0->f[3] * src1->f[3];
706}
707
708static void
709micro_imul(
710   union tgsi_exec_channel *dst,
711   const union tgsi_exec_channel *src0,
712   const union tgsi_exec_channel *src1 )
713{
714   dst->i[0] = src0->i[0] * src1->i[0];
715   dst->i[1] = src0->i[1] * src1->i[1];
716   dst->i[2] = src0->i[2] * src1->i[2];
717   dst->i[3] = src0->i[3] * src1->i[3];
718}
719
720static void
721micro_imul64(
722   union tgsi_exec_channel *dst0,
723   union tgsi_exec_channel *dst1,
724   const union tgsi_exec_channel *src0,
725   const union tgsi_exec_channel *src1 )
726{
727   dst1->i[0] = src0->i[0] * src1->i[0];
728   dst1->i[1] = src0->i[1] * src1->i[1];
729   dst1->i[2] = src0->i[2] * src1->i[2];
730   dst1->i[3] = src0->i[3] * src1->i[3];
731   dst0->i[0] = 0;
732   dst0->i[1] = 0;
733   dst0->i[2] = 0;
734   dst0->i[3] = 0;
735}
736
737static void
738micro_umul64(
739   union tgsi_exec_channel *dst0,
740   union tgsi_exec_channel *dst1,
741   const union tgsi_exec_channel *src0,
742   const union tgsi_exec_channel *src1 )
743{
744   dst1->u[0] = src0->u[0] * src1->u[0];
745   dst1->u[1] = src0->u[1] * src1->u[1];
746   dst1->u[2] = src0->u[2] * src1->u[2];
747   dst1->u[3] = src0->u[3] * src1->u[3];
748   dst0->u[0] = 0;
749   dst0->u[1] = 0;
750   dst0->u[2] = 0;
751   dst0->u[3] = 0;
752}
753
754static void
755micro_movc(
756   union tgsi_exec_channel *dst,
757   const union tgsi_exec_channel *src0,
758   const union tgsi_exec_channel *src1,
759   const union tgsi_exec_channel *src2 )
760{
761   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
762   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
763   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
764   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
765}
766
767static void
768micro_neg(
769   union tgsi_exec_channel *dst,
770   const union tgsi_exec_channel *src )
771{
772   dst->f[0] = -src->f[0];
773   dst->f[1] = -src->f[1];
774   dst->f[2] = -src->f[2];
775   dst->f[3] = -src->f[3];
776}
777
778static void
779micro_ineg(
780   union tgsi_exec_channel *dst,
781   const union tgsi_exec_channel *src )
782{
783   dst->i[0] = -src->i[0];
784   dst->i[1] = -src->i[1];
785   dst->i[2] = -src->i[2];
786   dst->i[3] = -src->i[3];
787}
788
789static void
790micro_not(
791   union tgsi_exec_channel *dst,
792   const union tgsi_exec_channel *src )
793{
794   dst->u[0] = ~src->u[0];
795   dst->u[1] = ~src->u[1];
796   dst->u[2] = ~src->u[2];
797   dst->u[3] = ~src->u[3];
798}
799
800static void
801micro_or(
802   union tgsi_exec_channel *dst,
803   const union tgsi_exec_channel *src0,
804   const union tgsi_exec_channel *src1 )
805{
806   dst->u[0] = src0->u[0] | src1->u[0];
807   dst->u[1] = src0->u[1] | src1->u[1];
808   dst->u[2] = src0->u[2] | src1->u[2];
809   dst->u[3] = src0->u[3] | src1->u[3];
810}
811
812static void
813micro_pow(
814   union tgsi_exec_channel *dst,
815   const union tgsi_exec_channel *src0,
816   const union tgsi_exec_channel *src1 )
817{
818#if FAST_MATH
819   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
820   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
821   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
822   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
823#else
824   dst->f[0] = powf( src0->f[0], src1->f[0] );
825   dst->f[1] = powf( src0->f[1], src1->f[1] );
826   dst->f[2] = powf( src0->f[2], src1->f[2] );
827   dst->f[3] = powf( src0->f[3], src1->f[3] );
828#endif
829}
830
831static void
832micro_rnd(
833   union tgsi_exec_channel *dst,
834   const union tgsi_exec_channel *src )
835{
836   dst->f[0] = floorf( src->f[0] + 0.5f );
837   dst->f[1] = floorf( src->f[1] + 0.5f );
838   dst->f[2] = floorf( src->f[2] + 0.5f );
839   dst->f[3] = floorf( src->f[3] + 0.5f );
840}
841
842static void
843micro_shl(
844   union tgsi_exec_channel *dst,
845   const union tgsi_exec_channel *src0,
846   const union tgsi_exec_channel *src1 )
847{
848   dst->i[0] = src0->i[0] << src1->i[0];
849   dst->i[1] = src0->i[1] << src1->i[1];
850   dst->i[2] = src0->i[2] << src1->i[2];
851   dst->i[3] = src0->i[3] << src1->i[3];
852}
853
854static void
855micro_ishr(
856   union tgsi_exec_channel *dst,
857   const union tgsi_exec_channel *src0,
858   const union tgsi_exec_channel *src1 )
859{
860   dst->i[0] = src0->i[0] >> src1->i[0];
861   dst->i[1] = src0->i[1] >> src1->i[1];
862   dst->i[2] = src0->i[2] >> src1->i[2];
863   dst->i[3] = src0->i[3] >> src1->i[3];
864}
865
866static void
867micro_trunc(
868   union tgsi_exec_channel *dst,
869   const union tgsi_exec_channel *src0 )
870{
871   dst->f[0] = (float) (int) src0->f[0];
872   dst->f[1] = (float) (int) src0->f[1];
873   dst->f[2] = (float) (int) src0->f[2];
874   dst->f[3] = (float) (int) src0->f[3];
875}
876
877static void
878micro_ushr(
879   union tgsi_exec_channel *dst,
880   const union tgsi_exec_channel *src0,
881   const union tgsi_exec_channel *src1 )
882{
883   dst->u[0] = src0->u[0] >> src1->u[0];
884   dst->u[1] = src0->u[1] >> src1->u[1];
885   dst->u[2] = src0->u[2] >> src1->u[2];
886   dst->u[3] = src0->u[3] >> src1->u[3];
887}
888
889static void
890micro_sin(
891   union tgsi_exec_channel *dst,
892   const union tgsi_exec_channel *src )
893{
894   dst->f[0] = sinf( src->f[0] );
895   dst->f[1] = sinf( src->f[1] );
896   dst->f[2] = sinf( src->f[2] );
897   dst->f[3] = sinf( src->f[3] );
898}
899
900static void
901micro_sqrt( union tgsi_exec_channel *dst,
902            const union tgsi_exec_channel *src )
903{
904   dst->f[0] = sqrtf( src->f[0] );
905   dst->f[1] = sqrtf( src->f[1] );
906   dst->f[2] = sqrtf( src->f[2] );
907   dst->f[3] = sqrtf( src->f[3] );
908}
909
910static void
911micro_sub(
912   union tgsi_exec_channel *dst,
913   const union tgsi_exec_channel *src0,
914   const union tgsi_exec_channel *src1 )
915{
916   dst->f[0] = src0->f[0] - src1->f[0];
917   dst->f[1] = src0->f[1] - src1->f[1];
918   dst->f[2] = src0->f[2] - src1->f[2];
919   dst->f[3] = src0->f[3] - src1->f[3];
920}
921
922static void
923micro_u2f(
924   union tgsi_exec_channel *dst,
925   const union tgsi_exec_channel *src )
926{
927   dst->f[0] = (float) src->u[0];
928   dst->f[1] = (float) src->u[1];
929   dst->f[2] = (float) src->u[2];
930   dst->f[3] = (float) src->u[3];
931}
932
933static void
934micro_xor(
935   union tgsi_exec_channel *dst,
936   const union tgsi_exec_channel *src0,
937   const union tgsi_exec_channel *src1 )
938{
939   dst->u[0] = src0->u[0] ^ src1->u[0];
940   dst->u[1] = src0->u[1] ^ src1->u[1];
941   dst->u[2] = src0->u[2] ^ src1->u[2];
942   dst->u[3] = src0->u[3] ^ src1->u[3];
943}
944
945static void
946fetch_src_file_channel(
947   const struct tgsi_exec_machine *mach,
948   const uint file,
949   const uint swizzle,
950   const union tgsi_exec_channel *index,
951   union tgsi_exec_channel *chan )
952{
953   switch( swizzle ) {
954   case TGSI_EXTSWIZZLE_X:
955   case TGSI_EXTSWIZZLE_Y:
956   case TGSI_EXTSWIZZLE_Z:
957   case TGSI_EXTSWIZZLE_W:
958      switch( file ) {
959      case TGSI_FILE_CONSTANT:
960         assert(mach->Consts);
961         chan->f[0] = mach->Consts[index->i[0]][swizzle];
962         chan->f[1] = mach->Consts[index->i[1]][swizzle];
963         chan->f[2] = mach->Consts[index->i[2]][swizzle];
964         chan->f[3] = mach->Consts[index->i[3]][swizzle];
965         break;
966
967      case TGSI_FILE_INPUT:
968         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
969         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
970         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
971         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
972         break;
973
974      case TGSI_FILE_TEMPORARY:
975         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
976         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
977         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
978         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
979         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
980         break;
981
982      case TGSI_FILE_IMMEDIATE:
983         assert( index->i[0] < (int) mach->ImmLimit );
984         chan->f[0] = mach->Imms[index->i[0]][swizzle];
985         assert( index->i[1] < (int) mach->ImmLimit );
986         chan->f[1] = mach->Imms[index->i[1]][swizzle];
987         assert( index->i[2] < (int) mach->ImmLimit );
988         chan->f[2] = mach->Imms[index->i[2]][swizzle];
989         assert( index->i[3] < (int) mach->ImmLimit );
990         chan->f[3] = mach->Imms[index->i[3]][swizzle];
991         break;
992
993      case TGSI_FILE_ADDRESS:
994         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
995         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
996         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
997         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
998         break;
999
1000      case TGSI_FILE_OUTPUT:
1001         /* vertex/fragment output vars can be read too */
1002         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1003         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1004         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1005         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1006         break;
1007
1008      default:
1009         assert( 0 );
1010      }
1011      break;
1012
1013   case TGSI_EXTSWIZZLE_ZERO:
1014      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1015      break;
1016
1017   case TGSI_EXTSWIZZLE_ONE:
1018      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1019      break;
1020
1021   default:
1022      assert( 0 );
1023   }
1024}
1025
1026static void
1027fetch_source(
1028   const struct tgsi_exec_machine *mach,
1029   union tgsi_exec_channel *chan,
1030   const struct tgsi_full_src_register *reg,
1031   const uint chan_index )
1032{
1033   union tgsi_exec_channel index;
1034   uint swizzle;
1035
1036   index.i[0] =
1037   index.i[1] =
1038   index.i[2] =
1039   index.i[3] = reg->SrcRegister.Index;
1040
1041   if (reg->SrcRegister.Indirect) {
1042      union tgsi_exec_channel index2;
1043      union tgsi_exec_channel indir_index;
1044
1045      index2.i[0] =
1046      index2.i[1] =
1047      index2.i[2] =
1048      index2.i[3] = reg->SrcRegisterInd.Index;
1049
1050      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1051      fetch_src_file_channel(
1052         mach,
1053         reg->SrcRegisterInd.File,
1054         swizzle,
1055         &index2,
1056         &indir_index );
1057
1058      index.i[0] += indir_index.i[0];
1059      index.i[1] += indir_index.i[1];
1060      index.i[2] += indir_index.i[2];
1061      index.i[3] += indir_index.i[3];
1062   }
1063
1064   if( reg->SrcRegister.Dimension ) {
1065      switch( reg->SrcRegister.File ) {
1066      case TGSI_FILE_INPUT:
1067         index.i[0] *= 17;
1068         index.i[1] *= 17;
1069         index.i[2] *= 17;
1070         index.i[3] *= 17;
1071         break;
1072      case TGSI_FILE_CONSTANT:
1073         index.i[0] *= 4096;
1074         index.i[1] *= 4096;
1075         index.i[2] *= 4096;
1076         index.i[3] *= 4096;
1077         break;
1078      default:
1079         assert( 0 );
1080      }
1081
1082      index.i[0] += reg->SrcRegisterDim.Index;
1083      index.i[1] += reg->SrcRegisterDim.Index;
1084      index.i[2] += reg->SrcRegisterDim.Index;
1085      index.i[3] += reg->SrcRegisterDim.Index;
1086
1087      if (reg->SrcRegisterDim.Indirect) {
1088         union tgsi_exec_channel index2;
1089         union tgsi_exec_channel indir_index;
1090
1091         index2.i[0] =
1092         index2.i[1] =
1093         index2.i[2] =
1094         index2.i[3] = reg->SrcRegisterDimInd.Index;
1095
1096         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1097         fetch_src_file_channel(
1098            mach,
1099            reg->SrcRegisterDimInd.File,
1100            swizzle,
1101            &index2,
1102            &indir_index );
1103
1104         index.i[0] += indir_index.i[0];
1105         index.i[1] += indir_index.i[1];
1106         index.i[2] += indir_index.i[2];
1107         index.i[3] += indir_index.i[3];
1108      }
1109   }
1110
1111   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1112   fetch_src_file_channel(
1113      mach,
1114      reg->SrcRegister.File,
1115      swizzle,
1116      &index,
1117      chan );
1118
1119   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1120   case TGSI_UTIL_SIGN_CLEAR:
1121      micro_abs( chan, chan );
1122      break;
1123
1124   case TGSI_UTIL_SIGN_SET:
1125      micro_abs( chan, chan );
1126      micro_neg( chan, chan );
1127      break;
1128
1129   case TGSI_UTIL_SIGN_TOGGLE:
1130      micro_neg( chan, chan );
1131      break;
1132
1133   case TGSI_UTIL_SIGN_KEEP:
1134      break;
1135   }
1136
1137   if (reg->SrcRegisterExtMod.Complement) {
1138      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1139   }
1140}
1141
1142static void
1143store_dest(
1144   struct tgsi_exec_machine *mach,
1145   const union tgsi_exec_channel *chan,
1146   const struct tgsi_full_dst_register *reg,
1147   const struct tgsi_full_instruction *inst,
1148   uint chan_index )
1149{
1150   uint i;
1151   union tgsi_exec_channel null;
1152   union tgsi_exec_channel *dst;
1153   uint execmask = mach->ExecMask;
1154
1155   switch (reg->DstRegister.File) {
1156   case TGSI_FILE_NULL:
1157      dst = &null;
1158      break;
1159
1160   case TGSI_FILE_OUTPUT:
1161      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1162                           + reg->DstRegister.Index].xyzw[chan_index];
1163      break;
1164
1165   case TGSI_FILE_TEMPORARY:
1166      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1167      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1168      break;
1169
1170   case TGSI_FILE_ADDRESS:
1171      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1172      break;
1173
1174   default:
1175      assert( 0 );
1176      return;
1177   }
1178
1179   if (inst->InstructionExtNv.CondFlowEnable) {
1180      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1181      uint swizzle;
1182      uint shift;
1183      uint mask;
1184      uint test;
1185
1186      /* Only CC0 supported.
1187       */
1188      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1189
1190      switch (chan_index) {
1191      case CHAN_X:
1192         swizzle = inst->InstructionExtNv.CondSwizzleX;
1193         break;
1194      case CHAN_Y:
1195         swizzle = inst->InstructionExtNv.CondSwizzleY;
1196         break;
1197      case CHAN_Z:
1198         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1199         break;
1200      case CHAN_W:
1201         swizzle = inst->InstructionExtNv.CondSwizzleW;
1202         break;
1203      default:
1204         assert( 0 );
1205         return;
1206      }
1207
1208      switch (swizzle) {
1209      case TGSI_SWIZZLE_X:
1210         shift = TGSI_EXEC_CC_X_SHIFT;
1211         mask = TGSI_EXEC_CC_X_MASK;
1212         break;
1213      case TGSI_SWIZZLE_Y:
1214         shift = TGSI_EXEC_CC_Y_SHIFT;
1215         mask = TGSI_EXEC_CC_Y_MASK;
1216         break;
1217      case TGSI_SWIZZLE_Z:
1218         shift = TGSI_EXEC_CC_Z_SHIFT;
1219         mask = TGSI_EXEC_CC_Z_MASK;
1220         break;
1221      case TGSI_SWIZZLE_W:
1222         shift = TGSI_EXEC_CC_W_SHIFT;
1223         mask = TGSI_EXEC_CC_W_MASK;
1224         break;
1225      default:
1226         assert( 0 );
1227         return;
1228      }
1229
1230      switch (inst->InstructionExtNv.CondMask) {
1231      case TGSI_CC_GT:
1232         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1233         for (i = 0; i < QUAD_SIZE; i++)
1234            if (cc->u[i] & test)
1235               execmask &= ~(1 << i);
1236         break;
1237
1238      case TGSI_CC_EQ:
1239         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1240         for (i = 0; i < QUAD_SIZE; i++)
1241            if (cc->u[i] & test)
1242               execmask &= ~(1 << i);
1243         break;
1244
1245      case TGSI_CC_LT:
1246         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1247         for (i = 0; i < QUAD_SIZE; i++)
1248            if (cc->u[i] & test)
1249               execmask &= ~(1 << i);
1250         break;
1251
1252      case TGSI_CC_GE:
1253         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1254         for (i = 0; i < QUAD_SIZE; i++)
1255            if (cc->u[i] & test)
1256               execmask &= ~(1 << i);
1257         break;
1258
1259      case TGSI_CC_LE:
1260         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1261         for (i = 0; i < QUAD_SIZE; i++)
1262            if (cc->u[i] & test)
1263               execmask &= ~(1 << i);
1264         break;
1265
1266      case TGSI_CC_NE:
1267         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1268         for (i = 0; i < QUAD_SIZE; i++)
1269            if (cc->u[i] & test)
1270               execmask &= ~(1 << i);
1271         break;
1272
1273      case TGSI_CC_TR:
1274         break;
1275
1276      case TGSI_CC_FL:
1277         for (i = 0; i < QUAD_SIZE; i++)
1278            execmask &= ~(1 << i);
1279         break;
1280
1281      default:
1282         assert( 0 );
1283         return;
1284      }
1285   }
1286
1287   switch (inst->Instruction.Saturate) {
1288   case TGSI_SAT_NONE:
1289      for (i = 0; i < QUAD_SIZE; i++)
1290         if (execmask & (1 << i))
1291            dst->i[i] = chan->i[i];
1292      break;
1293
1294   case TGSI_SAT_ZERO_ONE:
1295      for (i = 0; i < QUAD_SIZE; i++)
1296         if (execmask & (1 << i)) {
1297            if (chan->f[i] < 0.0f)
1298               dst->f[i] = 0.0f;
1299            else if (chan->f[i] > 1.0f)
1300               dst->f[i] = 1.0f;
1301            else
1302               dst->i[i] = chan->i[i];
1303         }
1304      break;
1305
1306   case TGSI_SAT_MINUS_PLUS_ONE:
1307      for (i = 0; i < QUAD_SIZE; i++)
1308         if (execmask & (1 << i)) {
1309            if (chan->f[i] < -1.0f)
1310               dst->f[i] = -1.0f;
1311            else if (chan->f[i] > 1.0f)
1312               dst->f[i] = 1.0f;
1313            else
1314               dst->i[i] = chan->i[i];
1315         }
1316      break;
1317
1318   default:
1319      assert( 0 );
1320   }
1321
1322   if (inst->InstructionExtNv.CondDstUpdate) {
1323      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1324      uint shift;
1325      uint mask;
1326
1327      /* Only CC0 supported.
1328       */
1329      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1330
1331      switch (chan_index) {
1332      case CHAN_X:
1333         shift = TGSI_EXEC_CC_X_SHIFT;
1334         mask = ~TGSI_EXEC_CC_X_MASK;
1335         break;
1336      case CHAN_Y:
1337         shift = TGSI_EXEC_CC_Y_SHIFT;
1338         mask = ~TGSI_EXEC_CC_Y_MASK;
1339         break;
1340      case CHAN_Z:
1341         shift = TGSI_EXEC_CC_Z_SHIFT;
1342         mask = ~TGSI_EXEC_CC_Z_MASK;
1343         break;
1344      case CHAN_W:
1345         shift = TGSI_EXEC_CC_W_SHIFT;
1346         mask = ~TGSI_EXEC_CC_W_MASK;
1347         break;
1348      default:
1349         assert( 0 );
1350         return;
1351      }
1352
1353      for (i = 0; i < QUAD_SIZE; i++)
1354         if (execmask & (1 << i)) {
1355            cc->u[i] &= mask;
1356            if (dst->f[i] < 0.0f)
1357               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1358            else if (dst->f[i] > 0.0f)
1359               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1360            else if (dst->f[i] == 0.0f)
1361               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1362            else
1363               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1364         }
1365   }
1366}
1367
/*
 * Shorthands for fetch_source() / store_dest().  Both expand to code that
 * refers to variables named `mach` and `inst`, which must be in scope at
 * the point of use.
 */

/** Fetch channel CHAN of source operand INDEX into *VAL. */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

/** Store *VAL into channel CHAN of destination operand INDEX. */
#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1373
1374
1375/**
1376 * Execute ARB-style KIL which is predicated by a src register.
1377 * Kill fragment if any of the four values is less than zero.
1378 */
1379static void
1380exec_kil(struct tgsi_exec_machine *mach,
1381         const struct tgsi_full_instruction *inst)
1382{
1383   uint uniquemask;
1384   uint chan_index;
1385   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1386   union tgsi_exec_channel r[1];
1387
1388   /* This mask stores component bits that were already tested. Note that
1389    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1390    * tested. */
1391   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1392
1393   for (chan_index = 0; chan_index < 4; chan_index++)
1394   {
1395      uint swizzle;
1396      uint i;
1397
1398      /* unswizzle channel */
1399      swizzle = tgsi_util_get_full_src_register_extswizzle (
1400                        &inst->FullSrcRegisters[0],
1401                        chan_index);
1402
1403      /* check if the component has not been already tested */
1404      if (uniquemask & (1 << swizzle))
1405         continue;
1406      uniquemask |= 1 << swizzle;
1407
1408      FETCH(&r[0], 0, chan_index);
1409      for (i = 0; i < 4; i++)
1410         if (r[0].f[i] < 0.0f)
1411            kilmask |= 1 << i;
1412   }
1413
1414   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1415}
1416
1417/**
1418 * Execute NVIDIA-style KIL which is predicated by a condition code.
1419 * Kill fragment if the condition code is TRUE.
1420 */
1421static void
1422exec_kilp(struct tgsi_exec_machine *mach,
1423          const struct tgsi_full_instruction *inst)
1424{
1425   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1426
1427   if (inst->InstructionExtNv.CondFlowEnable) {
1428      uint swizzle[4];
1429      uint chan_index;
1430
1431      kilmask = 0x0;
1432
1433      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1434      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1435      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1436      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1437
1438      for (chan_index = 0; chan_index < 4; chan_index++)
1439      {
1440         uint i;
1441
1442         for (i = 0; i < 4; i++) {
1443            /* TODO: evaluate the condition code */
1444            if (0)
1445               kilmask |= 1 << i;
1446         }
1447      }
1448   }
1449   else {
1450      /* "unconditional" kil */
1451      kilmask = mach->ExecMask;
1452   }
1453   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1454}
1455
1456
1457/*
1458 * Fetch a texel using STR texture coordinates.
1459 */
1460static void
1461fetch_texel( struct tgsi_sampler *sampler,
1462             const union tgsi_exec_channel *s,
1463             const union tgsi_exec_channel *t,
1464             const union tgsi_exec_channel *p,
1465             float lodbias,  /* XXX should be float[4] */
1466             union tgsi_exec_channel *r,
1467             union tgsi_exec_channel *g,
1468             union tgsi_exec_channel *b,
1469             union tgsi_exec_channel *a )
1470{
1471   uint j;
1472   float rgba[NUM_CHANNELS][QUAD_SIZE];
1473
1474   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1475
1476   for (j = 0; j < 4; j++) {
1477      r->f[j] = rgba[0][j];
1478      g->f[j] = rgba[1][j];
1479      b->f[j] = rgba[2][j];
1480      a->f[j] = rgba[3][j];
1481   }
1482}
1483
1484
1485static void
1486exec_tex(struct tgsi_exec_machine *mach,
1487         const struct tgsi_full_instruction *inst,
1488         boolean biasLod,
1489         boolean projected)
1490{
1491   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1492   union tgsi_exec_channel r[8];
1493   uint chan_index;
1494   float lodBias;
1495
1496   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1497
1498   switch (inst->InstructionExtTexture.Texture) {
1499   case TGSI_TEXTURE_1D:
1500
1501      FETCH(&r[0], 0, CHAN_X);
1502
1503      if (projected) {
1504         FETCH(&r[1], 0, CHAN_W);
1505         micro_div( &r[0], &r[0], &r[1] );
1506      }
1507
1508      if (biasLod) {
1509         FETCH(&r[1], 0, CHAN_W);
1510         lodBias = r[2].f[0];
1511      }
1512      else
1513         lodBias = 0.0;
1514
1515      fetch_texel(&mach->Samplers[unit],
1516                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
1517                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1518      break;
1519
1520   case TGSI_TEXTURE_2D:
1521   case TGSI_TEXTURE_RECT:
1522
1523      FETCH(&r[0], 0, CHAN_X);
1524      FETCH(&r[1], 0, CHAN_Y);
1525      FETCH(&r[2], 0, CHAN_Z);
1526
1527      if (projected) {
1528         FETCH(&r[3], 0, CHAN_W);
1529         micro_div( &r[0], &r[0], &r[3] );
1530         micro_div( &r[1], &r[1], &r[3] );
1531         micro_div( &r[2], &r[2], &r[3] );
1532      }
1533
1534      if (biasLod) {
1535         FETCH(&r[3], 0, CHAN_W);
1536         lodBias = r[3].f[0];
1537      }
1538      else
1539         lodBias = 0.0;
1540
1541      fetch_texel(&mach->Samplers[unit],
1542                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1543                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1544      break;
1545
1546   case TGSI_TEXTURE_3D:
1547   case TGSI_TEXTURE_CUBE:
1548
1549      FETCH(&r[0], 0, CHAN_X);
1550      FETCH(&r[1], 0, CHAN_Y);
1551      FETCH(&r[2], 0, CHAN_Z);
1552
1553      if (projected) {
1554         FETCH(&r[3], 0, CHAN_W);
1555         micro_div( &r[0], &r[0], &r[3] );
1556         micro_div( &r[1], &r[1], &r[3] );
1557         micro_div( &r[2], &r[2], &r[3] );
1558      }
1559
1560      if (biasLod) {
1561         FETCH(&r[3], 0, CHAN_W);
1562         lodBias = r[3].f[0];
1563      }
1564      else
1565         lodBias = 0.0;
1566
1567      fetch_texel(&mach->Samplers[unit],
1568                  &r[0], &r[1], &r[2], lodBias,
1569                  &r[0], &r[1], &r[2], &r[3]);
1570      break;
1571
1572   default:
1573      assert (0);
1574   }
1575
1576   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1577      STORE( &r[chan_index], 0, chan_index );
1578   }
1579}
1580
1581
1582/**
1583 * Evaluate a constant-valued coefficient at the position of the
1584 * current quad.
1585 */
1586static void
1587eval_constant_coef(
1588   struct tgsi_exec_machine *mach,
1589   unsigned attrib,
1590   unsigned chan )
1591{
1592   unsigned i;
1593
1594   for( i = 0; i < QUAD_SIZE; i++ ) {
1595      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1596   }
1597}
1598
1599/**
1600 * Evaluate a linear-valued coefficient at the position of the
1601 * current quad.
1602 */
1603static void
1604eval_linear_coef(
1605   struct tgsi_exec_machine *mach,
1606   unsigned attrib,
1607   unsigned chan )
1608{
1609   const float x = mach->QuadPos.xyzw[0].f[0];
1610   const float y = mach->QuadPos.xyzw[1].f[0];
1611   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1612   const float dady = mach->InterpCoefs[attrib].dady[chan];
1613   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1614   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1615   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1616   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1617   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1618}
1619
1620/**
1621 * Evaluate a perspective-valued coefficient at the position of the
1622 * current quad.
1623 */
1624static void
1625eval_perspective_coef(
1626   struct tgsi_exec_machine *mach,
1627   unsigned attrib,
1628   unsigned chan )
1629{
1630   const float x = mach->QuadPos.xyzw[0].f[0];
1631   const float y = mach->QuadPos.xyzw[1].f[0];
1632   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1633   const float dady = mach->InterpCoefs[attrib].dady[chan];
1634   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1635   const float *w = mach->QuadPos.xyzw[3].f;
1636   /* divide by W here */
1637   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1638   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1639   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1640   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1641}
1642
1643
/**
 * Signature shared by the eval_*_coef() routines: evaluate channel `chan`
 * of input attribute `attrib` for the current quad.
 */
typedef void (*eval_coef_func)( struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan );
1648
1649static void
1650exec_declaration(
1651   struct tgsi_exec_machine *mach,
1652   const struct tgsi_full_declaration *decl )
1653{
1654   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1655      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1656         unsigned first, last, mask;
1657         eval_coef_func eval;
1658
1659         first = decl->DeclarationRange.First;
1660         last = decl->DeclarationRange.Last;
1661         mask = decl->Declaration.UsageMask;
1662
1663         switch( decl->Declaration.Interpolate ) {
1664         case TGSI_INTERPOLATE_CONSTANT:
1665            eval = eval_constant_coef;
1666            break;
1667
1668         case TGSI_INTERPOLATE_LINEAR:
1669            eval = eval_linear_coef;
1670            break;
1671
1672         case TGSI_INTERPOLATE_PERSPECTIVE:
1673            eval = eval_perspective_coef;
1674            break;
1675
1676         default:
1677            eval = NULL;
1678            assert( 0 );
1679         }
1680
1681         if( mask == TGSI_WRITEMASK_XYZW ) {
1682            unsigned i, j;
1683
1684            for( i = first; i <= last; i++ ) {
1685               for( j = 0; j < NUM_CHANNELS; j++ ) {
1686                  eval( mach, i, j );
1687               }
1688            }
1689         }
1690         else {
1691            unsigned i, j;
1692
1693            for( j = 0; j < NUM_CHANNELS; j++ ) {
1694               if( mask & (1 << j) ) {
1695                  for( i = first; i <= last; i++ ) {
1696                     eval( mach, i, j );
1697                  }
1698               }
1699            }
1700         }
1701      }
1702   }
1703}
1704
1705static void
1706exec_instruction(
1707   struct tgsi_exec_machine *mach,
1708   const struct tgsi_full_instruction *inst,
1709   int *pc )
1710{
1711   uint chan_index;
1712   union tgsi_exec_channel r[8];
1713
1714   (*pc)++;
1715
1716   switch (inst->Instruction.Opcode) {
1717   case TGSI_OPCODE_ARL:
1718      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1719         FETCH( &r[0], 0, chan_index );
1720         micro_f2it( &r[0], &r[0] );
1721         STORE( &r[0], 0, chan_index );
1722      }
1723      break;
1724
1725   case TGSI_OPCODE_MOV:
1726   case TGSI_OPCODE_SWZ:
1727      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1728         FETCH( &r[0], 0, chan_index );
1729         STORE( &r[0], 0, chan_index );
1730      }
1731      break;
1732
1733   case TGSI_OPCODE_LIT:
1734      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1735	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1736      }
1737
1738      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1739	 FETCH( &r[0], 0, CHAN_X );
1740	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1741	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1742	    STORE( &r[0], 0, CHAN_Y );
1743	 }
1744
1745	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1746	    FETCH( &r[1], 0, CHAN_Y );
1747	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1748
1749	    FETCH( &r[2], 0, CHAN_W );
1750	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1751	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1752	    micro_pow( &r[1], &r[1], &r[2] );
1753	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1754	    STORE( &r[0], 0, CHAN_Z );
1755	 }
1756      }
1757
1758      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1759	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1760      }
1761      break;
1762
1763   case TGSI_OPCODE_RCP:
1764   /* TGSI_OPCODE_RECIP */
1765      FETCH( &r[0], 0, CHAN_X );
1766      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1767      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1768	 STORE( &r[0], 0, chan_index );
1769      }
1770      break;
1771
1772   case TGSI_OPCODE_RSQ:
1773   /* TGSI_OPCODE_RECIPSQRT */
1774      FETCH( &r[0], 0, CHAN_X );
1775      micro_sqrt( &r[0], &r[0] );
1776      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1777      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1778	 STORE( &r[0], 0, chan_index );
1779      }
1780      break;
1781
1782   case TGSI_OPCODE_EXP:
1783      FETCH( &r[0], 0, CHAN_X );
1784      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1785      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1786         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1787         STORE( &r[2], 0, CHAN_X );        /* store r2 */
1788      }
1789      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1790         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1791         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1792      }
1793      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1794         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1795         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1796      }
1797      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1798         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1799      }
1800      break;
1801
1802   case TGSI_OPCODE_LOG:
1803      FETCH( &r[0], 0, CHAN_X );
1804      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1805      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1806      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1807      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1808         STORE( &r[0], 0, CHAN_X );
1809      }
1810      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1811         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1812         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1813         STORE( &r[0], 0, CHAN_Y );
1814      }
1815      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1816         STORE( &r[1], 0, CHAN_Z );
1817      }
1818      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1819         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1820      }
1821      break;
1822
1823   case TGSI_OPCODE_MUL:
1824      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1825      {
1826         FETCH(&r[0], 0, chan_index);
1827         FETCH(&r[1], 1, chan_index);
1828
1829         micro_mul( &r[0], &r[0], &r[1] );
1830
1831         STORE(&r[0], 0, chan_index);
1832      }
1833      break;
1834
1835   case TGSI_OPCODE_ADD:
1836      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1837         FETCH( &r[0], 0, chan_index );
1838         FETCH( &r[1], 1, chan_index );
1839         micro_add( &r[0], &r[0], &r[1] );
1840         STORE( &r[0], 0, chan_index );
1841      }
1842      break;
1843
1844   case TGSI_OPCODE_DP3:
1845   /* TGSI_OPCODE_DOT3 */
1846      FETCH( &r[0], 0, CHAN_X );
1847      FETCH( &r[1], 1, CHAN_X );
1848      micro_mul( &r[0], &r[0], &r[1] );
1849
1850      FETCH( &r[1], 0, CHAN_Y );
1851      FETCH( &r[2], 1, CHAN_Y );
1852      micro_mul( &r[1], &r[1], &r[2] );
1853      micro_add( &r[0], &r[0], &r[1] );
1854
1855      FETCH( &r[1], 0, CHAN_Z );
1856      FETCH( &r[2], 1, CHAN_Z );
1857      micro_mul( &r[1], &r[1], &r[2] );
1858      micro_add( &r[0], &r[0], &r[1] );
1859
1860      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1861         STORE( &r[0], 0, chan_index );
1862      }
1863      break;
1864
1865    case TGSI_OPCODE_DP4:
1866    /* TGSI_OPCODE_DOT4 */
1867       FETCH(&r[0], 0, CHAN_X);
1868       FETCH(&r[1], 1, CHAN_X);
1869
1870       micro_mul( &r[0], &r[0], &r[1] );
1871
1872       FETCH(&r[1], 0, CHAN_Y);
1873       FETCH(&r[2], 1, CHAN_Y);
1874
1875       micro_mul( &r[1], &r[1], &r[2] );
1876       micro_add( &r[0], &r[0], &r[1] );
1877
1878       FETCH(&r[1], 0, CHAN_Z);
1879       FETCH(&r[2], 1, CHAN_Z);
1880
1881       micro_mul( &r[1], &r[1], &r[2] );
1882       micro_add( &r[0], &r[0], &r[1] );
1883
1884       FETCH(&r[1], 0, CHAN_W);
1885       FETCH(&r[2], 1, CHAN_W);
1886
1887       micro_mul( &r[1], &r[1], &r[2] );
1888       micro_add( &r[0], &r[0], &r[1] );
1889
1890      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1891	 STORE( &r[0], 0, chan_index );
1892      }
1893      break;
1894
1895   case TGSI_OPCODE_DST:
1896      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1897	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1898      }
1899
1900      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1901	 FETCH( &r[0], 0, CHAN_Y );
1902	 FETCH( &r[1], 1, CHAN_Y);
1903	 micro_mul( &r[0], &r[0], &r[1] );
1904	 STORE( &r[0], 0, CHAN_Y );
1905      }
1906
1907      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1908	 FETCH( &r[0], 0, CHAN_Z );
1909	 STORE( &r[0], 0, CHAN_Z );
1910      }
1911
1912      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1913	 FETCH( &r[0], 1, CHAN_W );
1914	 STORE( &r[0], 0, CHAN_W );
1915      }
1916      break;
1917
1918   case TGSI_OPCODE_MIN:
1919      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1920         FETCH(&r[0], 0, chan_index);
1921         FETCH(&r[1], 1, chan_index);
1922
1923         /* XXX use micro_min()?? */
1924         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1925
1926         STORE(&r[0], 0, chan_index);
1927      }
1928      break;
1929
1930   case TGSI_OPCODE_MAX:
1931      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1932         FETCH(&r[0], 0, chan_index);
1933         FETCH(&r[1], 1, chan_index);
1934
1935         /* XXX use micro_max()?? */
1936         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1937
1938         STORE(&r[0], 0, chan_index );
1939      }
1940      break;
1941
1942   case TGSI_OPCODE_SLT:
1943   /* TGSI_OPCODE_SETLT */
1944      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1945         FETCH( &r[0], 0, chan_index );
1946         FETCH( &r[1], 1, chan_index );
1947         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1948         STORE( &r[0], 0, chan_index );
1949      }
1950      break;
1951
1952   case TGSI_OPCODE_SGE:
1953   /* TGSI_OPCODE_SETGE */
1954      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1955         FETCH( &r[0], 0, chan_index );
1956         FETCH( &r[1], 1, chan_index );
1957         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1958         STORE( &r[0], 0, chan_index );
1959      }
1960      break;
1961
1962   case TGSI_OPCODE_MAD:
1963   /* TGSI_OPCODE_MADD */
1964      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1965         FETCH( &r[0], 0, chan_index );
1966         FETCH( &r[1], 1, chan_index );
1967         micro_mul( &r[0], &r[0], &r[1] );
1968         FETCH( &r[1], 2, chan_index );
1969         micro_add( &r[0], &r[0], &r[1] );
1970         STORE( &r[0], 0, chan_index );
1971      }
1972      break;
1973
1974   case TGSI_OPCODE_SUB:
1975      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1976         FETCH(&r[0], 0, chan_index);
1977         FETCH(&r[1], 1, chan_index);
1978
1979         micro_sub( &r[0], &r[0], &r[1] );
1980
1981         STORE(&r[0], 0, chan_index);
1982      }
1983      break;
1984
1985   case TGSI_OPCODE_LERP:
1986   /* TGSI_OPCODE_LRP */
1987      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1988         FETCH(&r[0], 0, chan_index);
1989         FETCH(&r[1], 1, chan_index);
1990         FETCH(&r[2], 2, chan_index);
1991
1992         micro_sub( &r[1], &r[1], &r[2] );
1993         micro_mul( &r[0], &r[0], &r[1] );
1994         micro_add( &r[0], &r[0], &r[2] );
1995
1996         STORE(&r[0], 0, chan_index);
1997      }
1998      break;
1999
2000   case TGSI_OPCODE_CND:
2001      assert (0);
2002      break;
2003
2004   case TGSI_OPCODE_CND0:
2005      assert (0);
2006      break;
2007
2008   case TGSI_OPCODE_DOT2ADD:
2009      /* TGSI_OPCODE_DP2A */
2010      assert (0);
2011      break;
2012
2013   case TGSI_OPCODE_INDEX:
2014      assert (0);
2015      break;
2016
2017   case TGSI_OPCODE_NEGATE:
2018      assert (0);
2019      break;
2020
2021   case TGSI_OPCODE_FRAC:
2022   /* TGSI_OPCODE_FRC */
2023      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2024         FETCH( &r[0], 0, chan_index );
2025         micro_frc( &r[0], &r[0] );
2026         STORE( &r[0], 0, chan_index );
2027      }
2028      break;
2029
2030   case TGSI_OPCODE_CLAMP:
2031      assert (0);
2032      break;
2033
2034   case TGSI_OPCODE_FLOOR:
2035   /* TGSI_OPCODE_FLR */
2036      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2037         FETCH( &r[0], 0, chan_index );
2038         micro_flr( &r[0], &r[0] );
2039         STORE( &r[0], 0, chan_index );
2040      }
2041      break;
2042
2043   case TGSI_OPCODE_ROUND:
2044      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2045         FETCH( &r[0], 0, chan_index );
2046         micro_rnd( &r[0], &r[0] );
2047         STORE( &r[0], 0, chan_index );
2048      }
2049      break;
2050
2051   case TGSI_OPCODE_EXPBASE2:
2052    /* TGSI_OPCODE_EX2 */
2053      FETCH(&r[0], 0, CHAN_X);
2054
2055#if FAST_MATH
2056      micro_exp2( &r[0], &r[0] );
2057#else
2058      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2059#endif
2060
2061      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2062	 STORE( &r[0], 0, chan_index );
2063      }
2064      break;
2065
2066   case TGSI_OPCODE_LOGBASE2:
2067   /* TGSI_OPCODE_LG2 */
2068      FETCH( &r[0], 0, CHAN_X );
2069      micro_lg2( &r[0], &r[0] );
2070      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2071         STORE( &r[0], 0, chan_index );
2072      }
2073      break;
2074
2075   case TGSI_OPCODE_POWER:
2076      /* TGSI_OPCODE_POW */
2077      FETCH(&r[0], 0, CHAN_X);
2078      FETCH(&r[1], 1, CHAN_X);
2079
2080      micro_pow( &r[0], &r[0], &r[1] );
2081
2082      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2083	 STORE( &r[0], 0, chan_index );
2084      }
2085      break;
2086
2087   case TGSI_OPCODE_CROSSPRODUCT:
2088      /* TGSI_OPCODE_XPD */
2089      FETCH(&r[0], 0, CHAN_Y);
2090      FETCH(&r[1], 1, CHAN_Z);
2091
2092      micro_mul( &r[2], &r[0], &r[1] );
2093
2094      FETCH(&r[3], 0, CHAN_Z);
2095      FETCH(&r[4], 1, CHAN_Y);
2096
2097      micro_mul( &r[5], &r[3], &r[4] );
2098      micro_sub( &r[2], &r[2], &r[5] );
2099
2100      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2101         STORE( &r[2], 0, CHAN_X );
2102      }
2103
2104      FETCH(&r[2], 1, CHAN_X);
2105
2106      micro_mul( &r[3], &r[3], &r[2] );
2107
2108      FETCH(&r[5], 0, CHAN_X);
2109
2110      micro_mul( &r[1], &r[1], &r[5] );
2111      micro_sub( &r[3], &r[3], &r[1] );
2112
2113      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2114         STORE( &r[3], 0, CHAN_Y );
2115      }
2116
2117      micro_mul( &r[5], &r[5], &r[4] );
2118      micro_mul( &r[0], &r[0], &r[2] );
2119      micro_sub( &r[5], &r[5], &r[0] );
2120
2121      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2122         STORE( &r[5], 0, CHAN_Z );
2123      }
2124
2125      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2126         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2127      }
2128      break;
2129
2130    case TGSI_OPCODE_MULTIPLYMATRIX:
2131       assert (0);
2132       break;
2133
2134    case TGSI_OPCODE_ABS:
2135       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2136          FETCH(&r[0], 0, chan_index);
2137
2138          micro_abs( &r[0], &r[0] );
2139
2140          STORE(&r[0], 0, chan_index);
2141       }
2142       break;
2143
2144   case TGSI_OPCODE_RCC:
2145      assert (0);
2146      break;
2147
2148   case TGSI_OPCODE_DPH:
2149      FETCH(&r[0], 0, CHAN_X);
2150      FETCH(&r[1], 1, CHAN_X);
2151
2152      micro_mul( &r[0], &r[0], &r[1] );
2153
2154      FETCH(&r[1], 0, CHAN_Y);
2155      FETCH(&r[2], 1, CHAN_Y);
2156
2157      micro_mul( &r[1], &r[1], &r[2] );
2158      micro_add( &r[0], &r[0], &r[1] );
2159
2160      FETCH(&r[1], 0, CHAN_Z);
2161      FETCH(&r[2], 1, CHAN_Z);
2162
2163      micro_mul( &r[1], &r[1], &r[2] );
2164      micro_add( &r[0], &r[0], &r[1] );
2165
2166      FETCH(&r[1], 1, CHAN_W);
2167
2168      micro_add( &r[0], &r[0], &r[1] );
2169
2170      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2171	 STORE( &r[0], 0, chan_index );
2172      }
2173      break;
2174
2175   case TGSI_OPCODE_COS:
2176      FETCH(&r[0], 0, CHAN_X);
2177
2178      micro_cos( &r[0], &r[0] );
2179
2180      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2181	 STORE( &r[0], 0, chan_index );
2182      }
2183      break;
2184
2185   case TGSI_OPCODE_DDX:
2186      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2187         FETCH( &r[0], 0, chan_index );
2188         micro_ddx( &r[0], &r[0] );
2189         STORE( &r[0], 0, chan_index );
2190      }
2191      break;
2192
2193   case TGSI_OPCODE_DDY:
2194      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2195         FETCH( &r[0], 0, chan_index );
2196         micro_ddy( &r[0], &r[0] );
2197         STORE( &r[0], 0, chan_index );
2198      }
2199      break;
2200
2201   case TGSI_OPCODE_KILP:
2202      exec_kilp (mach, inst);
2203      break;
2204
2205   case TGSI_OPCODE_KIL:
2206      exec_kil (mach, inst);
2207      break;
2208
2209   case TGSI_OPCODE_PK2H:
2210      assert (0);
2211      break;
2212
2213   case TGSI_OPCODE_PK2US:
2214      assert (0);
2215      break;
2216
2217   case TGSI_OPCODE_PK4B:
2218      assert (0);
2219      break;
2220
2221   case TGSI_OPCODE_PK4UB:
2222      assert (0);
2223      break;
2224
2225   case TGSI_OPCODE_RFL:
2226      assert (0);
2227      break;
2228
2229   case TGSI_OPCODE_SEQ:
2230      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2231         FETCH( &r[0], 0, chan_index );
2232         FETCH( &r[1], 1, chan_index );
2233         micro_eq( &r[0], &r[0], &r[1],
2234                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2235                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2236         STORE( &r[0], 0, chan_index );
2237      }
2238      break;
2239
2240   case TGSI_OPCODE_SFL:
2241      assert (0);
2242      break;
2243
2244   case TGSI_OPCODE_SGT:
2245      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2246         FETCH( &r[0], 0, chan_index );
2247         FETCH( &r[1], 1, chan_index );
2248         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2249         STORE( &r[0], 0, chan_index );
2250      }
2251      break;
2252
2253   case TGSI_OPCODE_SIN:
2254      FETCH( &r[0], 0, CHAN_X );
2255      micro_sin( &r[0], &r[0] );
2256      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2257         STORE( &r[0], 0, chan_index );
2258      }
2259      break;
2260
2261   case TGSI_OPCODE_SLE:
2262      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2263         FETCH( &r[0], 0, chan_index );
2264         FETCH( &r[1], 1, chan_index );
2265         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2266         STORE( &r[0], 0, chan_index );
2267      }
2268      break;
2269
2270   case TGSI_OPCODE_SNE:
2271      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2272         FETCH( &r[0], 0, chan_index );
2273         FETCH( &r[1], 1, chan_index );
2274         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2275         STORE( &r[0], 0, chan_index );
2276      }
2277      break;
2278
2279   case TGSI_OPCODE_STR:
2280      assert (0);
2281      break;
2282
2283   case TGSI_OPCODE_TEX:
2284      /* simple texture lookup */
2285      /* src[0] = texcoord */
2286      /* src[1] = sampler unit */
2287      exec_tex(mach, inst, FALSE, FALSE);
2288      break;
2289
2290   case TGSI_OPCODE_TXB:
2291      /* Texture lookup with lod bias */
2292      /* src[0] = texcoord (src[0].w = LOD bias) */
2293      /* src[1] = sampler unit */
2294      exec_tex(mach, inst, TRUE, FALSE);
2295      break;
2296
2297   case TGSI_OPCODE_TXD:
2298      /* Texture lookup with explicit partial derivatives */
2299      /* src[0] = texcoord */
2300      /* src[1] = d[strq]/dx */
2301      /* src[2] = d[strq]/dy */
2302      /* src[3] = sampler unit */
2303      assert (0);
2304      break;
2305
2306   case TGSI_OPCODE_TXL:
2307      /* Texture lookup with explicit LOD */
2308      /* src[0] = texcoord (src[0].w = LOD) */
2309      /* src[1] = sampler unit */
2310      exec_tex(mach, inst, TRUE, FALSE);
2311      break;
2312
2313   case TGSI_OPCODE_TXP:
2314      /* Texture lookup with projection */
2315      /* src[0] = texcoord (src[0].w = projection) */
2316      /* src[1] = sampler unit */
2317      exec_tex(mach, inst, FALSE, TRUE);
2318      break;
2319
2320   case TGSI_OPCODE_UP2H:
2321      assert (0);
2322      break;
2323
2324   case TGSI_OPCODE_UP2US:
2325      assert (0);
2326      break;
2327
2328   case TGSI_OPCODE_UP4B:
2329      assert (0);
2330      break;
2331
2332   case TGSI_OPCODE_UP4UB:
2333      assert (0);
2334      break;
2335
2336   case TGSI_OPCODE_X2D:
2337      assert (0);
2338      break;
2339
2340   case TGSI_OPCODE_ARA:
2341      assert (0);
2342      break;
2343
2344   case TGSI_OPCODE_ARR:
2345      assert (0);
2346      break;
2347
2348   case TGSI_OPCODE_BRA:
2349      assert (0);
2350      break;
2351
2352   case TGSI_OPCODE_CAL:
2353      /* skip the call if no execution channels are enabled */
2354      if (mach->ExecMask) {
2355         /* do the call */
2356
2357         /* push the Cond, Loop, Cont stacks */
2358         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2359         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2360         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2361         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2362         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2363         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2364
2365         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2366         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2367
2368         /* note that PC was already incremented above */
2369         mach->CallStack[mach->CallStackTop++] = *pc;
2370         *pc = inst->InstructionExtLabel.Label;
2371      }
2372      break;
2373
2374   case TGSI_OPCODE_RET:
2375      mach->FuncMask &= ~mach->ExecMask;
2376      UPDATE_EXEC_MASK(mach);
2377
2378      if (mach->FuncMask == 0x0) {
2379         /* really return now (otherwise, keep executing) */
2380
2381         if (mach->CallStackTop == 0) {
2382            /* returning from main() */
2383            *pc = -1;
2384            return;
2385         }
2386         *pc = mach->CallStack[--mach->CallStackTop];
2387
2388         /* pop the Cond, Loop, Cont stacks */
2389         assert(mach->CondStackTop > 0);
2390         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2391         assert(mach->LoopStackTop > 0);
2392         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2393         assert(mach->ContStackTop > 0);
2394         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2395         assert(mach->FuncStackTop > 0);
2396         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2397
2398         UPDATE_EXEC_MASK(mach);
2399      }
2400      break;
2401
2402   case TGSI_OPCODE_SSG:
2403      assert (0);
2404      break;
2405
2406   case TGSI_OPCODE_CMP:
2407      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2408         FETCH(&r[0], 0, chan_index);
2409         FETCH(&r[1], 1, chan_index);
2410         FETCH(&r[2], 2, chan_index);
2411
2412         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2413
2414         STORE(&r[0], 0, chan_index);
2415      }
2416      break;
2417
2418   case TGSI_OPCODE_SCS:
2419      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2420         FETCH( &r[0], 0, CHAN_X );
2421      }
2422      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2423         micro_cos( &r[1], &r[0] );
2424         STORE( &r[1], 0, CHAN_X );
2425      }
2426      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2427         micro_sin( &r[1], &r[0] );
2428         STORE( &r[1], 0, CHAN_Y );
2429      }
2430      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2431         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2432      }
2433      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2434         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2435      }
2436      break;
2437
2438   case TGSI_OPCODE_NRM:
2439      assert (0);
2440      break;
2441
2442   case TGSI_OPCODE_DIV:
2443      assert( 0 );
2444      break;
2445
2446   case TGSI_OPCODE_DP2:
2447      FETCH( &r[0], 0, CHAN_X );
2448      FETCH( &r[1], 1, CHAN_X );
2449      micro_mul( &r[0], &r[0], &r[1] );
2450
2451      FETCH( &r[1], 0, CHAN_Y );
2452      FETCH( &r[2], 1, CHAN_Y );
2453      micro_mul( &r[1], &r[1], &r[2] );
2454      micro_add( &r[0], &r[0], &r[1] );
2455
2456      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2457         STORE( &r[0], 0, chan_index );
2458      }
2459      break;
2460
2461   case TGSI_OPCODE_IF:
2462      /* push CondMask */
2463      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2464      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2465      FETCH( &r[0], 0, CHAN_X );
2466      /* update CondMask */
2467      if( ! r[0].u[0] ) {
2468         mach->CondMask &= ~0x1;
2469      }
2470      if( ! r[0].u[1] ) {
2471         mach->CondMask &= ~0x2;
2472      }
2473      if( ! r[0].u[2] ) {
2474         mach->CondMask &= ~0x4;
2475      }
2476      if( ! r[0].u[3] ) {
2477         mach->CondMask &= ~0x8;
2478      }
2479      UPDATE_EXEC_MASK(mach);
2480      /* Todo: If CondMask==0, jump to ELSE */
2481      break;
2482
2483   case TGSI_OPCODE_ELSE:
2484      /* invert CondMask wrt previous mask */
2485      {
2486         uint prevMask;
2487         assert(mach->CondStackTop > 0);
2488         prevMask = mach->CondStack[mach->CondStackTop - 1];
2489         mach->CondMask = ~mach->CondMask & prevMask;
2490         UPDATE_EXEC_MASK(mach);
2491         /* Todo: If CondMask==0, jump to ENDIF */
2492      }
2493      break;
2494
2495   case TGSI_OPCODE_ENDIF:
2496      /* pop CondMask */
2497      assert(mach->CondStackTop > 0);
2498      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2499      UPDATE_EXEC_MASK(mach);
2500      break;
2501
2502   case TGSI_OPCODE_END:
2503      /* halt execution */
2504      *pc = -1;
2505      break;
2506
2507   case TGSI_OPCODE_REP:
2508      assert (0);
2509      break;
2510
2511   case TGSI_OPCODE_ENDREP:
2512       assert (0);
2513       break;
2514
2515   case TGSI_OPCODE_PUSHA:
2516      assert (0);
2517      break;
2518
2519   case TGSI_OPCODE_POPA:
2520      assert (0);
2521      break;
2522
2523   case TGSI_OPCODE_CEIL:
2524      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2525         FETCH( &r[0], 0, chan_index );
2526         micro_ceil( &r[0], &r[0] );
2527         STORE( &r[0], 0, chan_index );
2528      }
2529      break;
2530
2531   case TGSI_OPCODE_I2F:
2532      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2533         FETCH( &r[0], 0, chan_index );
2534         micro_i2f( &r[0], &r[0] );
2535         STORE( &r[0], 0, chan_index );
2536      }
2537      break;
2538
2539   case TGSI_OPCODE_NOT:
2540      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2541         FETCH( &r[0], 0, chan_index );
2542         micro_not( &r[0], &r[0] );
2543         STORE( &r[0], 0, chan_index );
2544      }
2545      break;
2546
2547   case TGSI_OPCODE_TRUNC:
2548      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2549         FETCH( &r[0], 0, chan_index );
2550         micro_trunc( &r[0], &r[0] );
2551         STORE( &r[0], 0, chan_index );
2552      }
2553      break;
2554
2555   case TGSI_OPCODE_SHL:
2556      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2557         FETCH( &r[0], 0, chan_index );
2558         FETCH( &r[1], 1, chan_index );
2559         micro_shl( &r[0], &r[0], &r[1] );
2560         STORE( &r[0], 0, chan_index );
2561      }
2562      break;
2563
2564   case TGSI_OPCODE_SHR:
2565      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2566         FETCH( &r[0], 0, chan_index );
2567         FETCH( &r[1], 1, chan_index );
2568         micro_ishr( &r[0], &r[0], &r[1] );
2569         STORE( &r[0], 0, chan_index );
2570      }
2571      break;
2572
2573   case TGSI_OPCODE_AND:
2574      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2575         FETCH( &r[0], 0, chan_index );
2576         FETCH( &r[1], 1, chan_index );
2577         micro_and( &r[0], &r[0], &r[1] );
2578         STORE( &r[0], 0, chan_index );
2579      }
2580      break;
2581
2582   case TGSI_OPCODE_OR:
2583      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2584         FETCH( &r[0], 0, chan_index );
2585         FETCH( &r[1], 1, chan_index );
2586         micro_or( &r[0], &r[0], &r[1] );
2587         STORE( &r[0], 0, chan_index );
2588      }
2589      break;
2590
2591   case TGSI_OPCODE_MOD:
2592      assert (0);
2593      break;
2594
2595   case TGSI_OPCODE_XOR:
2596      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2597         FETCH( &r[0], 0, chan_index );
2598         FETCH( &r[1], 1, chan_index );
2599         micro_xor( &r[0], &r[0], &r[1] );
2600         STORE( &r[0], 0, chan_index );
2601      }
2602      break;
2603
2604   case TGSI_OPCODE_SAD:
2605      assert (0);
2606      break;
2607
2608   case TGSI_OPCODE_TXF:
2609      assert (0);
2610      break;
2611
2612   case TGSI_OPCODE_TXQ:
2613      assert (0);
2614      break;
2615
2616   case TGSI_OPCODE_EMIT:
2617      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2618      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2619      break;
2620
2621   case TGSI_OPCODE_ENDPRIM:
2622      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2623      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2624      break;
2625
2626   case TGSI_OPCODE_LOOP:
2627      /* fall-through (for now) */
2628   case TGSI_OPCODE_BGNLOOP2:
2629      /* push LoopMask and ContMasks */
2630      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2631      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2632      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2633      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2634      break;
2635
2636   case TGSI_OPCODE_ENDLOOP:
2637      /* fall-through (for now at least) */
2638   case TGSI_OPCODE_ENDLOOP2:
2639      /* Restore ContMask, but don't pop */
2640      assert(mach->ContStackTop > 0);
2641      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2642      UPDATE_EXEC_MASK(mach);
2643      if (mach->ExecMask) {
2644         /* repeat loop: jump to instruction just past BGNLOOP */
2645         *pc = inst->InstructionExtLabel.Label + 1;
2646      }
2647      else {
2648         /* exit loop: pop LoopMask */
2649         assert(mach->LoopStackTop > 0);
2650         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2651         /* pop ContMask */
2652         assert(mach->ContStackTop > 0);
2653         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2654      }
2655      UPDATE_EXEC_MASK(mach);
2656      break;
2657
2658   case TGSI_OPCODE_BRK:
2659      /* turn off loop channels for each enabled exec channel */
2660      mach->LoopMask &= ~mach->ExecMask;
2661      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2662      UPDATE_EXEC_MASK(mach);
2663      break;
2664
2665   case TGSI_OPCODE_CONT:
2666      /* turn off cont channels for each enabled exec channel */
2667      mach->ContMask &= ~mach->ExecMask;
2668      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2669      UPDATE_EXEC_MASK(mach);
2670      break;
2671
2672   case TGSI_OPCODE_BGNSUB:
2673      /* no-op */
2674      break;
2675
2676   case TGSI_OPCODE_ENDSUB:
2677      /* no-op */
2678      break;
2679
2680   case TGSI_OPCODE_NOISE1:
2681      assert( 0 );
2682      break;
2683
2684   case TGSI_OPCODE_NOISE2:
2685      assert( 0 );
2686      break;
2687
2688   case TGSI_OPCODE_NOISE3:
2689      assert( 0 );
2690      break;
2691
2692   case TGSI_OPCODE_NOISE4:
2693      assert( 0 );
2694      break;
2695
2696   case TGSI_OPCODE_NOP:
2697      break;
2698
2699   default:
2700      assert( 0 );
2701   }
2702}
2703
2704
2705/**
2706 * Run TGSI interpreter.
2707 * \return bitmask of "alive" quad components
2708 */
2709uint
2710tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2711{
2712   uint i;
2713   int pc = 0;
2714
2715   mach->CondMask = 0xf;
2716   mach->LoopMask = 0xf;
2717   mach->ContMask = 0xf;
2718   mach->FuncMask = 0xf;
2719   mach->ExecMask = 0xf;
2720
2721   mach->CondStackTop = 0; /* temporarily subvert this assertion */
2722   assert(mach->CondStackTop == 0);
2723   assert(mach->LoopStackTop == 0);
2724   assert(mach->ContStackTop == 0);
2725   assert(mach->CallStackTop == 0);
2726
2727   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2728   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2729
2730   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2731      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2732      mach->Primitives[0] = 0;
2733   }
2734
2735   for (i = 0; i < QUAD_SIZE; i++) {
2736      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2737         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2738         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2739         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2740         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2741   }
2742
2743   /* execute declarations (interpolants) */
2744   for (i = 0; i < mach->NumDeclarations; i++) {
2745      exec_declaration( mach, mach->Declarations+i );
2746   }
2747
2748   /* execute instructions, until pc is set to -1 */
2749   while (pc != -1) {
2750      assert(pc < (int) mach->NumInstructions);
2751      exec_instruction( mach, mach->Instructions + pc, &pc );
2752   }
2753
2754#if 0
2755   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2756   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2757      /*
2758       * Scale back depth component.
2759       */
2760      for (i = 0; i < 4; i++)
2761         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2762   }
2763#endif
2764
2765   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2766}
2767
2768
2769