tgsi_exec.c revision 4b3c74b4d6786475bc45f883612e76069e722cbd
1/**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI interpreter/executor.
30 *
31 * Flow control information:
32 *
33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
35 * care since a condition may be true for some quad components but false
36 * for other components.
37 *
38 * We basically execute all statements (even if they're in the part of
39 * an IF/ELSE clause that's "not taken") and use a special mask to
40 * control writing to destination registers.  This is the ExecMask.
41 * See store_dest().
42 *
43 * The ExecMask is computed from four other masks (CondMask, LoopMask,
44 * ContMask and FuncMask; see UPDATE_EXEC_MASK).  These are controlled by
45 * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT).
46 *
47 *
48 * Authors:
49 *   Michal Krol
50 *   Brian Paul
51 */
52
53#include "pipe/p_compiler.h"
54#include "pipe/p_state.h"
55#include "pipe/p_shader_tokens.h"
56#include "tgsi/tgsi_parse.h"
57#include "tgsi/tgsi_util.h"
58#include "tgsi_exec.h"
59#include "util/u_memory.h"
60#include "util/u_math.h"
61
/* When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_* routines; otherwise they fall back to powf()/logf().
 */
62#define FAST_MATH 1
63
/* Lane indices within a channel, named by position in the quad.
 * micro_ddx() and micro_ddy() use them to pick the lanes to subtract.
 */
64#define TILE_TOP_LEFT     0
65#define TILE_TOP_RIGHT    1
66#define TILE_BOTTOM_LEFT  2
67#define TILE_BOTTOM_RIGHT 3
68
/* Numeric indices of the X, Y, Z and W channels. */
69#define CHAN_X  0
70#define CHAN_Y  1
71#define CHAN_Z  2
72#define CHAN_W  3
73
74/*
75 * Shorthand locations of various utility registers (_I = Index, _C = Channel)
76 */
/* Each pair below simply aliases the corresponding TGSI_EXEC_TEMP_* name.
 * tgsi_exec_machine_init() fills the 0, 7F, 80, FF, 1, 2, 128, M128, 3 and
 * HALF slots with the constant their name suggests.
 */
77#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
78#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
79#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
80#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
81#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
82#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
83#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
84#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
85#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
86#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
87#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
88#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
89#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
90#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
91#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
92#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
93#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
94#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
95#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
96#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
97#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
98#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
99#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
100#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
101#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
102#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
103#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
104#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
105#define TEMP_R0            TGSI_EXEC_TEMP_R0
106
/* Non-zero when bit CHAN is set in the write mask of destination
 * register 0 of instruction INST.
 */
107#define IS_CHANNEL_ENABLED(INST, CHAN)\
108   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
109
/* Same test as IS_CHANNEL_ENABLED, but for destination register 1. */
110#define IS_CHANNEL_ENABLED2(INST, CHAN)\
111   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
112
/* Loop header: steps CHAN from 0 to NUM_CHANNELS-1 and runs the statement
 * that follows only for channels enabled in destination register 0.
 * NOTE(review): the expansion ends in a bare `if`, so an `else` written
 * after the controlled statement would bind to that hidden `if`.
 */
113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
114   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
115      if (IS_CHANNEL_ENABLED( INST, CHAN ))
116
/* Same loop header as FOR_EACH_ENABLED_CHANNEL, testing destination
 * register 1 instead.
 */
117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
118   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
119      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
120
121
/**
 * Recompute the execution mask as the bitwise AND of the conditional,
 * loop, continue and function masks.
 *
 * MACH is a pointer to the machine state.  It is expanded several times,
 * so it must not have side effects.  The argument and the whole expansion
 * are parenthesised so that expressions such as &machine work as MACH.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->FuncMask)
125
126/**
127 * Initialize machine state by expanding tokens to full instructions,
128 * allocating temporary storage, setting up constants, etc.
129 * After this, we can call tgsi_exec_machine_run() many times.
130 */
131void
132tgsi_exec_machine_bind_shader(
133   struct tgsi_exec_machine *mach,
134   const struct tgsi_token *tokens,
135   uint numSamplers,
136   struct tgsi_sampler *samplers)
137{
138   uint k;
139   struct tgsi_parse_context parse;
140   struct tgsi_exec_labels *labels = &mach->Labels;
141   struct tgsi_full_instruction *instructions;
142   struct tgsi_full_declaration *declarations;
143   uint maxInstructions = 10, numInstructions = 0;
144   uint maxDeclarations = 10, numDeclarations = 0;
145   uint instno = 0;
146
147#if 0
148   tgsi_dump(tokens, 0);
149#endif
150
151   util_init_math();
152
153   mach->Tokens = tokens;
154   mach->Samplers = samplers;
155
156   k = tgsi_parse_init (&parse, mach->Tokens);
157   if (k != TGSI_PARSE_OK) {
158      debug_printf( "Problem parsing!\n" );
159      return;
160   }
161
162   mach->Processor = parse.FullHeader.Processor.Processor;
163   mach->ImmLimit = 0;
164   labels->count = 0;
165
166   declarations = (struct tgsi_full_declaration *)
167      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
168
169   if (!declarations) {
170      return;
171   }
172
173   instructions = (struct tgsi_full_instruction *)
174      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
175
176   if (!instructions) {
177      FREE( declarations );
178      return;
179   }
180
181   while( !tgsi_parse_end_of_tokens( &parse ) ) {
182      uint pointer = parse.Position;
183      uint i;
184
185      tgsi_parse_token( &parse );
186      switch( parse.FullToken.Token.Type ) {
187      case TGSI_TOKEN_TYPE_DECLARATION:
188         /* save expanded declaration */
189         if (numDeclarations == maxDeclarations) {
190            declarations = REALLOC(declarations,
191                                   maxDeclarations
192                                   * sizeof(struct tgsi_full_declaration),
193                                   (maxDeclarations + 10)
194                                   * sizeof(struct tgsi_full_declaration));
195            maxDeclarations += 10;
196         }
197         memcpy(declarations + numDeclarations,
198                &parse.FullToken.FullDeclaration,
199                sizeof(declarations[0]));
200         numDeclarations++;
201         break;
202
203      case TGSI_TOKEN_TYPE_IMMEDIATE:
204         {
205            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
206            assert( size % 4 == 0 );
207            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
208
209            for( i = 0; i < size; i++ ) {
210               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
211		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
212            }
213            mach->ImmLimit += size / 4;
214         }
215         break;
216
217      case TGSI_TOKEN_TYPE_INSTRUCTION:
218         assert( labels->count < MAX_LABELS );
219
220         labels->labels[labels->count][0] = instno;
221         labels->labels[labels->count][1] = pointer;
222         labels->count++;
223
224         /* save expanded instruction */
225         if (numInstructions == maxInstructions) {
226            instructions = REALLOC(instructions,
227                                   maxInstructions
228                                   * sizeof(struct tgsi_full_instruction),
229                                   (maxInstructions + 10)
230                                   * sizeof(struct tgsi_full_instruction));
231            maxInstructions += 10;
232         }
233         memcpy(instructions + numInstructions,
234                &parse.FullToken.FullInstruction,
235                sizeof(instructions[0]));
236         numInstructions++;
237         break;
238
239      default:
240         assert( 0 );
241      }
242   }
243   tgsi_parse_free (&parse);
244
245   if (mach->Declarations) {
246      FREE( mach->Declarations );
247   }
248   mach->Declarations = declarations;
249   mach->NumDeclarations = numDeclarations;
250
251   if (mach->Instructions) {
252      FREE( mach->Instructions );
253   }
254   mach->Instructions = instructions;
255   mach->NumInstructions = numInstructions;
256}
257
258
259void
260tgsi_exec_machine_init(
261   struct tgsi_exec_machine *mach )
262{
263   uint i;
264
265   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
266   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
267
268   /* Setup constants. */
269   for( i = 0; i < 4; i++ ) {
270      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
271      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
272      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
273      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
274      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
275      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
276      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
277      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
278      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
279      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
280   }
281}
282
283
284void
285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
286{
287   if (mach->Instructions) {
288      FREE(mach->Instructions);
289      mach->Instructions = NULL;
290      mach->NumInstructions = 0;
291   }
292   if (mach->Declarations) {
293      FREE(mach->Declarations);
294      mach->Declarations = NULL;
295      mach->NumDeclarations = 0;
296   }
297}
298
299
300static void
301micro_abs(
302   union tgsi_exec_channel *dst,
303   const union tgsi_exec_channel *src )
304{
305   dst->f[0] = fabsf( src->f[0] );
306   dst->f[1] = fabsf( src->f[1] );
307   dst->f[2] = fabsf( src->f[2] );
308   dst->f[3] = fabsf( src->f[3] );
309}
310
311static void
312micro_add(
313   union tgsi_exec_channel *dst,
314   const union tgsi_exec_channel *src0,
315   const union tgsi_exec_channel *src1 )
316{
317   dst->f[0] = src0->f[0] + src1->f[0];
318   dst->f[1] = src0->f[1] + src1->f[1];
319   dst->f[2] = src0->f[2] + src1->f[2];
320   dst->f[3] = src0->f[3] + src1->f[3];
321}
322
323static void
324micro_iadd(
325   union tgsi_exec_channel *dst,
326   const union tgsi_exec_channel *src0,
327   const union tgsi_exec_channel *src1 )
328{
329   dst->i[0] = src0->i[0] + src1->i[0];
330   dst->i[1] = src0->i[1] + src1->i[1];
331   dst->i[2] = src0->i[2] + src1->i[2];
332   dst->i[3] = src0->i[3] + src1->i[3];
333}
334
335static void
336micro_and(
337   union tgsi_exec_channel *dst,
338   const union tgsi_exec_channel *src0,
339   const union tgsi_exec_channel *src1 )
340{
341   dst->u[0] = src0->u[0] & src1->u[0];
342   dst->u[1] = src0->u[1] & src1->u[1];
343   dst->u[2] = src0->u[2] & src1->u[2];
344   dst->u[3] = src0->u[3] & src1->u[3];
345}
346
347static void
348micro_ceil(
349   union tgsi_exec_channel *dst,
350   const union tgsi_exec_channel *src )
351{
352   dst->f[0] = ceilf( src->f[0] );
353   dst->f[1] = ceilf( src->f[1] );
354   dst->f[2] = ceilf( src->f[2] );
355   dst->f[3] = ceilf( src->f[3] );
356}
357
358static void
359micro_cos(
360   union tgsi_exec_channel *dst,
361   const union tgsi_exec_channel *src )
362{
363   dst->f[0] = cosf( src->f[0] );
364   dst->f[1] = cosf( src->f[1] );
365   dst->f[2] = cosf( src->f[2] );
366   dst->f[3] = cosf( src->f[3] );
367}
368
369static void
370micro_ddx(
371   union tgsi_exec_channel *dst,
372   const union tgsi_exec_channel *src )
373{
374   dst->f[0] =
375   dst->f[1] =
376   dst->f[2] =
377   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
378}
379
380static void
381micro_ddy(
382   union tgsi_exec_channel *dst,
383   const union tgsi_exec_channel *src )
384{
385   dst->f[0] =
386   dst->f[1] =
387   dst->f[2] =
388   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
389}
390
391static void
392micro_div(
393   union tgsi_exec_channel *dst,
394   const union tgsi_exec_channel *src0,
395   const union tgsi_exec_channel *src1 )
396{
397   if (src1->f[0] != 0) {
398      dst->f[0] = src0->f[0] / src1->f[0];
399   }
400   if (src1->f[1] != 0) {
401      dst->f[1] = src0->f[1] / src1->f[1];
402   }
403   if (src1->f[2] != 0) {
404      dst->f[2] = src0->f[2] / src1->f[2];
405   }
406   if (src1->f[3] != 0) {
407      dst->f[3] = src0->f[3] / src1->f[3];
408   }
409}
410
411static void
412micro_udiv(
413   union tgsi_exec_channel *dst,
414   const union tgsi_exec_channel *src0,
415   const union tgsi_exec_channel *src1 )
416{
417   dst->u[0] = src0->u[0] / src1->u[0];
418   dst->u[1] = src0->u[1] / src1->u[1];
419   dst->u[2] = src0->u[2] / src1->u[2];
420   dst->u[3] = src0->u[3] / src1->u[3];
421}
422
423static void
424micro_eq(
425   union tgsi_exec_channel *dst,
426   const union tgsi_exec_channel *src0,
427   const union tgsi_exec_channel *src1,
428   const union tgsi_exec_channel *src2,
429   const union tgsi_exec_channel *src3 )
430{
431   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
432   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
433   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
434   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
435}
436
437static void
438micro_ieq(
439   union tgsi_exec_channel *dst,
440   const union tgsi_exec_channel *src0,
441   const union tgsi_exec_channel *src1,
442   const union tgsi_exec_channel *src2,
443   const union tgsi_exec_channel *src3 )
444{
445   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
446   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
447   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
448   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
449}
450
451static void
452micro_exp2(
453   union tgsi_exec_channel *dst,
454   const union tgsi_exec_channel *src)
455{
456#if FAST_MATH
457   dst->f[0] = util_fast_exp2( src->f[0] );
458   dst->f[1] = util_fast_exp2( src->f[1] );
459   dst->f[2] = util_fast_exp2( src->f[2] );
460   dst->f[3] = util_fast_exp2( src->f[3] );
461#else
462   dst->f[0] = powf( 2.0f, src->f[0] );
463   dst->f[1] = powf( 2.0f, src->f[1] );
464   dst->f[2] = powf( 2.0f, src->f[2] );
465   dst->f[3] = powf( 2.0f, src->f[3] );
466#endif
467}
468
469static void
470micro_f2ut(
471   union tgsi_exec_channel *dst,
472   const union tgsi_exec_channel *src )
473{
474   dst->u[0] = (uint) src->f[0];
475   dst->u[1] = (uint) src->f[1];
476   dst->u[2] = (uint) src->f[2];
477   dst->u[3] = (uint) src->f[3];
478}
479
480static void
481micro_flr(
482   union tgsi_exec_channel *dst,
483   const union tgsi_exec_channel *src )
484{
485   dst->f[0] = floorf( src->f[0] );
486   dst->f[1] = floorf( src->f[1] );
487   dst->f[2] = floorf( src->f[2] );
488   dst->f[3] = floorf( src->f[3] );
489}
490
491static void
492micro_frc(
493   union tgsi_exec_channel *dst,
494   const union tgsi_exec_channel *src )
495{
496   dst->f[0] = src->f[0] - floorf( src->f[0] );
497   dst->f[1] = src->f[1] - floorf( src->f[1] );
498   dst->f[2] = src->f[2] - floorf( src->f[2] );
499   dst->f[3] = src->f[3] - floorf( src->f[3] );
500}
501
502static void
503micro_ge(
504   union tgsi_exec_channel *dst,
505   const union tgsi_exec_channel *src0,
506   const union tgsi_exec_channel *src1,
507   const union tgsi_exec_channel *src2,
508   const union tgsi_exec_channel *src3 )
509{
510   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
511   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
512   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
513   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
514}
515
516static void
517micro_i2f(
518   union tgsi_exec_channel *dst,
519   const union tgsi_exec_channel *src )
520{
521   dst->f[0] = (float) src->i[0];
522   dst->f[1] = (float) src->i[1];
523   dst->f[2] = (float) src->i[2];
524   dst->f[3] = (float) src->i[3];
525}
526
527static void
528micro_lg2(
529   union tgsi_exec_channel *dst,
530   const union tgsi_exec_channel *src )
531{
532#if FAST_MATH
533   dst->f[0] = util_fast_log2( src->f[0] );
534   dst->f[1] = util_fast_log2( src->f[1] );
535   dst->f[2] = util_fast_log2( src->f[2] );
536   dst->f[3] = util_fast_log2( src->f[3] );
537#else
538   dst->f[0] = logf( src->f[0] ) * 1.442695f;
539   dst->f[1] = logf( src->f[1] ) * 1.442695f;
540   dst->f[2] = logf( src->f[2] ) * 1.442695f;
541   dst->f[3] = logf( src->f[3] ) * 1.442695f;
542#endif
543}
544
545static void
546micro_le(
547   union tgsi_exec_channel *dst,
548   const union tgsi_exec_channel *src0,
549   const union tgsi_exec_channel *src1,
550   const union tgsi_exec_channel *src2,
551   const union tgsi_exec_channel *src3 )
552{
553   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
554   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
555   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
556   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
557}
558
559static void
560micro_lt(
561   union tgsi_exec_channel *dst,
562   const union tgsi_exec_channel *src0,
563   const union tgsi_exec_channel *src1,
564   const union tgsi_exec_channel *src2,
565   const union tgsi_exec_channel *src3 )
566{
567   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
568   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
569   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
570   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
571}
572
573static void
574micro_ilt(
575   union tgsi_exec_channel *dst,
576   const union tgsi_exec_channel *src0,
577   const union tgsi_exec_channel *src1,
578   const union tgsi_exec_channel *src2,
579   const union tgsi_exec_channel *src3 )
580{
581   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
582   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
583   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
584   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
585}
586
587static void
588micro_ult(
589   union tgsi_exec_channel *dst,
590   const union tgsi_exec_channel *src0,
591   const union tgsi_exec_channel *src1,
592   const union tgsi_exec_channel *src2,
593   const union tgsi_exec_channel *src3 )
594{
595   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
596   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
597   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
598   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
599}
600
601static void
602micro_max(
603   union tgsi_exec_channel *dst,
604   const union tgsi_exec_channel *src0,
605   const union tgsi_exec_channel *src1 )
606{
607   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
608   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
609   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
610   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
611}
612
613static void
614micro_imax(
615   union tgsi_exec_channel *dst,
616   const union tgsi_exec_channel *src0,
617   const union tgsi_exec_channel *src1 )
618{
619   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
620   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
621   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
622   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
623}
624
625static void
626micro_umax(
627   union tgsi_exec_channel *dst,
628   const union tgsi_exec_channel *src0,
629   const union tgsi_exec_channel *src1 )
630{
631   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
632   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
633   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
634   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
635}
636
637static void
638micro_min(
639   union tgsi_exec_channel *dst,
640   const union tgsi_exec_channel *src0,
641   const union tgsi_exec_channel *src1 )
642{
643   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
644   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
645   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
646   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
647}
648
649static void
650micro_imin(
651   union tgsi_exec_channel *dst,
652   const union tgsi_exec_channel *src0,
653   const union tgsi_exec_channel *src1 )
654{
655   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
656   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
657   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
658   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
659}
660
661static void
662micro_umin(
663   union tgsi_exec_channel *dst,
664   const union tgsi_exec_channel *src0,
665   const union tgsi_exec_channel *src1 )
666{
667   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
668   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
669   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
670   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
671}
672
673static void
674micro_umod(
675   union tgsi_exec_channel *dst,
676   const union tgsi_exec_channel *src0,
677   const union tgsi_exec_channel *src1 )
678{
679   dst->u[0] = src0->u[0] % src1->u[0];
680   dst->u[1] = src0->u[1] % src1->u[1];
681   dst->u[2] = src0->u[2] % src1->u[2];
682   dst->u[3] = src0->u[3] % src1->u[3];
683}
684
685static void
686micro_mul(
687   union tgsi_exec_channel *dst,
688   const union tgsi_exec_channel *src0,
689   const union tgsi_exec_channel *src1 )
690{
691   dst->f[0] = src0->f[0] * src1->f[0];
692   dst->f[1] = src0->f[1] * src1->f[1];
693   dst->f[2] = src0->f[2] * src1->f[2];
694   dst->f[3] = src0->f[3] * src1->f[3];
695}
696
697static void
698micro_imul(
699   union tgsi_exec_channel *dst,
700   const union tgsi_exec_channel *src0,
701   const union tgsi_exec_channel *src1 )
702{
703   dst->i[0] = src0->i[0] * src1->i[0];
704   dst->i[1] = src0->i[1] * src1->i[1];
705   dst->i[2] = src0->i[2] * src1->i[2];
706   dst->i[3] = src0->i[3] * src1->i[3];
707}
708
709static void
710micro_imul64(
711   union tgsi_exec_channel *dst0,
712   union tgsi_exec_channel *dst1,
713   const union tgsi_exec_channel *src0,
714   const union tgsi_exec_channel *src1 )
715{
716   dst1->i[0] = src0->i[0] * src1->i[0];
717   dst1->i[1] = src0->i[1] * src1->i[1];
718   dst1->i[2] = src0->i[2] * src1->i[2];
719   dst1->i[3] = src0->i[3] * src1->i[3];
720   dst0->i[0] = 0;
721   dst0->i[1] = 0;
722   dst0->i[2] = 0;
723   dst0->i[3] = 0;
724}
725
726static void
727micro_umul64(
728   union tgsi_exec_channel *dst0,
729   union tgsi_exec_channel *dst1,
730   const union tgsi_exec_channel *src0,
731   const union tgsi_exec_channel *src1 )
732{
733   dst1->u[0] = src0->u[0] * src1->u[0];
734   dst1->u[1] = src0->u[1] * src1->u[1];
735   dst1->u[2] = src0->u[2] * src1->u[2];
736   dst1->u[3] = src0->u[3] * src1->u[3];
737   dst0->u[0] = 0;
738   dst0->u[1] = 0;
739   dst0->u[2] = 0;
740   dst0->u[3] = 0;
741}
742
743static void
744micro_movc(
745   union tgsi_exec_channel *dst,
746   const union tgsi_exec_channel *src0,
747   const union tgsi_exec_channel *src1,
748   const union tgsi_exec_channel *src2 )
749{
750   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
751   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
752   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
753   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
754}
755
756static void
757micro_neg(
758   union tgsi_exec_channel *dst,
759   const union tgsi_exec_channel *src )
760{
761   dst->f[0] = -src->f[0];
762   dst->f[1] = -src->f[1];
763   dst->f[2] = -src->f[2];
764   dst->f[3] = -src->f[3];
765}
766
767static void
768micro_ineg(
769   union tgsi_exec_channel *dst,
770   const union tgsi_exec_channel *src )
771{
772   dst->i[0] = -src->i[0];
773   dst->i[1] = -src->i[1];
774   dst->i[2] = -src->i[2];
775   dst->i[3] = -src->i[3];
776}
777
778static void
779micro_not(
780   union tgsi_exec_channel *dst,
781   const union tgsi_exec_channel *src )
782{
783   dst->u[0] = ~src->u[0];
784   dst->u[1] = ~src->u[1];
785   dst->u[2] = ~src->u[2];
786   dst->u[3] = ~src->u[3];
787}
788
789static void
790micro_or(
791   union tgsi_exec_channel *dst,
792   const union tgsi_exec_channel *src0,
793   const union tgsi_exec_channel *src1 )
794{
795   dst->u[0] = src0->u[0] | src1->u[0];
796   dst->u[1] = src0->u[1] | src1->u[1];
797   dst->u[2] = src0->u[2] | src1->u[2];
798   dst->u[3] = src0->u[3] | src1->u[3];
799}
800
801static void
802micro_pow(
803   union tgsi_exec_channel *dst,
804   const union tgsi_exec_channel *src0,
805   const union tgsi_exec_channel *src1 )
806{
807#if FAST_MATH
808   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
809   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
810   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
811   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
812#else
813   dst->f[0] = powf( src0->f[0], src1->f[0] );
814   dst->f[1] = powf( src0->f[1], src1->f[1] );
815   dst->f[2] = powf( src0->f[2], src1->f[2] );
816   dst->f[3] = powf( src0->f[3], src1->f[3] );
817#endif
818}
819
820static void
821micro_rnd(
822   union tgsi_exec_channel *dst,
823   const union tgsi_exec_channel *src )
824{
825   dst->f[0] = floorf( src->f[0] + 0.5f );
826   dst->f[1] = floorf( src->f[1] + 0.5f );
827   dst->f[2] = floorf( src->f[2] + 0.5f );
828   dst->f[3] = floorf( src->f[3] + 0.5f );
829}
830
831static void
832micro_sgn(
833   union tgsi_exec_channel *dst,
834   const union tgsi_exec_channel *src )
835{
836   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
837   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
838   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
839   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
840}
841
842static void
843micro_shl(
844   union tgsi_exec_channel *dst,
845   const union tgsi_exec_channel *src0,
846   const union tgsi_exec_channel *src1 )
847{
848   dst->i[0] = src0->i[0] << src1->i[0];
849   dst->i[1] = src0->i[1] << src1->i[1];
850   dst->i[2] = src0->i[2] << src1->i[2];
851   dst->i[3] = src0->i[3] << src1->i[3];
852}
853
854static void
855micro_ishr(
856   union tgsi_exec_channel *dst,
857   const union tgsi_exec_channel *src0,
858   const union tgsi_exec_channel *src1 )
859{
860   dst->i[0] = src0->i[0] >> src1->i[0];
861   dst->i[1] = src0->i[1] >> src1->i[1];
862   dst->i[2] = src0->i[2] >> src1->i[2];
863   dst->i[3] = src0->i[3] >> src1->i[3];
864}
865
866static void
867micro_trunc(
868   union tgsi_exec_channel *dst,
869   const union tgsi_exec_channel *src0 )
870{
871   dst->f[0] = (float) (int) src0->f[0];
872   dst->f[1] = (float) (int) src0->f[1];
873   dst->f[2] = (float) (int) src0->f[2];
874   dst->f[3] = (float) (int) src0->f[3];
875}
876
877static void
878micro_ushr(
879   union tgsi_exec_channel *dst,
880   const union tgsi_exec_channel *src0,
881   const union tgsi_exec_channel *src1 )
882{
883   dst->u[0] = src0->u[0] >> src1->u[0];
884   dst->u[1] = src0->u[1] >> src1->u[1];
885   dst->u[2] = src0->u[2] >> src1->u[2];
886   dst->u[3] = src0->u[3] >> src1->u[3];
887}
888
889static void
890micro_sin(
891   union tgsi_exec_channel *dst,
892   const union tgsi_exec_channel *src )
893{
894   dst->f[0] = sinf( src->f[0] );
895   dst->f[1] = sinf( src->f[1] );
896   dst->f[2] = sinf( src->f[2] );
897   dst->f[3] = sinf( src->f[3] );
898}
899
900static void
901micro_sqrt( union tgsi_exec_channel *dst,
902            const union tgsi_exec_channel *src )
903{
904   dst->f[0] = sqrtf( src->f[0] );
905   dst->f[1] = sqrtf( src->f[1] );
906   dst->f[2] = sqrtf( src->f[2] );
907   dst->f[3] = sqrtf( src->f[3] );
908}
909
910static void
911micro_sub(
912   union tgsi_exec_channel *dst,
913   const union tgsi_exec_channel *src0,
914   const union tgsi_exec_channel *src1 )
915{
916   dst->f[0] = src0->f[0] - src1->f[0];
917   dst->f[1] = src0->f[1] - src1->f[1];
918   dst->f[2] = src0->f[2] - src1->f[2];
919   dst->f[3] = src0->f[3] - src1->f[3];
920}
921
922static void
923micro_u2f(
924   union tgsi_exec_channel *dst,
925   const union tgsi_exec_channel *src )
926{
927   dst->f[0] = (float) src->u[0];
928   dst->f[1] = (float) src->u[1];
929   dst->f[2] = (float) src->u[2];
930   dst->f[3] = (float) src->u[3];
931}
932
933static void
934micro_xor(
935   union tgsi_exec_channel *dst,
936   const union tgsi_exec_channel *src0,
937   const union tgsi_exec_channel *src1 )
938{
939   dst->u[0] = src0->u[0] ^ src1->u[0];
940   dst->u[1] = src0->u[1] ^ src1->u[1];
941   dst->u[2] = src0->u[2] ^ src1->u[2];
942   dst->u[3] = src0->u[3] ^ src1->u[3];
943}
944
945static void
946fetch_src_file_channel(
947   const struct tgsi_exec_machine *mach,
948   const uint file,
949   const uint swizzle,
950   const union tgsi_exec_channel *index,
951   union tgsi_exec_channel *chan )
952{
953   switch( swizzle ) {
954   case TGSI_EXTSWIZZLE_X:
955   case TGSI_EXTSWIZZLE_Y:
956   case TGSI_EXTSWIZZLE_Z:
957   case TGSI_EXTSWIZZLE_W:
958      switch( file ) {
959      case TGSI_FILE_CONSTANT:
960         assert(mach->Consts);
961         if (index->i[0] < 0)
962            chan->f[0] = 0.0f;
963         else
964            chan->f[0] = mach->Consts[index->i[0]][swizzle];
965         if (index->i[1] < 0)
966            chan->f[1] = 0.0f;
967         else
968            chan->f[1] = mach->Consts[index->i[1]][swizzle];
969         if (index->i[2] < 0)
970            chan->f[2] = 0.0f;
971         else
972            chan->f[2] = mach->Consts[index->i[2]][swizzle];
973         if (index->i[3] < 0)
974            chan->f[3] = 0.0f;
975         else
976            chan->f[3] = mach->Consts[index->i[3]][swizzle];
977         break;
978
979      case TGSI_FILE_INPUT:
980         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
981         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
982         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
983         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
984         break;
985
986      case TGSI_FILE_TEMPORARY:
987         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
988         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
989         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
990         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
991         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
992         break;
993
994      case TGSI_FILE_IMMEDIATE:
995         assert( index->i[0] < (int) mach->ImmLimit );
996         chan->f[0] = mach->Imms[index->i[0]][swizzle];
997         assert( index->i[1] < (int) mach->ImmLimit );
998         chan->f[1] = mach->Imms[index->i[1]][swizzle];
999         assert( index->i[2] < (int) mach->ImmLimit );
1000         chan->f[2] = mach->Imms[index->i[2]][swizzle];
1001         assert( index->i[3] < (int) mach->ImmLimit );
1002         chan->f[3] = mach->Imms[index->i[3]][swizzle];
1003         break;
1004
1005      case TGSI_FILE_ADDRESS:
1006         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1007         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1008         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1009         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1010         break;
1011
1012      case TGSI_FILE_OUTPUT:
1013         /* vertex/fragment output vars can be read too */
1014         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1015         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1016         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1017         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1018         break;
1019
1020      default:
1021         assert( 0 );
1022      }
1023      break;
1024
1025   case TGSI_EXTSWIZZLE_ZERO:
1026      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1027      break;
1028
1029   case TGSI_EXTSWIZZLE_ONE:
1030      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1031      break;
1032
1033   default:
1034      assert( 0 );
1035   }
1036}
1037
1038static void
1039fetch_source(
1040   const struct tgsi_exec_machine *mach,
1041   union tgsi_exec_channel *chan,
1042   const struct tgsi_full_src_register *reg,
1043   const uint chan_index )
1044{
1045   union tgsi_exec_channel index;
1046   uint swizzle;
1047
1048   index.i[0] =
1049   index.i[1] =
1050   index.i[2] =
1051   index.i[3] = reg->SrcRegister.Index;
1052
1053   if (reg->SrcRegister.Indirect) {
1054      union tgsi_exec_channel index2;
1055      union tgsi_exec_channel indir_index;
1056      const uint execmask = mach->ExecMask;
1057      uint i;
1058
1059      /* which address register (always zero now) */
1060      index2.i[0] =
1061      index2.i[1] =
1062      index2.i[2] =
1063      index2.i[3] = reg->SrcRegisterInd.Index;
1064
1065      /* get current value of address register[swizzle] */
1066      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1067      fetch_src_file_channel(
1068         mach,
1069         reg->SrcRegisterInd.File,
1070         swizzle,
1071         &index2,
1072         &indir_index );
1073
1074      /* add value of address register to the offset */
1075      index.i[0] += (int) indir_index.f[0];
1076      index.i[1] += (int) indir_index.f[1];
1077      index.i[2] += (int) indir_index.f[2];
1078      index.i[3] += (int) indir_index.f[3];
1079
1080      /* for disabled execution channels, zero-out the index to
1081       * avoid using a potential garbage value.
1082       */
1083      for (i = 0; i < QUAD_SIZE; i++) {
1084         if ((execmask & (1 << i)) == 0)
1085            index.i[i] = 0;
1086      }
1087   }
1088
1089   if( reg->SrcRegister.Dimension ) {
1090      switch( reg->SrcRegister.File ) {
1091      case TGSI_FILE_INPUT:
1092         index.i[0] *= 17;
1093         index.i[1] *= 17;
1094         index.i[2] *= 17;
1095         index.i[3] *= 17;
1096         break;
1097      case TGSI_FILE_CONSTANT:
1098         index.i[0] *= 4096;
1099         index.i[1] *= 4096;
1100         index.i[2] *= 4096;
1101         index.i[3] *= 4096;
1102         break;
1103      default:
1104         assert( 0 );
1105      }
1106
1107      index.i[0] += reg->SrcRegisterDim.Index;
1108      index.i[1] += reg->SrcRegisterDim.Index;
1109      index.i[2] += reg->SrcRegisterDim.Index;
1110      index.i[3] += reg->SrcRegisterDim.Index;
1111
1112      if (reg->SrcRegisterDim.Indirect) {
1113         union tgsi_exec_channel index2;
1114         union tgsi_exec_channel indir_index;
1115         const uint execmask = mach->ExecMask;
1116         uint i;
1117
1118         index2.i[0] =
1119         index2.i[1] =
1120         index2.i[2] =
1121         index2.i[3] = reg->SrcRegisterDimInd.Index;
1122
1123         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1124         fetch_src_file_channel(
1125            mach,
1126            reg->SrcRegisterDimInd.File,
1127            swizzle,
1128            &index2,
1129            &indir_index );
1130
1131         index.i[0] += (int) indir_index.f[0];
1132         index.i[1] += (int) indir_index.f[1];
1133         index.i[2] += (int) indir_index.f[2];
1134         index.i[3] += (int) indir_index.f[3];
1135
1136         /* for disabled execution channels, zero-out the index to
1137          * avoid using a potential garbage value.
1138          */
1139         for (i = 0; i < QUAD_SIZE; i++) {
1140            if ((execmask & (1 << i)) == 0)
1141               index.i[i] = 0;
1142         }
1143      }
1144   }
1145
1146   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1147   fetch_src_file_channel(
1148      mach,
1149      reg->SrcRegister.File,
1150      swizzle,
1151      &index,
1152      chan );
1153
1154   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1155   case TGSI_UTIL_SIGN_CLEAR:
1156      micro_abs( chan, chan );
1157      break;
1158
1159   case TGSI_UTIL_SIGN_SET:
1160      micro_abs( chan, chan );
1161      micro_neg( chan, chan );
1162      break;
1163
1164   case TGSI_UTIL_SIGN_TOGGLE:
1165      micro_neg( chan, chan );
1166      break;
1167
1168   case TGSI_UTIL_SIGN_KEEP:
1169      break;
1170   }
1171
1172   if (reg->SrcRegisterExtMod.Complement) {
1173      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1174   }
1175}
1176
1177static void
1178store_dest(
1179   struct tgsi_exec_machine *mach,
1180   const union tgsi_exec_channel *chan,
1181   const struct tgsi_full_dst_register *reg,
1182   const struct tgsi_full_instruction *inst,
1183   uint chan_index )
1184{
1185   uint i;
1186   union tgsi_exec_channel null;
1187   union tgsi_exec_channel *dst;
1188   uint execmask = mach->ExecMask;
1189
1190   switch (reg->DstRegister.File) {
1191   case TGSI_FILE_NULL:
1192      dst = &null;
1193      break;
1194
1195   case TGSI_FILE_OUTPUT:
1196      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1197                           + reg->DstRegister.Index].xyzw[chan_index];
1198      break;
1199
1200   case TGSI_FILE_TEMPORARY:
1201      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1202      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1203      break;
1204
1205   case TGSI_FILE_ADDRESS:
1206      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1207      break;
1208
1209   default:
1210      assert( 0 );
1211      return;
1212   }
1213
1214   if (inst->InstructionExtNv.CondFlowEnable) {
1215      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1216      uint swizzle;
1217      uint shift;
1218      uint mask;
1219      uint test;
1220
1221      /* Only CC0 supported.
1222       */
1223      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1224
1225      switch (chan_index) {
1226      case CHAN_X:
1227         swizzle = inst->InstructionExtNv.CondSwizzleX;
1228         break;
1229      case CHAN_Y:
1230         swizzle = inst->InstructionExtNv.CondSwizzleY;
1231         break;
1232      case CHAN_Z:
1233         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1234         break;
1235      case CHAN_W:
1236         swizzle = inst->InstructionExtNv.CondSwizzleW;
1237         break;
1238      default:
1239         assert( 0 );
1240         return;
1241      }
1242
1243      switch (swizzle) {
1244      case TGSI_SWIZZLE_X:
1245         shift = TGSI_EXEC_CC_X_SHIFT;
1246         mask = TGSI_EXEC_CC_X_MASK;
1247         break;
1248      case TGSI_SWIZZLE_Y:
1249         shift = TGSI_EXEC_CC_Y_SHIFT;
1250         mask = TGSI_EXEC_CC_Y_MASK;
1251         break;
1252      case TGSI_SWIZZLE_Z:
1253         shift = TGSI_EXEC_CC_Z_SHIFT;
1254         mask = TGSI_EXEC_CC_Z_MASK;
1255         break;
1256      case TGSI_SWIZZLE_W:
1257         shift = TGSI_EXEC_CC_W_SHIFT;
1258         mask = TGSI_EXEC_CC_W_MASK;
1259         break;
1260      default:
1261         assert( 0 );
1262         return;
1263      }
1264
1265      switch (inst->InstructionExtNv.CondMask) {
1266      case TGSI_CC_GT:
1267         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1268         for (i = 0; i < QUAD_SIZE; i++)
1269            if (cc->u[i] & test)
1270               execmask &= ~(1 << i);
1271         break;
1272
1273      case TGSI_CC_EQ:
1274         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1275         for (i = 0; i < QUAD_SIZE; i++)
1276            if (cc->u[i] & test)
1277               execmask &= ~(1 << i);
1278         break;
1279
1280      case TGSI_CC_LT:
1281         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1282         for (i = 0; i < QUAD_SIZE; i++)
1283            if (cc->u[i] & test)
1284               execmask &= ~(1 << i);
1285         break;
1286
1287      case TGSI_CC_GE:
1288         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1289         for (i = 0; i < QUAD_SIZE; i++)
1290            if (cc->u[i] & test)
1291               execmask &= ~(1 << i);
1292         break;
1293
1294      case TGSI_CC_LE:
1295         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1296         for (i = 0; i < QUAD_SIZE; i++)
1297            if (cc->u[i] & test)
1298               execmask &= ~(1 << i);
1299         break;
1300
1301      case TGSI_CC_NE:
1302         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1303         for (i = 0; i < QUAD_SIZE; i++)
1304            if (cc->u[i] & test)
1305               execmask &= ~(1 << i);
1306         break;
1307
1308      case TGSI_CC_TR:
1309         break;
1310
1311      case TGSI_CC_FL:
1312         for (i = 0; i < QUAD_SIZE; i++)
1313            execmask &= ~(1 << i);
1314         break;
1315
1316      default:
1317         assert( 0 );
1318         return;
1319      }
1320   }
1321
1322   switch (inst->Instruction.Saturate) {
1323   case TGSI_SAT_NONE:
1324      for (i = 0; i < QUAD_SIZE; i++)
1325         if (execmask & (1 << i))
1326            dst->i[i] = chan->i[i];
1327      break;
1328
1329   case TGSI_SAT_ZERO_ONE:
1330      for (i = 0; i < QUAD_SIZE; i++)
1331         if (execmask & (1 << i)) {
1332            if (chan->f[i] < 0.0f)
1333               dst->f[i] = 0.0f;
1334            else if (chan->f[i] > 1.0f)
1335               dst->f[i] = 1.0f;
1336            else
1337               dst->i[i] = chan->i[i];
1338         }
1339      break;
1340
1341   case TGSI_SAT_MINUS_PLUS_ONE:
1342      for (i = 0; i < QUAD_SIZE; i++)
1343         if (execmask & (1 << i)) {
1344            if (chan->f[i] < -1.0f)
1345               dst->f[i] = -1.0f;
1346            else if (chan->f[i] > 1.0f)
1347               dst->f[i] = 1.0f;
1348            else
1349               dst->i[i] = chan->i[i];
1350         }
1351      break;
1352
1353   default:
1354      assert( 0 );
1355   }
1356
1357   if (inst->InstructionExtNv.CondDstUpdate) {
1358      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1359      uint shift;
1360      uint mask;
1361
1362      /* Only CC0 supported.
1363       */
1364      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1365
1366      switch (chan_index) {
1367      case CHAN_X:
1368         shift = TGSI_EXEC_CC_X_SHIFT;
1369         mask = ~TGSI_EXEC_CC_X_MASK;
1370         break;
1371      case CHAN_Y:
1372         shift = TGSI_EXEC_CC_Y_SHIFT;
1373         mask = ~TGSI_EXEC_CC_Y_MASK;
1374         break;
1375      case CHAN_Z:
1376         shift = TGSI_EXEC_CC_Z_SHIFT;
1377         mask = ~TGSI_EXEC_CC_Z_MASK;
1378         break;
1379      case CHAN_W:
1380         shift = TGSI_EXEC_CC_W_SHIFT;
1381         mask = ~TGSI_EXEC_CC_W_MASK;
1382         break;
1383      default:
1384         assert( 0 );
1385         return;
1386      }
1387
1388      for (i = 0; i < QUAD_SIZE; i++)
1389         if (execmask & (1 << i)) {
1390            cc->u[i] &= mask;
1391            if (dst->f[i] < 0.0f)
1392               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1393            else if (dst->f[i] > 0.0f)
1394               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1395            else if (dst->f[i] == 0.0f)
1396               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1397            else
1398               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1399         }
1400   }
1401}
1402
/*
 * Shorthands for the opcode handlers.  Both expand against the local
 * variables `mach' and `inst' of the enclosing function.
 *
 * FETCH reads channel CHAN of source operand INDEX into VAL;
 * STORE writes VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL, INDEX, CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1408
1409
1410/**
1411 * Execute ARB-style KIL which is predicated by a src register.
1412 * Kill fragment if any of the four values is less than zero.
1413 */
1414static void
1415exec_kil(struct tgsi_exec_machine *mach,
1416         const struct tgsi_full_instruction *inst)
1417{
1418   uint uniquemask;
1419   uint chan_index;
1420   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1421   union tgsi_exec_channel r[1];
1422
1423   /* This mask stores component bits that were already tested. Note that
1424    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1425    * tested. */
1426   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1427
1428   for (chan_index = 0; chan_index < 4; chan_index++)
1429   {
1430      uint swizzle;
1431      uint i;
1432
1433      /* unswizzle channel */
1434      swizzle = tgsi_util_get_full_src_register_extswizzle (
1435                        &inst->FullSrcRegisters[0],
1436                        chan_index);
1437
1438      /* check if the component has not been already tested */
1439      if (uniquemask & (1 << swizzle))
1440         continue;
1441      uniquemask |= 1 << swizzle;
1442
1443      FETCH(&r[0], 0, chan_index);
1444      for (i = 0; i < 4; i++)
1445         if (r[0].f[i] < 0.0f)
1446            kilmask |= 1 << i;
1447   }
1448
1449   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1450}
1451
1452/**
1453 * Execute NVIDIA-style KIL which is predicated by a condition code.
1454 * Kill fragment if the condition code is TRUE.
1455 */
1456static void
1457exec_kilp(struct tgsi_exec_machine *mach,
1458          const struct tgsi_full_instruction *inst)
1459{
1460   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1461
1462   if (inst->InstructionExtNv.CondFlowEnable) {
1463      uint swizzle[4];
1464      uint chan_index;
1465
1466      kilmask = 0x0;
1467
1468      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1469      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1470      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1471      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1472
1473      for (chan_index = 0; chan_index < 4; chan_index++)
1474      {
1475         uint i;
1476
1477         for (i = 0; i < 4; i++) {
1478            /* TODO: evaluate the condition code */
1479            if (0)
1480               kilmask |= 1 << i;
1481         }
1482      }
1483   }
1484   else {
1485      /* "unconditional" kil */
1486      kilmask = mach->ExecMask;
1487   }
1488   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1489}
1490
1491
1492/*
1493 * Fetch a texel using STR texture coordinates.
1494 */
1495static void
1496fetch_texel( struct tgsi_sampler *sampler,
1497             const union tgsi_exec_channel *s,
1498             const union tgsi_exec_channel *t,
1499             const union tgsi_exec_channel *p,
1500             float lodbias,  /* XXX should be float[4] */
1501             union tgsi_exec_channel *r,
1502             union tgsi_exec_channel *g,
1503             union tgsi_exec_channel *b,
1504             union tgsi_exec_channel *a )
1505{
1506   uint j;
1507   float rgba[NUM_CHANNELS][QUAD_SIZE];
1508
1509   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1510
1511   for (j = 0; j < 4; j++) {
1512      r->f[j] = rgba[0][j];
1513      g->f[j] = rgba[1][j];
1514      b->f[j] = rgba[2][j];
1515      a->f[j] = rgba[3][j];
1516   }
1517}
1518
1519
1520static void
1521exec_tex(struct tgsi_exec_machine *mach,
1522         const struct tgsi_full_instruction *inst,
1523         boolean biasLod,
1524         boolean projected)
1525{
1526   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1527   union tgsi_exec_channel r[8];
1528   uint chan_index;
1529   float lodBias;
1530
1531   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1532
1533   switch (inst->InstructionExtTexture.Texture) {
1534   case TGSI_TEXTURE_1D:
1535
1536      FETCH(&r[0], 0, CHAN_X);
1537
1538      if (projected) {
1539         FETCH(&r[1], 0, CHAN_W);
1540         micro_div( &r[0], &r[0], &r[1] );
1541      }
1542
1543      if (biasLod) {
1544         FETCH(&r[1], 0, CHAN_W);
1545         lodBias = r[2].f[0];
1546      }
1547      else
1548         lodBias = 0.0;
1549
1550      fetch_texel(&mach->Samplers[unit],
1551                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
1552                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1553      break;
1554
1555   case TGSI_TEXTURE_2D:
1556   case TGSI_TEXTURE_RECT:
1557
1558      FETCH(&r[0], 0, CHAN_X);
1559      FETCH(&r[1], 0, CHAN_Y);
1560      FETCH(&r[2], 0, CHAN_Z);
1561
1562      if (projected) {
1563         FETCH(&r[3], 0, CHAN_W);
1564         micro_div( &r[0], &r[0], &r[3] );
1565         micro_div( &r[1], &r[1], &r[3] );
1566         micro_div( &r[2], &r[2], &r[3] );
1567      }
1568
1569      if (biasLod) {
1570         FETCH(&r[3], 0, CHAN_W);
1571         lodBias = r[3].f[0];
1572      }
1573      else
1574         lodBias = 0.0;
1575
1576      fetch_texel(&mach->Samplers[unit],
1577                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1578                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1579      break;
1580
1581   case TGSI_TEXTURE_3D:
1582   case TGSI_TEXTURE_CUBE:
1583
1584      FETCH(&r[0], 0, CHAN_X);
1585      FETCH(&r[1], 0, CHAN_Y);
1586      FETCH(&r[2], 0, CHAN_Z);
1587
1588      if (projected) {
1589         FETCH(&r[3], 0, CHAN_W);
1590         micro_div( &r[0], &r[0], &r[3] );
1591         micro_div( &r[1], &r[1], &r[3] );
1592         micro_div( &r[2], &r[2], &r[3] );
1593      }
1594
1595      if (biasLod) {
1596         FETCH(&r[3], 0, CHAN_W);
1597         lodBias = r[3].f[0];
1598      }
1599      else
1600         lodBias = 0.0;
1601
1602      fetch_texel(&mach->Samplers[unit],
1603                  &r[0], &r[1], &r[2], lodBias,
1604                  &r[0], &r[1], &r[2], &r[3]);
1605      break;
1606
1607   default:
1608      assert (0);
1609   }
1610
1611   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1612      STORE( &r[chan_index], 0, chan_index );
1613   }
1614}
1615
1616
1617/**
1618 * Evaluate a constant-valued coefficient at the position of the
1619 * current quad.
1620 */
1621static void
1622eval_constant_coef(
1623   struct tgsi_exec_machine *mach,
1624   unsigned attrib,
1625   unsigned chan )
1626{
1627   unsigned i;
1628
1629   for( i = 0; i < QUAD_SIZE; i++ ) {
1630      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1631   }
1632}
1633
1634/**
1635 * Evaluate a linear-valued coefficient at the position of the
1636 * current quad.
1637 */
1638static void
1639eval_linear_coef(
1640   struct tgsi_exec_machine *mach,
1641   unsigned attrib,
1642   unsigned chan )
1643{
1644   const float x = mach->QuadPos.xyzw[0].f[0];
1645   const float y = mach->QuadPos.xyzw[1].f[0];
1646   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1647   const float dady = mach->InterpCoefs[attrib].dady[chan];
1648   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1649   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1650   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1651   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1652   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1653}
1654
1655/**
1656 * Evaluate a perspective-valued coefficient at the position of the
1657 * current quad.
1658 */
1659static void
1660eval_perspective_coef(
1661   struct tgsi_exec_machine *mach,
1662   unsigned attrib,
1663   unsigned chan )
1664{
1665   const float x = mach->QuadPos.xyzw[0].f[0];
1666   const float y = mach->QuadPos.xyzw[1].f[0];
1667   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1668   const float dady = mach->InterpCoefs[attrib].dady[chan];
1669   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1670   const float *w = mach->QuadPos.xyzw[3].f;
1671   /* divide by W here */
1672   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1673   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1674   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1675   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1676}
1677
1678
1679typedef void (* eval_coef_func)(
1680   struct tgsi_exec_machine *mach,
1681   unsigned attrib,
1682   unsigned chan );
1683
1684static void
1685exec_declaration(
1686   struct tgsi_exec_machine *mach,
1687   const struct tgsi_full_declaration *decl )
1688{
1689   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1690      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1691         unsigned first, last, mask;
1692         eval_coef_func eval;
1693
1694         first = decl->DeclarationRange.First;
1695         last = decl->DeclarationRange.Last;
1696         mask = decl->Declaration.UsageMask;
1697
1698         switch( decl->Declaration.Interpolate ) {
1699         case TGSI_INTERPOLATE_CONSTANT:
1700            eval = eval_constant_coef;
1701            break;
1702
1703         case TGSI_INTERPOLATE_LINEAR:
1704            eval = eval_linear_coef;
1705            break;
1706
1707         case TGSI_INTERPOLATE_PERSPECTIVE:
1708            eval = eval_perspective_coef;
1709            break;
1710
1711         default:
1712            assert( 0 );
1713         }
1714
1715         if( mask == TGSI_WRITEMASK_XYZW ) {
1716            unsigned i, j;
1717
1718            for( i = first; i <= last; i++ ) {
1719               for( j = 0; j < NUM_CHANNELS; j++ ) {
1720                  eval( mach, i, j );
1721               }
1722            }
1723         }
1724         else {
1725            unsigned i, j;
1726
1727            for( j = 0; j < NUM_CHANNELS; j++ ) {
1728               if( mask & (1 << j) ) {
1729                  for( i = first; i <= last; i++ ) {
1730                     eval( mach, i, j );
1731                  }
1732               }
1733            }
1734         }
1735      }
1736   }
1737}
1738
1739static void
1740exec_instruction(
1741   struct tgsi_exec_machine *mach,
1742   const struct tgsi_full_instruction *inst,
1743   int *pc )
1744{
1745   uint chan_index;
1746   union tgsi_exec_channel r[8];
1747
1748   (*pc)++;
1749
1750   switch (inst->Instruction.Opcode) {
1751   case TGSI_OPCODE_ARL:
1752      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1753         FETCH( &r[0], 0, chan_index );
1754         micro_trunc( &r[0], &r[0] );
1755         STORE( &r[0], 0, chan_index );
1756      }
1757      break;
1758
1759   case TGSI_OPCODE_MOV:
1760   case TGSI_OPCODE_SWZ:
1761      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1762         FETCH( &r[0], 0, chan_index );
1763         STORE( &r[0], 0, chan_index );
1764      }
1765      break;
1766
1767   case TGSI_OPCODE_LIT:
1768      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1769	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1770      }
1771
1772      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1773	 FETCH( &r[0], 0, CHAN_X );
1774	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1775	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1776	    STORE( &r[0], 0, CHAN_Y );
1777	 }
1778
1779	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1780	    FETCH( &r[1], 0, CHAN_Y );
1781	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1782
1783	    FETCH( &r[2], 0, CHAN_W );
1784	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1785	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1786	    micro_pow( &r[1], &r[1], &r[2] );
1787	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1788	    STORE( &r[0], 0, CHAN_Z );
1789	 }
1790      }
1791
1792      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1793	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1794      }
1795      break;
1796
1797   case TGSI_OPCODE_RCP:
1798   /* TGSI_OPCODE_RECIP */
1799      FETCH( &r[0], 0, CHAN_X );
1800      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1801      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1802	 STORE( &r[0], 0, chan_index );
1803      }
1804      break;
1805
1806   case TGSI_OPCODE_RSQ:
1807   /* TGSI_OPCODE_RECIPSQRT */
1808      FETCH( &r[0], 0, CHAN_X );
1809      micro_sqrt( &r[0], &r[0] );
1810      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1811      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1812	 STORE( &r[0], 0, chan_index );
1813      }
1814      break;
1815
1816   case TGSI_OPCODE_EXP:
1817      FETCH( &r[0], 0, CHAN_X );
1818      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1819      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1820         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1821         STORE( &r[2], 0, CHAN_X );        /* store r2 */
1822      }
1823      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1824         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1825         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1826      }
1827      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1828         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1829         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1830      }
1831      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1832         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1833      }
1834      break;
1835
1836   case TGSI_OPCODE_LOG:
1837      FETCH( &r[0], 0, CHAN_X );
1838      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1839      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1840      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1841      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1842         STORE( &r[0], 0, CHAN_X );
1843      }
1844      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1845         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1846         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1847         STORE( &r[0], 0, CHAN_Y );
1848      }
1849      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1850         STORE( &r[1], 0, CHAN_Z );
1851      }
1852      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1853         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1854      }
1855      break;
1856
1857   case TGSI_OPCODE_MUL:
1858      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1859      {
1860         FETCH(&r[0], 0, chan_index);
1861         FETCH(&r[1], 1, chan_index);
1862
1863         micro_mul( &r[0], &r[0], &r[1] );
1864
1865         STORE(&r[0], 0, chan_index);
1866      }
1867      break;
1868
1869   case TGSI_OPCODE_ADD:
1870      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1871         FETCH( &r[0], 0, chan_index );
1872         FETCH( &r[1], 1, chan_index );
1873         micro_add( &r[0], &r[0], &r[1] );
1874         STORE( &r[0], 0, chan_index );
1875      }
1876      break;
1877
1878   case TGSI_OPCODE_DP3:
1879   /* TGSI_OPCODE_DOT3 */
1880      FETCH( &r[0], 0, CHAN_X );
1881      FETCH( &r[1], 1, CHAN_X );
1882      micro_mul( &r[0], &r[0], &r[1] );
1883
1884      FETCH( &r[1], 0, CHAN_Y );
1885      FETCH( &r[2], 1, CHAN_Y );
1886      micro_mul( &r[1], &r[1], &r[2] );
1887      micro_add( &r[0], &r[0], &r[1] );
1888
1889      FETCH( &r[1], 0, CHAN_Z );
1890      FETCH( &r[2], 1, CHAN_Z );
1891      micro_mul( &r[1], &r[1], &r[2] );
1892      micro_add( &r[0], &r[0], &r[1] );
1893
1894      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1895         STORE( &r[0], 0, chan_index );
1896      }
1897      break;
1898
1899    case TGSI_OPCODE_DP4:
1900    /* TGSI_OPCODE_DOT4 */
1901       FETCH(&r[0], 0, CHAN_X);
1902       FETCH(&r[1], 1, CHAN_X);
1903
1904       micro_mul( &r[0], &r[0], &r[1] );
1905
1906       FETCH(&r[1], 0, CHAN_Y);
1907       FETCH(&r[2], 1, CHAN_Y);
1908
1909       micro_mul( &r[1], &r[1], &r[2] );
1910       micro_add( &r[0], &r[0], &r[1] );
1911
1912       FETCH(&r[1], 0, CHAN_Z);
1913       FETCH(&r[2], 1, CHAN_Z);
1914
1915       micro_mul( &r[1], &r[1], &r[2] );
1916       micro_add( &r[0], &r[0], &r[1] );
1917
1918       FETCH(&r[1], 0, CHAN_W);
1919       FETCH(&r[2], 1, CHAN_W);
1920
1921       micro_mul( &r[1], &r[1], &r[2] );
1922       micro_add( &r[0], &r[0], &r[1] );
1923
1924      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1925	 STORE( &r[0], 0, chan_index );
1926      }
1927      break;
1928
1929   case TGSI_OPCODE_DST:
1930      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1931	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1932      }
1933
1934      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1935	 FETCH( &r[0], 0, CHAN_Y );
1936	 FETCH( &r[1], 1, CHAN_Y);
1937	 micro_mul( &r[0], &r[0], &r[1] );
1938	 STORE( &r[0], 0, CHAN_Y );
1939      }
1940
1941      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1942	 FETCH( &r[0], 0, CHAN_Z );
1943	 STORE( &r[0], 0, CHAN_Z );
1944      }
1945
1946      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1947	 FETCH( &r[0], 1, CHAN_W );
1948	 STORE( &r[0], 0, CHAN_W );
1949      }
1950      break;
1951
1952   case TGSI_OPCODE_MIN:
1953      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1954         FETCH(&r[0], 0, chan_index);
1955         FETCH(&r[1], 1, chan_index);
1956
1957         /* XXX use micro_min()?? */
1958         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
1959
1960         STORE(&r[0], 0, chan_index);
1961      }
1962      break;
1963
1964   case TGSI_OPCODE_MAX:
1965      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1966         FETCH(&r[0], 0, chan_index);
1967         FETCH(&r[1], 1, chan_index);
1968
1969         /* XXX use micro_max()?? */
1970         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
1971
1972         STORE(&r[0], 0, chan_index );
1973      }
1974      break;
1975
1976   case TGSI_OPCODE_SLT:
1977   /* TGSI_OPCODE_SETLT */
1978      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1979         FETCH( &r[0], 0, chan_index );
1980         FETCH( &r[1], 1, chan_index );
1981         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1982         STORE( &r[0], 0, chan_index );
1983      }
1984      break;
1985
1986   case TGSI_OPCODE_SGE:
1987   /* TGSI_OPCODE_SETGE */
1988      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1989         FETCH( &r[0], 0, chan_index );
1990         FETCH( &r[1], 1, chan_index );
1991         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1992         STORE( &r[0], 0, chan_index );
1993      }
1994      break;
1995
1996   case TGSI_OPCODE_MAD:
1997   /* TGSI_OPCODE_MADD */
1998      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1999         FETCH( &r[0], 0, chan_index );
2000         FETCH( &r[1], 1, chan_index );
2001         micro_mul( &r[0], &r[0], &r[1] );
2002         FETCH( &r[1], 2, chan_index );
2003         micro_add( &r[0], &r[0], &r[1] );
2004         STORE( &r[0], 0, chan_index );
2005      }
2006      break;
2007
2008   case TGSI_OPCODE_SUB:
2009      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2010         FETCH(&r[0], 0, chan_index);
2011         FETCH(&r[1], 1, chan_index);
2012
2013         micro_sub( &r[0], &r[0], &r[1] );
2014
2015         STORE(&r[0], 0, chan_index);
2016      }
2017      break;
2018
2019   case TGSI_OPCODE_LERP:
2020   /* TGSI_OPCODE_LRP */
2021      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2022         FETCH(&r[0], 0, chan_index);
2023         FETCH(&r[1], 1, chan_index);
2024         FETCH(&r[2], 2, chan_index);
2025
2026         micro_sub( &r[1], &r[1], &r[2] );
2027         micro_mul( &r[0], &r[0], &r[1] );
2028         micro_add( &r[0], &r[0], &r[2] );
2029
2030         STORE(&r[0], 0, chan_index);
2031      }
2032      break;
2033
2034   case TGSI_OPCODE_CND:
2035      assert (0);
2036      break;
2037
2038   case TGSI_OPCODE_CND0:
2039      assert (0);
2040      break;
2041
2042   case TGSI_OPCODE_DOT2ADD:
2043      /* TGSI_OPCODE_DP2A */
2044      assert (0);
2045      break;
2046
2047   case TGSI_OPCODE_INDEX:
2048      assert (0);
2049      break;
2050
2051   case TGSI_OPCODE_NEGATE:
2052      assert (0);
2053      break;
2054
2055   case TGSI_OPCODE_FRAC:
2056   /* TGSI_OPCODE_FRC */
2057      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2058         FETCH( &r[0], 0, chan_index );
2059         micro_frc( &r[0], &r[0] );
2060         STORE( &r[0], 0, chan_index );
2061      }
2062      break;
2063
2064   case TGSI_OPCODE_CLAMP:
2065      assert (0);
2066      break;
2067
2068   case TGSI_OPCODE_FLOOR:
2069   /* TGSI_OPCODE_FLR */
2070      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2071         FETCH( &r[0], 0, chan_index );
2072         micro_flr( &r[0], &r[0] );
2073         STORE( &r[0], 0, chan_index );
2074      }
2075      break;
2076
2077   case TGSI_OPCODE_ROUND:
2078   case TGSI_OPCODE_ARR:
2079      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2080         FETCH( &r[0], 0, chan_index );
2081         micro_rnd( &r[0], &r[0] );
2082         STORE( &r[0], 0, chan_index );
2083      }
2084      break;
2085
2086   case TGSI_OPCODE_EXPBASE2:
2087    /* TGSI_OPCODE_EX2 */
2088      FETCH(&r[0], 0, CHAN_X);
2089
2090#if FAST_MATH
2091      micro_exp2( &r[0], &r[0] );
2092#else
2093      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2094#endif
2095
2096      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2097	 STORE( &r[0], 0, chan_index );
2098      }
2099      break;
2100
2101   case TGSI_OPCODE_LOGBASE2:
2102   /* TGSI_OPCODE_LG2 */
2103      FETCH( &r[0], 0, CHAN_X );
2104      micro_lg2( &r[0], &r[0] );
2105      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2106         STORE( &r[0], 0, chan_index );
2107      }
2108      break;
2109
2110   case TGSI_OPCODE_POWER:
2111      /* TGSI_OPCODE_POW */
2112      FETCH(&r[0], 0, CHAN_X);
2113      FETCH(&r[1], 1, CHAN_X);
2114
2115      micro_pow( &r[0], &r[0], &r[1] );
2116
2117      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2118	 STORE( &r[0], 0, chan_index );
2119      }
2120      break;
2121
2122   case TGSI_OPCODE_CROSSPRODUCT:
2123      /* TGSI_OPCODE_XPD */
2124      FETCH(&r[0], 0, CHAN_Y);
2125      FETCH(&r[1], 1, CHAN_Z);
2126
2127      micro_mul( &r[2], &r[0], &r[1] );
2128
2129      FETCH(&r[3], 0, CHAN_Z);
2130      FETCH(&r[4], 1, CHAN_Y);
2131
2132      micro_mul( &r[5], &r[3], &r[4] );
2133      micro_sub( &r[2], &r[2], &r[5] );
2134
2135      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2136         STORE( &r[2], 0, CHAN_X );
2137      }
2138
2139      FETCH(&r[2], 1, CHAN_X);
2140
2141      micro_mul( &r[3], &r[3], &r[2] );
2142
2143      FETCH(&r[5], 0, CHAN_X);
2144
2145      micro_mul( &r[1], &r[1], &r[5] );
2146      micro_sub( &r[3], &r[3], &r[1] );
2147
2148      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2149         STORE( &r[3], 0, CHAN_Y );
2150      }
2151
2152      micro_mul( &r[5], &r[5], &r[4] );
2153      micro_mul( &r[0], &r[0], &r[2] );
2154      micro_sub( &r[5], &r[5], &r[0] );
2155
2156      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2157         STORE( &r[5], 0, CHAN_Z );
2158      }
2159
2160      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2161         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2162      }
2163      break;
2164
2165    case TGSI_OPCODE_MULTIPLYMATRIX:
2166       assert (0);
2167       break;
2168
2169    case TGSI_OPCODE_ABS:
2170       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2171          FETCH(&r[0], 0, chan_index);
2172
2173          micro_abs( &r[0], &r[0] );
2174
2175          STORE(&r[0], 0, chan_index);
2176       }
2177       break;
2178
2179   case TGSI_OPCODE_RCC:
2180      assert (0);
2181      break;
2182
2183   case TGSI_OPCODE_DPH:
2184      FETCH(&r[0], 0, CHAN_X);
2185      FETCH(&r[1], 1, CHAN_X);
2186
2187      micro_mul( &r[0], &r[0], &r[1] );
2188
2189      FETCH(&r[1], 0, CHAN_Y);
2190      FETCH(&r[2], 1, CHAN_Y);
2191
2192      micro_mul( &r[1], &r[1], &r[2] );
2193      micro_add( &r[0], &r[0], &r[1] );
2194
2195      FETCH(&r[1], 0, CHAN_Z);
2196      FETCH(&r[2], 1, CHAN_Z);
2197
2198      micro_mul( &r[1], &r[1], &r[2] );
2199      micro_add( &r[0], &r[0], &r[1] );
2200
2201      FETCH(&r[1], 1, CHAN_W);
2202
2203      micro_add( &r[0], &r[0], &r[1] );
2204
2205      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2206	 STORE( &r[0], 0, chan_index );
2207      }
2208      break;
2209
2210   case TGSI_OPCODE_COS:
2211      FETCH(&r[0], 0, CHAN_X);
2212
2213      micro_cos( &r[0], &r[0] );
2214
2215      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2216	 STORE( &r[0], 0, chan_index );
2217      }
2218      break;
2219
2220   case TGSI_OPCODE_DDX:
2221      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2222         FETCH( &r[0], 0, chan_index );
2223         micro_ddx( &r[0], &r[0] );
2224         STORE( &r[0], 0, chan_index );
2225      }
2226      break;
2227
2228   case TGSI_OPCODE_DDY:
2229      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2230         FETCH( &r[0], 0, chan_index );
2231         micro_ddy( &r[0], &r[0] );
2232         STORE( &r[0], 0, chan_index );
2233      }
2234      break;
2235
2236   case TGSI_OPCODE_KILP:
2237      exec_kilp (mach, inst);
2238      break;
2239
2240   case TGSI_OPCODE_KIL:
2241      exec_kil (mach, inst);
2242      break;
2243
2244   case TGSI_OPCODE_PK2H:
2245      assert (0);
2246      break;
2247
2248   case TGSI_OPCODE_PK2US:
2249      assert (0);
2250      break;
2251
2252   case TGSI_OPCODE_PK4B:
2253      assert (0);
2254      break;
2255
2256   case TGSI_OPCODE_PK4UB:
2257      assert (0);
2258      break;
2259
2260   case TGSI_OPCODE_RFL:
2261      assert (0);
2262      break;
2263
2264   case TGSI_OPCODE_SEQ:
2265      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2266         FETCH( &r[0], 0, chan_index );
2267         FETCH( &r[1], 1, chan_index );
2268         micro_eq( &r[0], &r[0], &r[1],
2269                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2270                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2271         STORE( &r[0], 0, chan_index );
2272      }
2273      break;
2274
2275   case TGSI_OPCODE_SFL:
2276      assert (0);
2277      break;
2278
2279   case TGSI_OPCODE_SGT:
2280      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2281         FETCH( &r[0], 0, chan_index );
2282         FETCH( &r[1], 1, chan_index );
2283         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2284         STORE( &r[0], 0, chan_index );
2285      }
2286      break;
2287
2288   case TGSI_OPCODE_SIN:
2289      FETCH( &r[0], 0, CHAN_X );
2290      micro_sin( &r[0], &r[0] );
2291      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2292         STORE( &r[0], 0, chan_index );
2293      }
2294      break;
2295
2296   case TGSI_OPCODE_SLE:
2297      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2298         FETCH( &r[0], 0, chan_index );
2299         FETCH( &r[1], 1, chan_index );
2300         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2301         STORE( &r[0], 0, chan_index );
2302      }
2303      break;
2304
2305   case TGSI_OPCODE_SNE:
2306      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2307         FETCH( &r[0], 0, chan_index );
2308         FETCH( &r[1], 1, chan_index );
2309         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2310         STORE( &r[0], 0, chan_index );
2311      }
2312      break;
2313
2314   case TGSI_OPCODE_STR:
2315      assert (0);
2316      break;
2317
2318   case TGSI_OPCODE_TEX:
2319      /* simple texture lookup */
2320      /* src[0] = texcoord */
2321      /* src[1] = sampler unit */
2322      exec_tex(mach, inst, FALSE, FALSE);
2323      break;
2324
2325   case TGSI_OPCODE_TXB:
2326      /* Texture lookup with lod bias */
2327      /* src[0] = texcoord (src[0].w = LOD bias) */
2328      /* src[1] = sampler unit */
2329      exec_tex(mach, inst, TRUE, FALSE);
2330      break;
2331
2332   case TGSI_OPCODE_TXD:
2333      /* Texture lookup with explict partial derivatives */
2334      /* src[0] = texcoord */
2335      /* src[1] = d[strq]/dx */
2336      /* src[2] = d[strq]/dy */
2337      /* src[3] = sampler unit */
2338      assert (0);
2339      break;
2340
2341   case TGSI_OPCODE_TXL:
2342      /* Texture lookup with explit LOD */
2343      /* src[0] = texcoord (src[0].w = LOD) */
2344      /* src[1] = sampler unit */
2345      exec_tex(mach, inst, TRUE, FALSE);
2346      break;
2347
2348   case TGSI_OPCODE_TXP:
2349      /* Texture lookup with projection */
2350      /* src[0] = texcoord (src[0].w = projection) */
2351      /* src[1] = sampler unit */
2352      exec_tex(mach, inst, FALSE, TRUE);
2353      break;
2354
2355   case TGSI_OPCODE_UP2H:
2356      assert (0);
2357      break;
2358
2359   case TGSI_OPCODE_UP2US:
2360      assert (0);
2361      break;
2362
2363   case TGSI_OPCODE_UP4B:
2364      assert (0);
2365      break;
2366
2367   case TGSI_OPCODE_UP4UB:
2368      assert (0);
2369      break;
2370
2371   case TGSI_OPCODE_X2D:
2372      assert (0);
2373      break;
2374
2375   case TGSI_OPCODE_ARA:
2376      assert (0);
2377      break;
2378
2379   case TGSI_OPCODE_BRA:
2380      assert (0);
2381      break;
2382
2383   case TGSI_OPCODE_CAL:
2384      /* skip the call if no execution channels are enabled */
2385      if (mach->ExecMask) {
2386         /* do the call */
2387
2388         /* push the Cond, Loop, Cont stacks */
2389         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2390         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2391         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2392         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2393         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2394         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2395
2396         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2397         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2398
2399         /* note that PC was already incremented above */
2400         mach->CallStack[mach->CallStackTop++] = *pc;
2401         *pc = inst->InstructionExtLabel.Label;
2402      }
2403      break;
2404
2405   case TGSI_OPCODE_RET:
2406      mach->FuncMask &= ~mach->ExecMask;
2407      UPDATE_EXEC_MASK(mach);
2408
2409      if (mach->FuncMask == 0x0) {
2410         /* really return now (otherwise, keep executing */
2411
2412         if (mach->CallStackTop == 0) {
2413            /* returning from main() */
2414            *pc = -1;
2415            return;
2416         }
2417         *pc = mach->CallStack[--mach->CallStackTop];
2418
2419         /* pop the Cond, Loop, Cont stacks */
2420         assert(mach->CondStackTop > 0);
2421         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2422         assert(mach->LoopStackTop > 0);
2423         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2424         assert(mach->ContStackTop > 0);
2425         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2426         assert(mach->FuncStackTop > 0);
2427         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2428
2429         UPDATE_EXEC_MASK(mach);
2430      }
2431      break;
2432
2433   case TGSI_OPCODE_SSG:
2434   /* TGSI_OPCODE_SGN */
2435      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2436         FETCH( &r[0], 0, chan_index );
2437         micro_sgn( &r[0], &r[0] );
2438         STORE( &r[0], 0, chan_index );
2439      }
2440      break;
2441
2442   case TGSI_OPCODE_CMP:
2443      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2444         FETCH(&r[0], 0, chan_index);
2445         FETCH(&r[1], 1, chan_index);
2446         FETCH(&r[2], 2, chan_index);
2447
2448         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2449
2450         STORE(&r[0], 0, chan_index);
2451      }
2452      break;
2453
2454   case TGSI_OPCODE_SCS:
2455      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2456         FETCH( &r[0], 0, CHAN_X );
2457      }
2458      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
2459         micro_cos( &r[1], &r[0] );
2460         STORE( &r[1], 0, CHAN_X );
2461      }
2462      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2463         micro_sin( &r[1], &r[0] );
2464         STORE( &r[1], 0, CHAN_Y );
2465      }
2466      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2467         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2468      }
2469      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2470         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2471      }
2472      break;
2473
2474   case TGSI_OPCODE_NRM:
2475      /* 3-component vector normalize */
2476      {
2477         union tgsi_exec_channel tmp, dot;
2478
2479         /* tmp = dp3(src0, src0): */
2480         FETCH( &r[0], 0, CHAN_X );
2481         micro_mul( &tmp, &r[0], &r[0] );
2482
2483         FETCH( &r[1], 0, CHAN_Y );
2484         micro_mul( &dot, &r[1], &r[1] );
2485         micro_add( &tmp, &tmp, &dot );
2486
2487         FETCH( &r[2], 0, CHAN_Z );
2488         micro_mul( &dot, &r[2], &r[2] );
2489         micro_add( &tmp, &tmp, &dot );
2490
2491         /* tmp = 1 / tmp */
2492         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2493
2494         /* note: w channel is undefined */
2495         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2496            /* chan = chan * tmp */
2497            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2498            STORE( &r[chan_index], 0, chan_index );
2499         }
2500      }
2501      break;
2502
2503   case TGSI_OPCODE_NRM4:
2504      /* 4-component vector normalize */
2505      {
2506         union tgsi_exec_channel tmp, dot;
2507
2508         /* tmp = dp4(src0, src0): */
2509         FETCH( &r[0], 0, CHAN_X );
2510         micro_mul( &tmp, &r[0], &r[0] );
2511
2512         FETCH( &r[1], 0, CHAN_Y );
2513         micro_mul( &dot, &r[1], &r[1] );
2514         micro_add( &tmp, &tmp, &dot );
2515
2516         FETCH( &r[2], 0, CHAN_Z );
2517         micro_mul( &dot, &r[2], &r[2] );
2518         micro_add( &tmp, &tmp, &dot );
2519
2520         FETCH( &r[3], 0, CHAN_W );
2521         micro_mul( &dot, &r[3], &r[3] );
2522         micro_add( &tmp, &tmp, &dot );
2523
2524         /* tmp = 1 / tmp */
2525         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2526
2527         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2528            /* chan = chan * tmp */
2529            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2530            STORE( &r[chan_index], 0, chan_index );
2531         }
2532      }
2533      break;
2534
2535   case TGSI_OPCODE_DIV:
2536      assert( 0 );
2537      break;
2538
2539   case TGSI_OPCODE_DP2:
2540      FETCH( &r[0], 0, CHAN_X );
2541      FETCH( &r[1], 1, CHAN_X );
2542      micro_mul( &r[0], &r[0], &r[1] );
2543
2544      FETCH( &r[1], 0, CHAN_Y );
2545      FETCH( &r[2], 1, CHAN_Y );
2546      micro_mul( &r[1], &r[1], &r[2] );
2547      micro_add( &r[0], &r[0], &r[1] );
2548
2549      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2550         STORE( &r[0], 0, chan_index );
2551      }
2552      break;
2553
2554   case TGSI_OPCODE_IF:
2555      /* push CondMask */
2556      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2557      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2558      FETCH( &r[0], 0, CHAN_X );
2559      /* update CondMask */
2560      if( ! r[0].u[0] ) {
2561         mach->CondMask &= ~0x1;
2562      }
2563      if( ! r[0].u[1] ) {
2564         mach->CondMask &= ~0x2;
2565      }
2566      if( ! r[0].u[2] ) {
2567         mach->CondMask &= ~0x4;
2568      }
2569      if( ! r[0].u[3] ) {
2570         mach->CondMask &= ~0x8;
2571      }
2572      UPDATE_EXEC_MASK(mach);
2573      /* Todo: If CondMask==0, jump to ELSE */
2574      break;
2575
2576   case TGSI_OPCODE_ELSE:
2577      /* invert CondMask wrt previous mask */
2578      {
2579         uint prevMask;
2580         assert(mach->CondStackTop > 0);
2581         prevMask = mach->CondStack[mach->CondStackTop - 1];
2582         mach->CondMask = ~mach->CondMask & prevMask;
2583         UPDATE_EXEC_MASK(mach);
2584         /* Todo: If CondMask==0, jump to ENDIF */
2585      }
2586      break;
2587
2588   case TGSI_OPCODE_ENDIF:
2589      /* pop CondMask */
2590      assert(mach->CondStackTop > 0);
2591      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2592      UPDATE_EXEC_MASK(mach);
2593      break;
2594
2595   case TGSI_OPCODE_END:
2596      /* halt execution */
2597      *pc = -1;
2598      break;
2599
2600   case TGSI_OPCODE_REP:
2601      assert (0);
2602      break;
2603
2604   case TGSI_OPCODE_ENDREP:
2605       assert (0);
2606       break;
2607
2608   case TGSI_OPCODE_PUSHA:
2609      assert (0);
2610      break;
2611
2612   case TGSI_OPCODE_POPA:
2613      assert (0);
2614      break;
2615
2616   case TGSI_OPCODE_CEIL:
2617      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2618         FETCH( &r[0], 0, chan_index );
2619         micro_ceil( &r[0], &r[0] );
2620         STORE( &r[0], 0, chan_index );
2621      }
2622      break;
2623
2624   case TGSI_OPCODE_I2F:
2625      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2626         FETCH( &r[0], 0, chan_index );
2627         micro_i2f( &r[0], &r[0] );
2628         STORE( &r[0], 0, chan_index );
2629      }
2630      break;
2631
2632   case TGSI_OPCODE_NOT:
2633      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2634         FETCH( &r[0], 0, chan_index );
2635         micro_not( &r[0], &r[0] );
2636         STORE( &r[0], 0, chan_index );
2637      }
2638      break;
2639
2640   case TGSI_OPCODE_TRUNC:
2641      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2642         FETCH( &r[0], 0, chan_index );
2643         micro_trunc( &r[0], &r[0] );
2644         STORE( &r[0], 0, chan_index );
2645      }
2646      break;
2647
2648   case TGSI_OPCODE_SHL:
2649      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2650         FETCH( &r[0], 0, chan_index );
2651         FETCH( &r[1], 1, chan_index );
2652         micro_shl( &r[0], &r[0], &r[1] );
2653         STORE( &r[0], 0, chan_index );
2654      }
2655      break;
2656
2657   case TGSI_OPCODE_SHR:
2658      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2659         FETCH( &r[0], 0, chan_index );
2660         FETCH( &r[1], 1, chan_index );
2661         micro_ishr( &r[0], &r[0], &r[1] );
2662         STORE( &r[0], 0, chan_index );
2663      }
2664      break;
2665
2666   case TGSI_OPCODE_AND:
2667      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2668         FETCH( &r[0], 0, chan_index );
2669         FETCH( &r[1], 1, chan_index );
2670         micro_and( &r[0], &r[0], &r[1] );
2671         STORE( &r[0], 0, chan_index );
2672      }
2673      break;
2674
2675   case TGSI_OPCODE_OR:
2676      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2677         FETCH( &r[0], 0, chan_index );
2678         FETCH( &r[1], 1, chan_index );
2679         micro_or( &r[0], &r[0], &r[1] );
2680         STORE( &r[0], 0, chan_index );
2681      }
2682      break;
2683
2684   case TGSI_OPCODE_MOD:
2685      assert (0);
2686      break;
2687
2688   case TGSI_OPCODE_XOR:
2689      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2690         FETCH( &r[0], 0, chan_index );
2691         FETCH( &r[1], 1, chan_index );
2692         micro_xor( &r[0], &r[0], &r[1] );
2693         STORE( &r[0], 0, chan_index );
2694      }
2695      break;
2696
2697   case TGSI_OPCODE_SAD:
2698      assert (0);
2699      break;
2700
2701   case TGSI_OPCODE_TXF:
2702      assert (0);
2703      break;
2704
2705   case TGSI_OPCODE_TXQ:
2706      assert (0);
2707      break;
2708
2709   case TGSI_OPCODE_EMIT:
2710      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2711      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2712      break;
2713
2714   case TGSI_OPCODE_ENDPRIM:
2715      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2716      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2717      break;
2718
2719   case TGSI_OPCODE_LOOP:
2720      /* fall-through (for now) */
2721   case TGSI_OPCODE_BGNLOOP2:
2722      /* push LoopMask and ContMasks */
2723      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2724      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2725      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2726      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2727      break;
2728
2729   case TGSI_OPCODE_ENDLOOP:
2730      /* fall-through (for now at least) */
2731   case TGSI_OPCODE_ENDLOOP2:
2732      /* Restore ContMask, but don't pop */
2733      assert(mach->ContStackTop > 0);
2734      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2735      UPDATE_EXEC_MASK(mach);
2736      if (mach->ExecMask) {
2737         /* repeat loop: jump to instruction just past BGNLOOP */
2738         *pc = inst->InstructionExtLabel.Label + 1;
2739      }
2740      else {
2741         /* exit loop: pop LoopMask */
2742         assert(mach->LoopStackTop > 0);
2743         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2744         /* pop ContMask */
2745         assert(mach->ContStackTop > 0);
2746         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2747      }
2748      UPDATE_EXEC_MASK(mach);
2749      break;
2750
2751   case TGSI_OPCODE_BRK:
2752      /* turn off loop channels for each enabled exec channel */
2753      mach->LoopMask &= ~mach->ExecMask;
2754      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2755      UPDATE_EXEC_MASK(mach);
2756      break;
2757
2758   case TGSI_OPCODE_CONT:
2759      /* turn off cont channels for each enabled exec channel */
2760      mach->ContMask &= ~mach->ExecMask;
2761      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2762      UPDATE_EXEC_MASK(mach);
2763      break;
2764
2765   case TGSI_OPCODE_BGNSUB:
2766      /* no-op */
2767      break;
2768
2769   case TGSI_OPCODE_ENDSUB:
2770      /* no-op */
2771      break;
2772
2773   case TGSI_OPCODE_NOISE1:
2774      assert( 0 );
2775      break;
2776
2777   case TGSI_OPCODE_NOISE2:
2778      assert( 0 );
2779      break;
2780
2781   case TGSI_OPCODE_NOISE3:
2782      assert( 0 );
2783      break;
2784
2785   case TGSI_OPCODE_NOISE4:
2786      assert( 0 );
2787      break;
2788
2789   case TGSI_OPCODE_NOP:
2790      break;
2791
2792   default:
2793      assert( 0 );
2794   }
2795}
2796
2797
2798/**
2799 * Run TGSI interpreter.
2800 * \return bitmask of "alive" quad components
2801 */
2802uint
2803tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
2804{
2805   uint i;
2806   int pc = 0;
2807
2808   mach->CondMask = 0xf;
2809   mach->LoopMask = 0xf;
2810   mach->ContMask = 0xf;
2811   mach->FuncMask = 0xf;
2812   mach->ExecMask = 0xf;
2813
2814   mach->CondStackTop = 0; /* temporarily subvert this assertion */
2815   assert(mach->CondStackTop == 0);
2816   assert(mach->LoopStackTop == 0);
2817   assert(mach->ContStackTop == 0);
2818   assert(mach->CallStackTop == 0);
2819
2820   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
2821   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
2822
2823   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
2824      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
2825      mach->Primitives[0] = 0;
2826   }
2827
2828   for (i = 0; i < QUAD_SIZE; i++) {
2829      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
2830         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
2831         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
2832         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
2833         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
2834   }
2835
2836   /* execute declarations (interpolants) */
2837   for (i = 0; i < mach->NumDeclarations; i++) {
2838      exec_declaration( mach, mach->Declarations+i );
2839   }
2840
2841   /* execute instructions, until pc is set to -1 */
2842   while (pc != -1) {
2843      assert(pc < (int) mach->NumInstructions);
2844      exec_instruction( mach, mach->Instructions + pc, &pc );
2845   }
2846
2847#if 0
2848   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
2849   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2850      /*
2851       * Scale back depth component.
2852       */
2853      for (i = 0; i < 4; i++)
2854         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
2855   }
2856#endif
2857
2858   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
2859}
2860
2861
2862