tgsi_exec.c revision 3673189326e348eb91e354017703fdfd9d6d8184
1600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/**************************************************************************
2600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang *
3600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * All Rights Reserved.
5600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang *
6600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Permission is hereby granted, free of charge, to any person obtaining a
7600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * copy of this software and associated documentation files (the
8600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * "Software"), to deal in the Software without restriction, including
9600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * without limitation the rights to use, copy, modify, merge, publish,
10600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * distribute, sub license, and/or sell copies of the Software, and to
11600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * permit persons to whom the Software is furnished to do so, subject to
12600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * the following conditions:
13600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang *
14600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * The above copyright notice and this permission notice (including the
15600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * next paragraph) shall be included in all copies or substantial portions
16600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * of the Software.
17600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang *
18600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang *
26600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang **************************************************************************/
27600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
 *
 * The ExecMask is computed from four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK).  The first three are
 * controlled by the flow control instructions (namely: IF/ELSE/ENDIF,
 * LOOP/ENDLOOP and CONT).
 *
 *
 * Authors:
 *   Michal Krol
 *   Brian Paul
 */
52600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
53600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_compiler.h"
54600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_state.h"
55600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_shader_tokens.h"
56600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi/tgsi_parse.h"
57600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi/tgsi_util.h"
58600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi_exec.h"
59600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "util/u_memory.h"
60600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "util/u_math.h"
61600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
/* Compile-time math option flag (its users are outside this excerpt). */
#define FAST_MATH 1

/* Lane indices of a quad, named after the lane's position in the 2x2 tile. */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3

/* Channel indices of a four-component (x, y, z, w) register. */
#define CHAN_X  0
#define CHAN_Y  1
#define CHAN_Z  2
#define CHAN_W  3

/*
 * Short aliases for the utility registers declared as TGSI_EXEC_TEMP_*.
 * Each register is addressed by a pair: _I is the register index and
 * _C is the channel within that register.
 */
#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0            TGSI_EXEC_TEMP_R0
106600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
/**
 * Non-zero when channel CHAN is set in the write mask of the instruction's
 * first (index 0) destination register.
 */
#define IS_CHANNEL_ENABLED(INST, CHAN)\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** Same test against the second (index 1) destination register. */
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * Loop header: runs the following statement once for every channel
 * 0 .. NUM_CHANNELS-1 that is enabled in destination register 0.
 * CHAN must be an assignable integer variable; it is used as the loop
 * counter.
 *
 * The filter is written as "if (!enabled) {} else" so that the macro's
 * hidden 'if' already owns an 'else'.  An 'else' written by the caller
 * after the loop body therefore binds to the caller's own 'if' instead of
 * being captured by the macro.
 */
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED( INST, CHAN )) {} else

/** Same loop, filtered by the write mask of destination register 1. */
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
      if (!IS_CHANNEL_ENABLED2( INST, CHAN )) {} else
120600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
121600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
/**
 * Recompute the execution mask of the machine pointed to by MACH as the
 * bitwise AND of its conditional, loop, continue and function masks.
 * MACH is expanded several times, so pass an expression without side
 * effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->FuncMask)
125600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
126600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
127600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic const union tgsi_exec_channel ZeroVec =
128600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   { { 0.0, 0.0, 0.0, 0.0 } };
129600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
130600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/**
131600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Initialize machine state by expanding tokens to full instructions,
132600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * allocating temporary storage, setting up constants, etc.
133600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * After this, we can call tgsi_exec_machine_run() many times.
134600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang */
135600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid
136600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_bind_shader(
137600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_exec_machine *mach,
138600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const struct tgsi_token *tokens,
139600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint numSamplers,
140600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_sampler **samplers)
141600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
142600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint k;
143600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_parse_context parse;
144600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_exec_labels *labels = &mach->Labels;
145600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_full_instruction *instructions;
146600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_full_declaration *declarations;
147600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint maxInstructions = 10, numInstructions = 0;
148600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint maxDeclarations = 10, numDeclarations = 0;
149600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint instno = 0;
150600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
151600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0
152600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   tgsi_dump(tokens, 0);
153600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif
154600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
155600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   util_init_math();
156600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
157600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Tokens = tokens;
158600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Samplers = samplers;
159600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
160600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   k = tgsi_parse_init (&parse, mach->Tokens);
161600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (k != TGSI_PARSE_OK) {
162600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      debug_printf( "Problem parsing!\n" );
163600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      return;
164600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
165600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
166600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Processor = parse.FullHeader.Processor.Processor;
167600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->ImmLimit = 0;
168600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   labels->count = 0;
169600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
170600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   declarations = (struct tgsi_full_declaration *)
171600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
172600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
173600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (!declarations) {
174600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      return;
175600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
176600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
177600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   instructions = (struct tgsi_full_instruction *)
178600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
179600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
180600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (!instructions) {
181600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      FREE( declarations );
182600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      return;
183600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
184600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
185600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   while( !tgsi_parse_end_of_tokens( &parse ) ) {
186600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      uint pointer = parse.Position;
187600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      uint i;
188600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
189600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      tgsi_parse_token( &parse );
190600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      switch( parse.FullToken.Token.Type ) {
191600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      case TGSI_TOKEN_TYPE_DECLARATION:
192600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         /* save expanded declaration */
193600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         if (numDeclarations == maxDeclarations) {
194600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            declarations = REALLOC(declarations,
195600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   maxDeclarations
196600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   * sizeof(struct tgsi_full_declaration),
197600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   (maxDeclarations + 10)
198600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   * sizeof(struct tgsi_full_declaration));
199600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            maxDeclarations += 10;
200600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         }
201600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         memcpy(declarations + numDeclarations,
202600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                &parse.FullToken.FullDeclaration,
203600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                sizeof(declarations[0]));
204600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         numDeclarations++;
205600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         break;
206600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
207600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      case TGSI_TOKEN_TYPE_IMMEDIATE:
208600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         {
209600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
210600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            assert( size % 4 == 0 );
211600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
212600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
213600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            for( i = 0; i < size; i++ ) {
214600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang               mach->Imms[mach->ImmLimit + i / 4][i % 4] =
215600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
216600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            }
217600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            mach->ImmLimit += size / 4;
218600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         }
219600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         break;
220600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
221600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      case TGSI_TOKEN_TYPE_INSTRUCTION:
222600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         assert( labels->count < MAX_LABELS );
223600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
224600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         labels->labels[labels->count][0] = instno;
225600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         labels->labels[labels->count][1] = pointer;
226600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         labels->count++;
227600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
228600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         /* save expanded instruction */
229600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         if (numInstructions == maxInstructions) {
230600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            instructions = REALLOC(instructions,
231600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   maxInstructions
232600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   * sizeof(struct tgsi_full_instruction),
233600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   (maxInstructions + 10)
234600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                                   * sizeof(struct tgsi_full_instruction));
235600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            maxInstructions += 10;
236600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         }
237600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         memcpy(instructions + numInstructions,
238600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                &parse.FullToken.FullInstruction,
239600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                sizeof(instructions[0]));
240600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         numInstructions++;
241600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         break;
242600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
243600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      default:
244600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         assert( 0 );
245600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      }
246600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
247600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   tgsi_parse_free (&parse);
248600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
249600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (mach->Declarations) {
250600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      FREE( mach->Declarations );
251600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
252600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Declarations = declarations;
253600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->NumDeclarations = numDeclarations;
254600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
255600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (mach->Instructions) {
256600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      FREE( mach->Instructions );
257600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
258600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Instructions = instructions;
259600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->NumInstructions = numInstructions;
260600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
261600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
262600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
263600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid
264600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_init(
265600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   struct tgsi_exec_machine *mach )
266600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
267600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint i;
268600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
269600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
270600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
271600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
272600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   /* Setup constants. */
273600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   for( i = 0; i < 4; i++ ) {
274600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
275600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
276600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
277600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
278600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
279600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
280600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
281600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
282600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
283600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
284600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
285600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
286600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
287600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
288600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid
289600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
290600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
291600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (mach->Instructions) {
292600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      FREE(mach->Instructions);
293600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Instructions = NULL;
294600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->NumInstructions = 0;
295600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
296600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (mach->Declarations) {
297600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      FREE(mach->Declarations);
298600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->Declarations = NULL;
299600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      mach->NumDeclarations = 0;
300600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
301600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
302600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
303600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
304600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
305600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_abs(
306600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
307600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
308600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
309600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = fabsf( src->f[0] );
310600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = fabsf( src->f[1] );
311600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = fabsf( src->f[2] );
312600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = fabsf( src->f[3] );
313600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
314600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
315600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
316600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_add(
317600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
318600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
319600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
320600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
321600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] + src1->f[0];
322600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] + src1->f[1];
323600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] + src1->f[2];
324600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] + src1->f[3];
325600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
326600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/** dst = src0 + src1 for each of the four integer lanes (compiled out). */
static void
micro_iadd(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int lane;

   for (lane = 0; lane < 4; lane++) {
      dst->i[lane] = src0->i[lane] + src1->i[lane];
   }
}
#endif
340600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
341600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
342600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_and(
343600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
344600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
345600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
346600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
347600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->u[0] = src0->u[0] & src1->u[0];
348600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->u[1] = src0->u[1] & src1->u[1];
349600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->u[2] = src0->u[2] & src1->u[2];
350600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->u[3] = src0->u[3] & src1->u[3];
351600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
352600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
353600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
354600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ceil(
355600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
356600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
357600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
358600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = ceilf( src->f[0] );
359600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = ceilf( src->f[1] );
360600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = ceilf( src->f[2] );
361600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = ceilf( src->f[3] );
362600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
363600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
364600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
365600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_cos(
366600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
367600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
368600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
369600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = cosf( src->f[0] );
370600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = cosf( src->f[1] );
371600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = cosf( src->f[2] );
372600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = cosf( src->f[3] );
373600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
374600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
375600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
376600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ddx(
377600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
378600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
379600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
380600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] =
381600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] =
382600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] =
383600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
384600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
385600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
386600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
387600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ddy(
388600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
389600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
390600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
391600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] =
392600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] =
393600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] =
394600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
395600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
396600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
397600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
398600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_div(
399600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
400600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
401600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
402600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
403600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (src1->f[0] != 0) {
404600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      dst->f[0] = src0->f[0] / src1->f[0];
405600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
406600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (src1->f[1] != 0) {
407600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      dst->f[1] = src0->f[1] / src1->f[1];
408600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
409600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (src1->f[2] != 0) {
410600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      dst->f[2] = src0->f[2] / src1->f[2];
411600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
412600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   if (src1->f[3] != 0) {
413600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      dst->f[3] = src0->f[3] / src1->f[3];
414600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
415600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
416600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane unsigned integer division (currently compiled out).
 *
 * dst->u[lane] = src0->u[lane] / src1->u[lane]; the divisor is not
 * checked for zero.
 */
static void
micro_udiv(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] / src1->u[lane];
   }
}
#endif
430600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
431600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
432600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_eq(
433600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
434600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
435600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1,
436600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src2,
437600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src3 )
438600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
439600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
440600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
441600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
442600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
443600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
444600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane select on signed integer equality (currently compiled out).
 *
 * For each lane, dst->i[lane] is src2->i[lane] when
 * src0->i[lane] == src1->i[lane], otherwise src3->i[lane].
 */
static void
micro_ieq(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] == src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
460600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
461600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
462600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_exp2(
463600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
464600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src)
465600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
466600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if FAST_MATH
467600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = util_fast_exp2( src->f[0] );
468600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = util_fast_exp2( src->f[1] );
469600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = util_fast_exp2( src->f[2] );
470600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = util_fast_exp2( src->f[3] );
471600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#else
472600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = powf( 2.0f, src->f[0] );
473600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = powf( 2.0f, src->f[1] );
474600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = powf( 2.0f, src->f[2] );
475600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = powf( 2.0f, src->f[3] );
476600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif
477600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
478600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane conversion from float to unsigned integer using a C cast
 * (currently compiled out): dst->u[lane] = (uint) src->f[lane].
 */
static void
micro_f2ut(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = (uint) src->f[lane];
   }
}
#endif
491600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
492600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
493600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_float_clamp(union tgsi_exec_channel *dst,
494600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang                  const union tgsi_exec_channel *src)
495600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
496600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   uint i;
497600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
498600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   for (i = 0; i < 4; i++) {
499600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      if (src->f[i] > 0.0f) {
500600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         if (src->f[i] > 1.884467e+019f)
501600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = 1.884467e+019f;
502600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         else if (src->f[i] < 5.42101e-020f)
503600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = 5.42101e-020f;
504600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         else
505600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = src->f[i];
506600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      }
507600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      else {
508600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         if (src->f[i] < -1.884467e+019f)
509600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = -1.884467e+019f;
510600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         else if (src->f[i] > -5.42101e-020f)
511600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = -5.42101e-020f;
512600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang         else
513600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang            dst->f[i] = src->f[i];
514600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang      }
515600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   }
516600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
517600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
518600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
519600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_flr(
520600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
521600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
522600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
523600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = floorf( src->f[0] );
524600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = floorf( src->f[1] );
525600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = floorf( src->f[2] );
526600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = floorf( src->f[3] );
527600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
528600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
529600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
530600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_frc(
531600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
532600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
533600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
534600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src->f[0] - floorf( src->f[0] );
535600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src->f[1] - floorf( src->f[1] );
536600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src->f[2] - floorf( src->f[2] );
537600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src->f[3] - floorf( src->f[3] );
538600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
539600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
540600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
541600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_i2f(
542600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
543600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
544600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
545600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = (float) src->i[0];
546600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = (float) src->i[1];
547600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = (float) src->i[2];
548600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = (float) src->i[3];
549600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
550600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
551600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
552600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_lg2(
553600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
554600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src )
555600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
556600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if FAST_MATH
557600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = util_fast_log2( src->f[0] );
558600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = util_fast_log2( src->f[1] );
559600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = util_fast_log2( src->f[2] );
560600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = util_fast_log2( src->f[3] );
561600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#else
562600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = logf( src->f[0] ) * 1.442695f;
563600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = logf( src->f[1] ) * 1.442695f;
564600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = logf( src->f[2] ) * 1.442695f;
565600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = logf( src->f[3] ) * 1.442695f;
566600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif
567600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
568600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
569600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
570600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_le(
571600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
572600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
573600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1,
574600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src2,
575600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src3 )
576600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
577600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
578600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
579600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
580600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
581600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
582600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
583600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
584600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_lt(
585600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
586600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
587600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1,
588600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src2,
589600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src3 )
590600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
591600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
592600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
593600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
594600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
595600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
596600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane select on signed integer "less than" (currently compiled out).
 *
 * For each lane, dst->i[lane] is src2->i[lane] when
 * src0->i[lane] < src1->i[lane], otherwise src3->i[lane].
 */
static void
micro_ilt(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src2->i[lane];
      }
      else {
         dst->i[lane] = src3->i[lane];
      }
   }
}
#endif
612600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane select on unsigned integer "less than" (currently compiled out).
 *
 * For each lane, dst->u[lane] is src2->u[lane] when
 * src0->u[lane] < src1->u[lane], otherwise src3->u[lane].
 */
static void
micro_ult(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1,
          const union tgsi_exec_channel *src2,
          const union tgsi_exec_channel *src3)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src2->u[lane];
      }
      else {
         dst->u[lane] = src3->u[lane];
      }
   }
}
#endif
628600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
629600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
630600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_max(
631600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
632600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
633600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
634600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
635600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
636600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
637600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
638600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
639600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
640600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane signed integer maximum (currently compiled out).
 *
 * dst->i[lane] is the larger of src0->i[lane] and src1->i[lane].
 */
static void
micro_imax(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] > src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
654600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane unsigned integer maximum (currently compiled out).
 *
 * dst->u[lane] is the larger of src0->u[lane] and src1->u[lane].
 */
static void
micro_umax(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] > src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
668600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
669600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
670600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_min(
671600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
672600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
673600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
674600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
675600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
676600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
677600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
678600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
679600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
680600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane signed integer minimum (currently compiled out).
 *
 * dst->i[lane] is the smaller of src0->i[lane] and src1->i[lane].
 */
static void
micro_imin(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->i[lane] < src1->i[lane]) {
         dst->i[lane] = src0->i[lane];
      }
      else {
         dst->i[lane] = src1->i[lane];
      }
   }
}
#endif
694600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane unsigned integer minimum (currently compiled out).
 *
 * dst->u[lane] is the smaller of src0->u[lane] and src1->u[lane].
 */
static void
micro_umin(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      if (src0->u[lane] < src1->u[lane]) {
         dst->u[lane] = src0->u[lane];
      }
      else {
         dst->u[lane] = src1->u[lane];
      }
   }
}
#endif
708600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
#if 0
/**
 * Per-lane unsigned integer remainder (currently compiled out).
 *
 * dst->u[lane] = src0->u[lane] % src1->u[lane]; the divisor is not
 * checked for zero.
 */
static void
micro_umod(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src0,
           const union tgsi_exec_channel *src1)
{
   unsigned lane;

   for (lane = 0; lane < 4; lane++) {
      dst->u[lane] = src0->u[lane] % src1->u[lane];
   }
}
#endif
722600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
723600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void
724600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_mul(
725600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   union tgsi_exec_channel *dst,
726600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src0,
727600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   const union tgsi_exec_channel *src1 )
728600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{
729600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[0] = src0->f[0] * src1->f[0];
730600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[1] = src0->f[1] * src1->f[1];
731600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[2] = src0->f[2] * src1->f[2];
732600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang   dst->f[3] = src0->f[3] * src1->f[3];
733600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang}
734600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang
735600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0
736static void
737micro_imul(
738   union tgsi_exec_channel *dst,
739   const union tgsi_exec_channel *src0,
740   const union tgsi_exec_channel *src1 )
741{
742   dst->i[0] = src0->i[0] * src1->i[0];
743   dst->i[1] = src0->i[1] * src1->i[1];
744   dst->i[2] = src0->i[2] * src1->i[2];
745   dst->i[3] = src0->i[3] * src1->i[3];
746}
747#endif
748
#if 0
/**
 * Signed multiply with a two-channel result (currently compiled out).
 *
 * dst1 receives the plain per-lane product src0->i[lane] * src1->i[lane];
 * dst0 is simply cleared to zero in every lane.
 *
 * \param dst0  channel that is zero-filled
 * \param dst1  channel that receives the products
 * \param src0  first factor
 * \param src1  second factor
 */
static void
micro_imul64(union tgsi_exec_channel *dst0,
             union tgsi_exec_channel *dst1,
             const union tgsi_exec_channel *src0,
             const union tgsi_exec_channel *src1)
{
   unsigned lane;

   /* All products are stored before dst0 is cleared. */
   for (lane = 0; lane < 4; lane++) {
      dst1->i[lane] = src0->i[lane] * src1->i[lane];
   }

   for (lane = 0; lane < 4; lane++) {
      dst0->i[lane] = 0;
   }
}
#endif
767
#if 0
/**
 * Unsigned multiply with a two-channel result.  Currently compiled out.
 *
 * dst1 receives the ordinary (truncated) product; dst0 is simply cleared,
 * so no upper half of a wide product is actually computed here.
 */
static void
micro_umul64(
   union tgsi_exec_channel *dst0,
   union tgsi_exec_channel *dst1,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int i;

   for (i = 0; i < 4; i++) {
      dst1->u[i] = src0->u[i] * src1->u[i];
   }
   for (i = 0; i < 4; i++) {
      dst0->u[i] = 0;
   }
}
#endif
786
787
#if 0
/**
 * Per-pixel conditional move: dst = src0 ? src1 : src2, where the
 * condition is "the unsigned value of src0 is non-zero".
 * Currently compiled out.
 */
static void
micro_movc(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2 )
{
   int i;

   for (i = 0; i < 4; i++) {
      if (src0->u[i])
         dst->u[i] = src1->u[i];
      else
         dst->u[i] = src2->u[i];
   }
}
#endif
802
803static void
804micro_neg(
805   union tgsi_exec_channel *dst,
806   const union tgsi_exec_channel *src )
807{
808   dst->f[0] = -src->f[0];
809   dst->f[1] = -src->f[1];
810   dst->f[2] = -src->f[2];
811   dst->f[3] = -src->f[3];
812}
813
#if 0
/**
 * Per-pixel signed integer negate: dst.i[i] = -src.i[i].
 * Currently compiled out.
 */
static void
micro_ineg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int i;

   for (i = 0; i < 4; i++) {
      dst->i[i] = -src->i[i];
   }
}
#endif
826
827static void
828micro_not(
829   union tgsi_exec_channel *dst,
830   const union tgsi_exec_channel *src )
831{
832   dst->u[0] = ~src->u[0];
833   dst->u[1] = ~src->u[1];
834   dst->u[2] = ~src->u[2];
835   dst->u[3] = ~src->u[3];
836}
837
838static void
839micro_or(
840   union tgsi_exec_channel *dst,
841   const union tgsi_exec_channel *src0,
842   const union tgsi_exec_channel *src1 )
843{
844   dst->u[0] = src0->u[0] | src1->u[0];
845   dst->u[1] = src0->u[1] | src1->u[1];
846   dst->u[2] = src0->u[2] | src1->u[2];
847   dst->u[3] = src0->u[3] | src1->u[3];
848}
849
850static void
851micro_pow(
852   union tgsi_exec_channel *dst,
853   const union tgsi_exec_channel *src0,
854   const union tgsi_exec_channel *src1 )
855{
856#if FAST_MATH
857   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
858   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
859   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
860   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
861#else
862   dst->f[0] = powf( src0->f[0], src1->f[0] );
863   dst->f[1] = powf( src0->f[1], src1->f[1] );
864   dst->f[2] = powf( src0->f[2], src1->f[2] );
865   dst->f[3] = powf( src0->f[3], src1->f[3] );
866#endif
867}
868
869static void
870micro_rnd(
871   union tgsi_exec_channel *dst,
872   const union tgsi_exec_channel *src )
873{
874   dst->f[0] = floorf( src->f[0] + 0.5f );
875   dst->f[1] = floorf( src->f[1] + 0.5f );
876   dst->f[2] = floorf( src->f[2] + 0.5f );
877   dst->f[3] = floorf( src->f[3] + 0.5f );
878}
879
880static void
881micro_sgn(
882   union tgsi_exec_channel *dst,
883   const union tgsi_exec_channel *src )
884{
885   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
886   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
887   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
888   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
889}
890
891static void
892micro_shl(
893   union tgsi_exec_channel *dst,
894   const union tgsi_exec_channel *src0,
895   const union tgsi_exec_channel *src1 )
896{
897   dst->i[0] = src0->i[0] << src1->i[0];
898   dst->i[1] = src0->i[1] << src1->i[1];
899   dst->i[2] = src0->i[2] << src1->i[2];
900   dst->i[3] = src0->i[3] << src1->i[3];
901}
902
903static void
904micro_ishr(
905   union tgsi_exec_channel *dst,
906   const union tgsi_exec_channel *src0,
907   const union tgsi_exec_channel *src1 )
908{
909   dst->i[0] = src0->i[0] >> src1->i[0];
910   dst->i[1] = src0->i[1] >> src1->i[1];
911   dst->i[2] = src0->i[2] >> src1->i[2];
912   dst->i[3] = src0->i[3] >> src1->i[3];
913}
914
915static void
916micro_trunc(
917   union tgsi_exec_channel *dst,
918   const union tgsi_exec_channel *src0 )
919{
920   dst->f[0] = (float) (int) src0->f[0];
921   dst->f[1] = (float) (int) src0->f[1];
922   dst->f[2] = (float) (int) src0->f[2];
923   dst->f[3] = (float) (int) src0->f[3];
924}
925
#if 0
/**
 * Per-pixel unsigned right shift: dst.u = src0.u >> src1.
 * Currently compiled out.
 *
 * The count is masked to 0..31 because shifting by a count >= the operand
 * width is undefined behaviour in C.
 * NOTE(review): the 0x1f mask assumes 32-bit channel elements - confirm
 * against the definition of union tgsi_exec_channel.
 */
static void
micro_ushr(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   int i;

   for (i = 0; i < 4; i++) {
      const unsigned count = src1->u[i] & 0x1f;

      dst->u[i] = src0->u[i] >> count;
   }
}
#endif
939
940static void
941micro_sin(
942   union tgsi_exec_channel *dst,
943   const union tgsi_exec_channel *src )
944{
945   dst->f[0] = sinf( src->f[0] );
946   dst->f[1] = sinf( src->f[1] );
947   dst->f[2] = sinf( src->f[2] );
948   dst->f[3] = sinf( src->f[3] );
949}
950
951static void
952micro_sqrt( union tgsi_exec_channel *dst,
953            const union tgsi_exec_channel *src )
954{
955   dst->f[0] = sqrtf( src->f[0] );
956   dst->f[1] = sqrtf( src->f[1] );
957   dst->f[2] = sqrtf( src->f[2] );
958   dst->f[3] = sqrtf( src->f[3] );
959}
960
961static void
962micro_sub(
963   union tgsi_exec_channel *dst,
964   const union tgsi_exec_channel *src0,
965   const union tgsi_exec_channel *src1 )
966{
967   dst->f[0] = src0->f[0] - src1->f[0];
968   dst->f[1] = src0->f[1] - src1->f[1];
969   dst->f[2] = src0->f[2] - src1->f[2];
970   dst->f[3] = src0->f[3] - src1->f[3];
971}
972
#if 0
/**
 * Per-pixel unsigned-to-float conversion: dst.f[i] = (float) src.u[i].
 * Currently compiled out.
 */
static void
micro_u2f(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   int i;

   for (i = 0; i < 4; i++) {
      dst->f[i] = (float) src->u[i];
   }
}
#endif
985
986static void
987micro_xor(
988   union tgsi_exec_channel *dst,
989   const union tgsi_exec_channel *src0,
990   const union tgsi_exec_channel *src1 )
991{
992   dst->u[0] = src0->u[0] ^ src1->u[0];
993   dst->u[1] = src0->u[1] ^ src1->u[1];
994   dst->u[2] = src0->u[2] ^ src1->u[2];
995   dst->u[3] = src0->u[3] ^ src1->u[3];
996}
997
998static void
999fetch_src_file_channel(
1000   const struct tgsi_exec_machine *mach,
1001   const uint file,
1002   const uint swizzle,
1003   const union tgsi_exec_channel *index,
1004   union tgsi_exec_channel *chan )
1005{
1006   switch( swizzle ) {
1007   case TGSI_EXTSWIZZLE_X:
1008   case TGSI_EXTSWIZZLE_Y:
1009   case TGSI_EXTSWIZZLE_Z:
1010   case TGSI_EXTSWIZZLE_W:
1011      switch( file ) {
1012      case TGSI_FILE_CONSTANT:
1013         assert(mach->Consts);
1014         if (index->i[0] < 0)
1015            chan->f[0] = 0.0f;
1016         else
1017            chan->f[0] = mach->Consts[index->i[0]][swizzle];
1018         if (index->i[1] < 0)
1019            chan->f[1] = 0.0f;
1020         else
1021            chan->f[1] = mach->Consts[index->i[1]][swizzle];
1022         if (index->i[2] < 0)
1023            chan->f[2] = 0.0f;
1024         else
1025            chan->f[2] = mach->Consts[index->i[2]][swizzle];
1026         if (index->i[3] < 0)
1027            chan->f[3] = 0.0f;
1028         else
1029            chan->f[3] = mach->Consts[index->i[3]][swizzle];
1030         break;
1031
1032      case TGSI_FILE_INPUT:
1033         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
1034         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
1035         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
1036         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
1037         break;
1038
1039      case TGSI_FILE_TEMPORARY:
1040         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
1041         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
1042         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
1043         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
1044         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
1045         break;
1046
1047      case TGSI_FILE_IMMEDIATE:
1048         assert( index->i[0] < (int) mach->ImmLimit );
1049         chan->f[0] = mach->Imms[index->i[0]][swizzle];
1050         assert( index->i[1] < (int) mach->ImmLimit );
1051         chan->f[1] = mach->Imms[index->i[1]][swizzle];
1052         assert( index->i[2] < (int) mach->ImmLimit );
1053         chan->f[2] = mach->Imms[index->i[2]][swizzle];
1054         assert( index->i[3] < (int) mach->ImmLimit );
1055         chan->f[3] = mach->Imms[index->i[3]][swizzle];
1056         break;
1057
1058      case TGSI_FILE_ADDRESS:
1059         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
1060         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
1061         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
1062         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
1063         break;
1064
1065      case TGSI_FILE_OUTPUT:
1066         /* vertex/fragment output vars can be read too */
1067         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
1068         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
1069         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
1070         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
1071         break;
1072
1073      default:
1074         assert( 0 );
1075      }
1076      break;
1077
1078   case TGSI_EXTSWIZZLE_ZERO:
1079      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
1080      break;
1081
1082   case TGSI_EXTSWIZZLE_ONE:
1083      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
1084      break;
1085
1086   default:
1087      assert( 0 );
1088   }
1089}
1090
1091static void
1092fetch_source(
1093   const struct tgsi_exec_machine *mach,
1094   union tgsi_exec_channel *chan,
1095   const struct tgsi_full_src_register *reg,
1096   const uint chan_index )
1097{
1098   union tgsi_exec_channel index;
1099   uint swizzle;
1100
1101   /* We start with a direct index into a register file.
1102    *
1103    *    file[1],
1104    *    where:
1105    *       file = SrcRegister.File
1106    *       [1] = SrcRegister.Index
1107    */
1108   index.i[0] =
1109   index.i[1] =
1110   index.i[2] =
1111   index.i[3] = reg->SrcRegister.Index;
1112
1113   /* There is an extra source register that indirectly subscripts
1114    * a register file. The direct index now becomes an offset
1115    * that is being added to the indirect register.
1116    *
1117    *    file[ind[2].x+1],
1118    *    where:
1119    *       ind = SrcRegisterInd.File
1120    *       [2] = SrcRegisterInd.Index
1121    *       .x = SrcRegisterInd.SwizzleX
1122    */
1123   if (reg->SrcRegister.Indirect) {
1124      union tgsi_exec_channel index2;
1125      union tgsi_exec_channel indir_index;
1126      const uint execmask = mach->ExecMask;
1127      uint i;
1128
1129      /* which address register (always zero now) */
1130      index2.i[0] =
1131      index2.i[1] =
1132      index2.i[2] =
1133      index2.i[3] = reg->SrcRegisterInd.Index;
1134
1135      /* get current value of address register[swizzle] */
1136      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
1137      fetch_src_file_channel(
1138         mach,
1139         reg->SrcRegisterInd.File,
1140         swizzle,
1141         &index2,
1142         &indir_index );
1143
1144      /* add value of address register to the offset */
1145      index.i[0] += (int) indir_index.f[0];
1146      index.i[1] += (int) indir_index.f[1];
1147      index.i[2] += (int) indir_index.f[2];
1148      index.i[3] += (int) indir_index.f[3];
1149
1150      /* for disabled execution channels, zero-out the index to
1151       * avoid using a potential garbage value.
1152       */
1153      for (i = 0; i < QUAD_SIZE; i++) {
1154         if ((execmask & (1 << i)) == 0)
1155            index.i[i] = 0;
1156      }
1157   }
1158
1159   /* There is an extra source register that is a second
1160    * subscript to a register file. Effectively it means that
1161    * the register file is actually a 2D array of registers.
1162    *
1163    *    file[1][3] == file[1*sizeof(file[1])+3],
1164    *    where:
1165    *       [3] = SrcRegisterDim.Index
1166    */
1167   if (reg->SrcRegister.Dimension) {
1168      /* The size of the first-order array depends on the register file type.
1169       * We need to multiply the index to the first array to get an effective,
1170       * "flat" index that points to the beginning of the second-order array.
1171       */
1172      switch (reg->SrcRegister.File) {
1173      case TGSI_FILE_INPUT:
1174         index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1175         index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1176         index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1177         index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
1178         break;
1179      case TGSI_FILE_CONSTANT:
1180         index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
1181         index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
1182         index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
1183         index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
1184         break;
1185      default:
1186         assert( 0 );
1187      }
1188
1189      index.i[0] += reg->SrcRegisterDim.Index;
1190      index.i[1] += reg->SrcRegisterDim.Index;
1191      index.i[2] += reg->SrcRegisterDim.Index;
1192      index.i[3] += reg->SrcRegisterDim.Index;
1193
1194      /* Again, the second subscript index can be addressed indirectly
1195       * identically to the first one.
1196       * Nothing stops us from indirectly addressing the indirect register,
1197       * but there is no need for that, so we won't exercise it.
1198       *
1199       *    file[1][ind[4].y+3],
1200       *    where:
1201       *       ind = SrcRegisterDimInd.File
1202       *       [4] = SrcRegisterDimInd.Index
1203       *       .y = SrcRegisterDimInd.SwizzleX
1204       */
1205      if (reg->SrcRegisterDim.Indirect) {
1206         union tgsi_exec_channel index2;
1207         union tgsi_exec_channel indir_index;
1208         const uint execmask = mach->ExecMask;
1209         uint i;
1210
1211         index2.i[0] =
1212         index2.i[1] =
1213         index2.i[2] =
1214         index2.i[3] = reg->SrcRegisterDimInd.Index;
1215
1216         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
1217         fetch_src_file_channel(
1218            mach,
1219            reg->SrcRegisterDimInd.File,
1220            swizzle,
1221            &index2,
1222            &indir_index );
1223
1224         index.i[0] += (int) indir_index.f[0];
1225         index.i[1] += (int) indir_index.f[1];
1226         index.i[2] += (int) indir_index.f[2];
1227         index.i[3] += (int) indir_index.f[3];
1228
1229         /* for disabled execution channels, zero-out the index to
1230          * avoid using a potential garbage value.
1231          */
1232         for (i = 0; i < QUAD_SIZE; i++) {
1233            if ((execmask & (1 << i)) == 0)
1234               index.i[i] = 0;
1235         }
1236      }
1237
1238      /* If by any chance there was a need for a 3D array of register
1239       * files, we would have to check whether SrcRegisterDim is followed
1240       * by a dimension register and continue the saga.
1241       */
1242   }
1243
1244   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
1245   fetch_src_file_channel(
1246      mach,
1247      reg->SrcRegister.File,
1248      swizzle,
1249      &index,
1250      chan );
1251
1252   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
1253   case TGSI_UTIL_SIGN_CLEAR:
1254      micro_abs( chan, chan );
1255      break;
1256
1257   case TGSI_UTIL_SIGN_SET:
1258      micro_abs( chan, chan );
1259      micro_neg( chan, chan );
1260      break;
1261
1262   case TGSI_UTIL_SIGN_TOGGLE:
1263      micro_neg( chan, chan );
1264      break;
1265
1266   case TGSI_UTIL_SIGN_KEEP:
1267      break;
1268   }
1269
1270   if (reg->SrcRegisterExtMod.Complement) {
1271      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
1272   }
1273}
1274
1275static void
1276store_dest(
1277   struct tgsi_exec_machine *mach,
1278   const union tgsi_exec_channel *chan,
1279   const struct tgsi_full_dst_register *reg,
1280   const struct tgsi_full_instruction *inst,
1281   uint chan_index )
1282{
1283   uint i;
1284   union tgsi_exec_channel null;
1285   union tgsi_exec_channel *dst;
1286   uint execmask = mach->ExecMask;
1287
1288   switch (reg->DstRegister.File) {
1289   case TGSI_FILE_NULL:
1290      dst = &null;
1291      break;
1292
1293   case TGSI_FILE_OUTPUT:
1294      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1295                           + reg->DstRegister.Index].xyzw[chan_index];
1296      break;
1297
1298   case TGSI_FILE_TEMPORARY:
1299      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
1300      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
1301      break;
1302
1303   case TGSI_FILE_ADDRESS:
1304      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
1305      break;
1306
1307   default:
1308      assert( 0 );
1309      return;
1310   }
1311
1312   if (inst->InstructionExtNv.CondFlowEnable) {
1313      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1314      uint swizzle;
1315      uint shift;
1316      uint mask;
1317      uint test;
1318
1319      /* Only CC0 supported.
1320       */
1321      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
1322
1323      switch (chan_index) {
1324      case CHAN_X:
1325         swizzle = inst->InstructionExtNv.CondSwizzleX;
1326         break;
1327      case CHAN_Y:
1328         swizzle = inst->InstructionExtNv.CondSwizzleY;
1329         break;
1330      case CHAN_Z:
1331         swizzle = inst->InstructionExtNv.CondSwizzleZ;
1332         break;
1333      case CHAN_W:
1334         swizzle = inst->InstructionExtNv.CondSwizzleW;
1335         break;
1336      default:
1337         assert( 0 );
1338         return;
1339      }
1340
1341      switch (swizzle) {
1342      case TGSI_SWIZZLE_X:
1343         shift = TGSI_EXEC_CC_X_SHIFT;
1344         mask = TGSI_EXEC_CC_X_MASK;
1345         break;
1346      case TGSI_SWIZZLE_Y:
1347         shift = TGSI_EXEC_CC_Y_SHIFT;
1348         mask = TGSI_EXEC_CC_Y_MASK;
1349         break;
1350      case TGSI_SWIZZLE_Z:
1351         shift = TGSI_EXEC_CC_Z_SHIFT;
1352         mask = TGSI_EXEC_CC_Z_MASK;
1353         break;
1354      case TGSI_SWIZZLE_W:
1355         shift = TGSI_EXEC_CC_W_SHIFT;
1356         mask = TGSI_EXEC_CC_W_MASK;
1357         break;
1358      default:
1359         assert( 0 );
1360         return;
1361      }
1362
1363      switch (inst->InstructionExtNv.CondMask) {
1364      case TGSI_CC_GT:
1365         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
1366         for (i = 0; i < QUAD_SIZE; i++)
1367            if (cc->u[i] & test)
1368               execmask &= ~(1 << i);
1369         break;
1370
1371      case TGSI_CC_EQ:
1372         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
1373         for (i = 0; i < QUAD_SIZE; i++)
1374            if (cc->u[i] & test)
1375               execmask &= ~(1 << i);
1376         break;
1377
1378      case TGSI_CC_LT:
1379         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
1380         for (i = 0; i < QUAD_SIZE; i++)
1381            if (cc->u[i] & test)
1382               execmask &= ~(1 << i);
1383         break;
1384
1385      case TGSI_CC_GE:
1386         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
1387         for (i = 0; i < QUAD_SIZE; i++)
1388            if (cc->u[i] & test)
1389               execmask &= ~(1 << i);
1390         break;
1391
1392      case TGSI_CC_LE:
1393         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
1394         for (i = 0; i < QUAD_SIZE; i++)
1395            if (cc->u[i] & test)
1396               execmask &= ~(1 << i);
1397         break;
1398
1399      case TGSI_CC_NE:
1400         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
1401         for (i = 0; i < QUAD_SIZE; i++)
1402            if (cc->u[i] & test)
1403               execmask &= ~(1 << i);
1404         break;
1405
1406      case TGSI_CC_TR:
1407         break;
1408
1409      case TGSI_CC_FL:
1410         for (i = 0; i < QUAD_SIZE; i++)
1411            execmask &= ~(1 << i);
1412         break;
1413
1414      default:
1415         assert( 0 );
1416         return;
1417      }
1418   }
1419
1420   switch (inst->Instruction.Saturate) {
1421   case TGSI_SAT_NONE:
1422      for (i = 0; i < QUAD_SIZE; i++)
1423         if (execmask & (1 << i))
1424            dst->i[i] = chan->i[i];
1425      break;
1426
1427   case TGSI_SAT_ZERO_ONE:
1428      for (i = 0; i < QUAD_SIZE; i++)
1429         if (execmask & (1 << i)) {
1430            if (chan->f[i] < 0.0f)
1431               dst->f[i] = 0.0f;
1432            else if (chan->f[i] > 1.0f)
1433               dst->f[i] = 1.0f;
1434            else
1435               dst->i[i] = chan->i[i];
1436         }
1437      break;
1438
1439   case TGSI_SAT_MINUS_PLUS_ONE:
1440      for (i = 0; i < QUAD_SIZE; i++)
1441         if (execmask & (1 << i)) {
1442            if (chan->f[i] < -1.0f)
1443               dst->f[i] = -1.0f;
1444            else if (chan->f[i] > 1.0f)
1445               dst->f[i] = 1.0f;
1446            else
1447               dst->i[i] = chan->i[i];
1448         }
1449      break;
1450
1451   default:
1452      assert( 0 );
1453   }
1454
1455   if (inst->InstructionExtNv.CondDstUpdate) {
1456      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
1457      uint shift;
1458      uint mask;
1459
1460      /* Only CC0 supported.
1461       */
1462      assert( inst->InstructionExtNv.CondDstIndex < 1 );
1463
1464      switch (chan_index) {
1465      case CHAN_X:
1466         shift = TGSI_EXEC_CC_X_SHIFT;
1467         mask = ~TGSI_EXEC_CC_X_MASK;
1468         break;
1469      case CHAN_Y:
1470         shift = TGSI_EXEC_CC_Y_SHIFT;
1471         mask = ~TGSI_EXEC_CC_Y_MASK;
1472         break;
1473      case CHAN_Z:
1474         shift = TGSI_EXEC_CC_Z_SHIFT;
1475         mask = ~TGSI_EXEC_CC_Z_MASK;
1476         break;
1477      case CHAN_W:
1478         shift = TGSI_EXEC_CC_W_SHIFT;
1479         mask = ~TGSI_EXEC_CC_W_MASK;
1480         break;
1481      default:
1482         assert( 0 );
1483         return;
1484      }
1485
1486      for (i = 0; i < QUAD_SIZE; i++)
1487         if (execmask & (1 << i)) {
1488            cc->u[i] &= mask;
1489            if (dst->f[i] < 0.0f)
1490               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
1491            else if (dst->f[i] > 0.0f)
1492               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
1493            else if (dst->f[i] == 0.0f)
1494               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
1495            else
1496               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
1497         }
1498   }
1499}
1500
/* Shorthands for use inside instruction handlers; both expect local
 * variables named `mach' and `inst' to be in scope.
 *
 * FETCH reads channel CHAN of source operand INDEX into *VAL,
 * STORE writes *VAL to channel CHAN of destination operand INDEX.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source( mach, (VAL), &inst->FullSrcRegisters[(INDEX)], (CHAN) )

#define STORE(VAL,INDEX,CHAN) \
   store_dest( mach, (VAL), &inst->FullDstRegisters[(INDEX)], inst, (CHAN) )
1506
1507
1508/**
1509 * Execute ARB-style KIL which is predicated by a src register.
1510 * Kill fragment if any of the four values is less than zero.
1511 */
1512static void
1513exec_kil(struct tgsi_exec_machine *mach,
1514         const struct tgsi_full_instruction *inst)
1515{
1516   uint uniquemask;
1517   uint chan_index;
1518   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1519   union tgsi_exec_channel r[1];
1520
1521   /* This mask stores component bits that were already tested. Note that
1522    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
1523    * tested. */
1524   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
1525
1526   for (chan_index = 0; chan_index < 4; chan_index++)
1527   {
1528      uint swizzle;
1529      uint i;
1530
1531      /* unswizzle channel */
1532      swizzle = tgsi_util_get_full_src_register_extswizzle (
1533                        &inst->FullSrcRegisters[0],
1534                        chan_index);
1535
1536      /* check if the component has not been already tested */
1537      if (uniquemask & (1 << swizzle))
1538         continue;
1539      uniquemask |= 1 << swizzle;
1540
1541      FETCH(&r[0], 0, chan_index);
1542      for (i = 0; i < 4; i++)
1543         if (r[0].f[i] < 0.0f)
1544            kilmask |= 1 << i;
1545   }
1546
1547   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1548}
1549
1550/**
1551 * Execute NVIDIA-style KIL which is predicated by a condition code.
1552 * Kill fragment if the condition code is TRUE.
1553 */
1554static void
1555exec_kilp(struct tgsi_exec_machine *mach,
1556          const struct tgsi_full_instruction *inst)
1557{
1558   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1559
1560   if (inst->InstructionExtNv.CondFlowEnable) {
1561      uint swizzle[4];
1562      uint chan_index;
1563
1564      kilmask = 0x0;
1565
1566      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
1567      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
1568      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
1569      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
1570
1571      for (chan_index = 0; chan_index < 4; chan_index++)
1572      {
1573         uint i;
1574
1575         for (i = 0; i < 4; i++) {
1576            /* TODO: evaluate the condition code */
1577            if (0)
1578               kilmask |= 1 << i;
1579         }
1580      }
1581   }
1582   else {
1583      /* "unconditional" kil */
1584      kilmask = mach->ExecMask;
1585   }
1586   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1587}
1588
1589
1590/*
1591 * Fetch a four texture samples using STR texture coordinates.
1592 */
1593static void
1594fetch_texel( struct tgsi_sampler *sampler,
1595             const union tgsi_exec_channel *s,
1596             const union tgsi_exec_channel *t,
1597             const union tgsi_exec_channel *p,
1598             float lodbias,  /* XXX should be float[4] */
1599             union tgsi_exec_channel *r,
1600             union tgsi_exec_channel *g,
1601             union tgsi_exec_channel *b,
1602             union tgsi_exec_channel *a )
1603{
1604   uint j;
1605   float rgba[NUM_CHANNELS][QUAD_SIZE];
1606
1607   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
1608
1609   for (j = 0; j < 4; j++) {
1610      r->f[j] = rgba[0][j];
1611      g->f[j] = rgba[1][j];
1612      b->f[j] = rgba[2][j];
1613      a->f[j] = rgba[3][j];
1614   }
1615}
1616
1617
1618static void
1619exec_tex(struct tgsi_exec_machine *mach,
1620         const struct tgsi_full_instruction *inst,
1621         boolean biasLod,
1622         boolean projected)
1623{
1624   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1625   union tgsi_exec_channel r[4];
1626   uint chan_index;
1627   float lodBias;
1628
1629   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
1630
1631   switch (inst->InstructionExtTexture.Texture) {
1632   case TGSI_TEXTURE_1D:
1633   case TGSI_TEXTURE_SHADOW1D:
1634
1635      FETCH(&r[0], 0, CHAN_X);
1636
1637      if (projected) {
1638         FETCH(&r[1], 0, CHAN_W);
1639         micro_div( &r[0], &r[0], &r[1] );
1640      }
1641
1642      if (biasLod) {
1643         FETCH(&r[1], 0, CHAN_W);
1644         lodBias = r[2].f[0];
1645      }
1646      else
1647         lodBias = 0.0;
1648
1649      fetch_texel(mach->Samplers[unit],
1650                  &r[0], &ZeroVec, &ZeroVec, lodBias,  /* S, T, P, BIAS */
1651                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
1652      break;
1653
1654   case TGSI_TEXTURE_2D:
1655   case TGSI_TEXTURE_RECT:
1656   case TGSI_TEXTURE_SHADOW2D:
1657   case TGSI_TEXTURE_SHADOWRECT:
1658
1659      FETCH(&r[0], 0, CHAN_X);
1660      FETCH(&r[1], 0, CHAN_Y);
1661      FETCH(&r[2], 0, CHAN_Z);
1662
1663      if (projected) {
1664         FETCH(&r[3], 0, CHAN_W);
1665         micro_div( &r[0], &r[0], &r[3] );
1666         micro_div( &r[1], &r[1], &r[3] );
1667         micro_div( &r[2], &r[2], &r[3] );
1668      }
1669
1670      if (biasLod) {
1671         FETCH(&r[3], 0, CHAN_W);
1672         lodBias = r[3].f[0];
1673      }
1674      else
1675         lodBias = 0.0;
1676
1677      fetch_texel(mach->Samplers[unit],
1678                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
1679                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
1680      break;
1681
1682   case TGSI_TEXTURE_3D:
1683   case TGSI_TEXTURE_CUBE:
1684
1685      FETCH(&r[0], 0, CHAN_X);
1686      FETCH(&r[1], 0, CHAN_Y);
1687      FETCH(&r[2], 0, CHAN_Z);
1688
1689      if (projected) {
1690         FETCH(&r[3], 0, CHAN_W);
1691         micro_div( &r[0], &r[0], &r[3] );
1692         micro_div( &r[1], &r[1], &r[3] );
1693         micro_div( &r[2], &r[2], &r[3] );
1694      }
1695
1696      if (biasLod) {
1697         FETCH(&r[3], 0, CHAN_W);
1698         lodBias = r[3].f[0];
1699      }
1700      else
1701         lodBias = 0.0;
1702
1703      fetch_texel(mach->Samplers[unit],
1704                  &r[0], &r[1], &r[2], lodBias,
1705                  &r[0], &r[1], &r[2], &r[3]);
1706      break;
1707
1708   default:
1709      assert (0);
1710   }
1711
1712   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1713      STORE( &r[chan_index], 0, chan_index );
1714   }
1715}
1716
1717
1718/**
1719 * Evaluate a constant-valued coefficient at the position of the
1720 * current quad.
1721 */
1722static void
1723eval_constant_coef(
1724   struct tgsi_exec_machine *mach,
1725   unsigned attrib,
1726   unsigned chan )
1727{
1728   unsigned i;
1729
1730   for( i = 0; i < QUAD_SIZE; i++ ) {
1731      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
1732   }
1733}
1734
1735/**
1736 * Evaluate a linear-valued coefficient at the position of the
1737 * current quad.
1738 */
1739static void
1740eval_linear_coef(
1741   struct tgsi_exec_machine *mach,
1742   unsigned attrib,
1743   unsigned chan )
1744{
1745   const float x = mach->QuadPos.xyzw[0].f[0];
1746   const float y = mach->QuadPos.xyzw[1].f[0];
1747   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1748   const float dady = mach->InterpCoefs[attrib].dady[chan];
1749   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1750   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
1751   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
1752   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
1753   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
1754}
1755
1756/**
1757 * Evaluate a perspective-valued coefficient at the position of the
1758 * current quad.
1759 */
1760static void
1761eval_perspective_coef(
1762   struct tgsi_exec_machine *mach,
1763   unsigned attrib,
1764   unsigned chan )
1765{
1766   const float x = mach->QuadPos.xyzw[0].f[0];
1767   const float y = mach->QuadPos.xyzw[1].f[0];
1768   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
1769   const float dady = mach->InterpCoefs[attrib].dady[chan];
1770   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
1771   const float *w = mach->QuadPos.xyzw[3].f;
1772   /* divide by W here */
1773   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
1774   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
1775   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
1776   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
1777}
1778
1779
/**
 * Signature shared by the eval_*_coef() interpolators: evaluate channel
 * `chan' of input attribute `attrib' for the current quad.
 */
typedef void (* eval_coef_func)( struct tgsi_exec_machine *mach,
                                 unsigned attrib,
                                 unsigned chan );
1784
1785static void
1786exec_declaration(
1787   struct tgsi_exec_machine *mach,
1788   const struct tgsi_full_declaration *decl )
1789{
1790   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
1791      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
1792         unsigned first, last, mask;
1793         eval_coef_func eval;
1794
1795         first = decl->DeclarationRange.First;
1796         last = decl->DeclarationRange.Last;
1797         mask = decl->Declaration.UsageMask;
1798
1799         switch( decl->Declaration.Interpolate ) {
1800         case TGSI_INTERPOLATE_CONSTANT:
1801            eval = eval_constant_coef;
1802            break;
1803
1804         case TGSI_INTERPOLATE_LINEAR:
1805            eval = eval_linear_coef;
1806            break;
1807
1808         case TGSI_INTERPOLATE_PERSPECTIVE:
1809            eval = eval_perspective_coef;
1810            break;
1811
1812         default:
1813            eval = NULL;
1814            assert( 0 );
1815         }
1816
1817         if( mask == TGSI_WRITEMASK_XYZW ) {
1818            unsigned i, j;
1819
1820            for( i = first; i <= last; i++ ) {
1821               for( j = 0; j < NUM_CHANNELS; j++ ) {
1822                  eval( mach, i, j );
1823               }
1824            }
1825         }
1826         else {
1827            unsigned i, j;
1828
1829            for( j = 0; j < NUM_CHANNELS; j++ ) {
1830               if( mask & (1 << j) ) {
1831                  for( i = first; i <= last; i++ ) {
1832                     eval( mach, i, j );
1833                  }
1834               }
1835            }
1836         }
1837      }
1838   }
1839}
1840
1841static void
1842exec_instruction(
1843   struct tgsi_exec_machine *mach,
1844   const struct tgsi_full_instruction *inst,
1845   int *pc )
1846{
1847   uint chan_index;
1848   union tgsi_exec_channel r[10];
1849
1850   (*pc)++;
1851
1852   switch (inst->Instruction.Opcode) {
1853   case TGSI_OPCODE_ARL:
1854   /* TGSI_OPCODE_FLOOR */
1855   /* TGSI_OPCODE_FLR */
1856      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1857         FETCH( &r[0], 0, chan_index );
1858         micro_flr( &r[0], &r[0] );
1859         STORE( &r[0], 0, chan_index );
1860      }
1861      break;
1862
1863   case TGSI_OPCODE_MOV:
1864   case TGSI_OPCODE_SWZ:
1865      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1866         FETCH( &r[0], 0, chan_index );
1867         STORE( &r[0], 0, chan_index );
1868      }
1869      break;
1870
1871   case TGSI_OPCODE_LIT:
1872      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1873         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
1874      }
1875
1876      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1877         FETCH( &r[0], 0, CHAN_X );
1878         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1879            micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1880            STORE( &r[0], 0, CHAN_Y );
1881         }
1882
1883         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1884            FETCH( &r[1], 0, CHAN_Y );
1885            micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1886
1887            FETCH( &r[2], 0, CHAN_W );
1888            micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
1889            micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
1890            micro_pow( &r[1], &r[1], &r[2] );
1891            micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
1892            STORE( &r[0], 0, CHAN_Z );
1893         }
1894      }
1895
1896      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1897         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1898      }
1899      break;
1900
1901   case TGSI_OPCODE_RCP:
1902   /* TGSI_OPCODE_RECIP */
1903      FETCH( &r[0], 0, CHAN_X );
1904      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1905      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1906         STORE( &r[0], 0, chan_index );
1907      }
1908      break;
1909
1910   case TGSI_OPCODE_RSQ:
1911   /* TGSI_OPCODE_RECIPSQRT */
1912      FETCH( &r[0], 0, CHAN_X );
1913      micro_abs( &r[0], &r[0] );
1914      micro_sqrt( &r[0], &r[0] );
1915      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
1916      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1917         STORE( &r[0], 0, chan_index );
1918      }
1919      break;
1920
1921   case TGSI_OPCODE_EXP:
1922      FETCH( &r[0], 0, CHAN_X );
1923      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
1924      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1925         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
1926         STORE( &r[2], 0, CHAN_X );        /* store r2 */
1927      }
1928      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1929         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
1930         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
1931      }
1932      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1933         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
1934         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
1935      }
1936      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1937         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1938      }
1939      break;
1940
1941   case TGSI_OPCODE_LOG:
1942      FETCH( &r[0], 0, CHAN_X );
1943      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
1944      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
1945      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
1946      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
1947         STORE( &r[0], 0, CHAN_X );
1948      }
1949      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
1950         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
1951         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
1952         STORE( &r[0], 0, CHAN_Y );
1953      }
1954      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
1955         STORE( &r[1], 0, CHAN_Z );
1956      }
1957      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
1958         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
1959      }
1960      break;
1961
1962   case TGSI_OPCODE_MUL:
1963      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
1964      {
1965         FETCH(&r[0], 0, chan_index);
1966         FETCH(&r[1], 1, chan_index);
1967
1968         micro_mul( &r[0], &r[0], &r[1] );
1969
1970         STORE(&r[0], 0, chan_index);
1971      }
1972      break;
1973
1974   case TGSI_OPCODE_ADD:
1975      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
1976         FETCH( &r[0], 0, chan_index );
1977         FETCH( &r[1], 1, chan_index );
1978         micro_add( &r[0], &r[0], &r[1] );
1979         STORE( &r[0], 0, chan_index );
1980      }
1981      break;
1982
1983   case TGSI_OPCODE_DP3:
1984   /* TGSI_OPCODE_DOT3 */
1985      FETCH( &r[0], 0, CHAN_X );
1986      FETCH( &r[1], 1, CHAN_X );
1987      micro_mul( &r[0], &r[0], &r[1] );
1988
1989      FETCH( &r[1], 0, CHAN_Y );
1990      FETCH( &r[2], 1, CHAN_Y );
1991      micro_mul( &r[1], &r[1], &r[2] );
1992      micro_add( &r[0], &r[0], &r[1] );
1993
1994      FETCH( &r[1], 0, CHAN_Z );
1995      FETCH( &r[2], 1, CHAN_Z );
1996      micro_mul( &r[1], &r[1], &r[2] );
1997      micro_add( &r[0], &r[0], &r[1] );
1998
1999      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2000         STORE( &r[0], 0, chan_index );
2001      }
2002      break;
2003
2004    case TGSI_OPCODE_DP4:
2005    /* TGSI_OPCODE_DOT4 */
2006       FETCH(&r[0], 0, CHAN_X);
2007       FETCH(&r[1], 1, CHAN_X);
2008
2009       micro_mul( &r[0], &r[0], &r[1] );
2010
2011       FETCH(&r[1], 0, CHAN_Y);
2012       FETCH(&r[2], 1, CHAN_Y);
2013
2014       micro_mul( &r[1], &r[1], &r[2] );
2015       micro_add( &r[0], &r[0], &r[1] );
2016
2017       FETCH(&r[1], 0, CHAN_Z);
2018       FETCH(&r[2], 1, CHAN_Z);
2019
2020       micro_mul( &r[1], &r[1], &r[2] );
2021       micro_add( &r[0], &r[0], &r[1] );
2022
2023       FETCH(&r[1], 0, CHAN_W);
2024       FETCH(&r[2], 1, CHAN_W);
2025
2026       micro_mul( &r[1], &r[1], &r[2] );
2027       micro_add( &r[0], &r[0], &r[1] );
2028
2029      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2030         STORE( &r[0], 0, chan_index );
2031      }
2032      break;
2033
2034   case TGSI_OPCODE_DST:
2035      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2036         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
2037      }
2038
2039      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2040         FETCH( &r[0], 0, CHAN_Y );
2041         FETCH( &r[1], 1, CHAN_Y);
2042         micro_mul( &r[0], &r[0], &r[1] );
2043         STORE( &r[0], 0, CHAN_Y );
2044      }
2045
2046      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2047         FETCH( &r[0], 0, CHAN_Z );
2048         STORE( &r[0], 0, CHAN_Z );
2049      }
2050
2051      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2052         FETCH( &r[0], 1, CHAN_W );
2053         STORE( &r[0], 0, CHAN_W );
2054      }
2055      break;
2056
2057   case TGSI_OPCODE_MIN:
2058      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2059         FETCH(&r[0], 0, chan_index);
2060         FETCH(&r[1], 1, chan_index);
2061
2062         /* XXX use micro_min()?? */
2063         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
2064
2065         STORE(&r[0], 0, chan_index);
2066      }
2067      break;
2068
2069   case TGSI_OPCODE_MAX:
2070      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2071         FETCH(&r[0], 0, chan_index);
2072         FETCH(&r[1], 1, chan_index);
2073
2074         /* XXX use micro_max()?? */
2075         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
2076
2077         STORE(&r[0], 0, chan_index );
2078      }
2079      break;
2080
2081   case TGSI_OPCODE_SLT:
2082   /* TGSI_OPCODE_SETLT */
2083      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2084         FETCH( &r[0], 0, chan_index );
2085         FETCH( &r[1], 1, chan_index );
2086         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2087         STORE( &r[0], 0, chan_index );
2088      }
2089      break;
2090
2091   case TGSI_OPCODE_SGE:
2092   /* TGSI_OPCODE_SETGE */
2093      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2094         FETCH( &r[0], 0, chan_index );
2095         FETCH( &r[1], 1, chan_index );
2096         micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2097         STORE( &r[0], 0, chan_index );
2098      }
2099      break;
2100
2101   case TGSI_OPCODE_MAD:
2102   /* TGSI_OPCODE_MADD */
2103      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2104         FETCH( &r[0], 0, chan_index );
2105         FETCH( &r[1], 1, chan_index );
2106         micro_mul( &r[0], &r[0], &r[1] );
2107         FETCH( &r[1], 2, chan_index );
2108         micro_add( &r[0], &r[0], &r[1] );
2109         STORE( &r[0], 0, chan_index );
2110      }
2111      break;
2112
2113   case TGSI_OPCODE_SUB:
2114      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2115         FETCH(&r[0], 0, chan_index);
2116         FETCH(&r[1], 1, chan_index);
2117
2118         micro_sub( &r[0], &r[0], &r[1] );
2119
2120         STORE(&r[0], 0, chan_index);
2121      }
2122      break;
2123
2124   case TGSI_OPCODE_LERP:
2125   /* TGSI_OPCODE_LRP */
2126      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2127         FETCH(&r[0], 0, chan_index);
2128         FETCH(&r[1], 1, chan_index);
2129         FETCH(&r[2], 2, chan_index);
2130
2131         micro_sub( &r[1], &r[1], &r[2] );
2132         micro_mul( &r[0], &r[0], &r[1] );
2133         micro_add( &r[0], &r[0], &r[2] );
2134
2135         STORE(&r[0], 0, chan_index);
2136      }
2137      break;
2138
2139   case TGSI_OPCODE_CND:
2140      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2141         FETCH(&r[0], 0, chan_index);
2142         FETCH(&r[1], 1, chan_index);
2143         FETCH(&r[2], 2, chan_index);
2144         micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
2145         STORE(&r[0], 0, chan_index);
2146      }
2147      break;
2148
2149   case TGSI_OPCODE_CND0:
2150      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2151         FETCH(&r[0], 0, chan_index);
2152         FETCH(&r[1], 1, chan_index);
2153         FETCH(&r[2], 2, chan_index);
2154         micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
2155         STORE(&r[0], 0, chan_index);
2156      }
2157      break;
2158
2159   case TGSI_OPCODE_DOT2ADD:
2160   /* TGSI_OPCODE_DP2A */
2161      FETCH( &r[0], 0, CHAN_X );
2162      FETCH( &r[1], 1, CHAN_X );
2163      micro_mul( &r[0], &r[0], &r[1] );
2164
2165      FETCH( &r[1], 0, CHAN_Y );
2166      FETCH( &r[2], 1, CHAN_Y );
2167      micro_mul( &r[1], &r[1], &r[2] );
2168      micro_add( &r[0], &r[0], &r[1] );
2169
2170      FETCH( &r[2], 2, CHAN_X );
2171      micro_add( &r[0], &r[0], &r[2] );
2172
2173      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2174         STORE( &r[0], 0, chan_index );
2175      }
2176      break;
2177
2178   case TGSI_OPCODE_INDEX:
2179      /* XXX: considered for removal */
2180      assert (0);
2181      break;
2182
2183   case TGSI_OPCODE_NEGATE:
2184      /* XXX: considered for removal */
2185      assert (0);
2186      break;
2187
2188   case TGSI_OPCODE_FRAC:
2189   /* TGSI_OPCODE_FRC */
2190      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2191         FETCH( &r[0], 0, chan_index );
2192         micro_frc( &r[0], &r[0] );
2193         STORE( &r[0], 0, chan_index );
2194      }
2195      break;
2196
2197   case TGSI_OPCODE_CLAMP:
2198      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2199         FETCH(&r[0], 0, chan_index);
2200         FETCH(&r[1], 1, chan_index);
2201         micro_max(&r[0], &r[0], &r[1]);
2202         FETCH(&r[1], 2, chan_index);
2203         micro_min(&r[0], &r[0], &r[1]);
2204         STORE(&r[0], 0, chan_index);
2205      }
2206      break;
2207
2208   case TGSI_OPCODE_ROUND:
2209   case TGSI_OPCODE_ARR:
2210      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2211         FETCH( &r[0], 0, chan_index );
2212         micro_rnd( &r[0], &r[0] );
2213         STORE( &r[0], 0, chan_index );
2214      }
2215      break;
2216
2217   case TGSI_OPCODE_EXPBASE2:
2218   /* TGSI_OPCODE_EX2 */
2219      FETCH(&r[0], 0, CHAN_X);
2220
2221#if FAST_MATH
2222      micro_exp2( &r[0], &r[0] );
2223#else
2224      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
2225#endif
2226
2227      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2228         STORE( &r[0], 0, chan_index );
2229      }
2230      break;
2231
2232   case TGSI_OPCODE_LOGBASE2:
2233   /* TGSI_OPCODE_LG2 */
2234      FETCH( &r[0], 0, CHAN_X );
2235      micro_lg2( &r[0], &r[0] );
2236      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2237         STORE( &r[0], 0, chan_index );
2238      }
2239      break;
2240
2241   case TGSI_OPCODE_POWER:
2242   /* TGSI_OPCODE_POW */
2243      FETCH(&r[0], 0, CHAN_X);
2244      FETCH(&r[1], 1, CHAN_X);
2245
2246      micro_pow( &r[0], &r[0], &r[1] );
2247
2248      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2249         STORE( &r[0], 0, chan_index );
2250      }
2251      break;
2252
2253   case TGSI_OPCODE_CROSSPRODUCT:
2254   /* TGSI_OPCODE_XPD */
2255      FETCH(&r[0], 0, CHAN_Y);
2256      FETCH(&r[1], 1, CHAN_Z);
2257
2258      micro_mul( &r[2], &r[0], &r[1] );
2259
2260      FETCH(&r[3], 0, CHAN_Z);
2261      FETCH(&r[4], 1, CHAN_Y);
2262
2263      micro_mul( &r[5], &r[3], &r[4] );
2264      micro_sub( &r[2], &r[2], &r[5] );
2265
2266      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
2267         STORE( &r[2], 0, CHAN_X );
2268      }
2269
2270      FETCH(&r[2], 1, CHAN_X);
2271
2272      micro_mul( &r[3], &r[3], &r[2] );
2273
2274      FETCH(&r[5], 0, CHAN_X);
2275
2276      micro_mul( &r[1], &r[1], &r[5] );
2277      micro_sub( &r[3], &r[3], &r[1] );
2278
2279      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
2280         STORE( &r[3], 0, CHAN_Y );
2281      }
2282
2283      micro_mul( &r[5], &r[5], &r[4] );
2284      micro_mul( &r[0], &r[0], &r[2] );
2285      micro_sub( &r[5], &r[5], &r[0] );
2286
2287      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
2288         STORE( &r[5], 0, CHAN_Z );
2289      }
2290
2291      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
2292         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2293      }
2294      break;
2295
2296    case TGSI_OPCODE_MULTIPLYMATRIX:
2297       /* XXX: considered for removal */
2298       assert (0);
2299       break;
2300
2301    case TGSI_OPCODE_ABS:
2302       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2303          FETCH(&r[0], 0, chan_index);
2304
2305          micro_abs( &r[0], &r[0] );
2306
2307          STORE(&r[0], 0, chan_index);
2308       }
2309       break;
2310
2311   case TGSI_OPCODE_RCC:
2312      FETCH(&r[0], 0, CHAN_X);
2313      micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
2314      micro_float_clamp(&r[0], &r[0]);
2315      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2316         STORE(&r[0], 0, chan_index);
2317      }
2318      break;
2319
2320   case TGSI_OPCODE_DPH:
2321      FETCH(&r[0], 0, CHAN_X);
2322      FETCH(&r[1], 1, CHAN_X);
2323
2324      micro_mul( &r[0], &r[0], &r[1] );
2325
2326      FETCH(&r[1], 0, CHAN_Y);
2327      FETCH(&r[2], 1, CHAN_Y);
2328
2329      micro_mul( &r[1], &r[1], &r[2] );
2330      micro_add( &r[0], &r[0], &r[1] );
2331
2332      FETCH(&r[1], 0, CHAN_Z);
2333      FETCH(&r[2], 1, CHAN_Z);
2334
2335      micro_mul( &r[1], &r[1], &r[2] );
2336      micro_add( &r[0], &r[0], &r[1] );
2337
2338      FETCH(&r[1], 1, CHAN_W);
2339
2340      micro_add( &r[0], &r[0], &r[1] );
2341
2342      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2343         STORE( &r[0], 0, chan_index );
2344      }
2345      break;
2346
2347   case TGSI_OPCODE_COS:
2348      FETCH(&r[0], 0, CHAN_X);
2349
2350      micro_cos( &r[0], &r[0] );
2351
2352      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2353         STORE( &r[0], 0, chan_index );
2354      }
2355      break;
2356
2357   case TGSI_OPCODE_DDX:
2358      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2359         FETCH( &r[0], 0, chan_index );
2360         micro_ddx( &r[0], &r[0] );
2361         STORE( &r[0], 0, chan_index );
2362      }
2363      break;
2364
2365   case TGSI_OPCODE_DDY:
2366      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2367         FETCH( &r[0], 0, chan_index );
2368         micro_ddy( &r[0], &r[0] );
2369         STORE( &r[0], 0, chan_index );
2370      }
2371      break;
2372
2373   case TGSI_OPCODE_KILP:
2374      exec_kilp (mach, inst);
2375      break;
2376
2377   case TGSI_OPCODE_KIL:
2378      exec_kil (mach, inst);
2379      break;
2380
2381   case TGSI_OPCODE_PK2H:
2382      assert (0);
2383      break;
2384
2385   case TGSI_OPCODE_PK2US:
2386      assert (0);
2387      break;
2388
2389   case TGSI_OPCODE_PK4B:
2390      assert (0);
2391      break;
2392
2393   case TGSI_OPCODE_PK4UB:
2394      assert (0);
2395      break;
2396
2397   case TGSI_OPCODE_RFL:
2398      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2399          IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2400          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2401         /* r0 = dp3(src0, src0) */
2402         FETCH(&r[2], 0, CHAN_X);
2403         micro_mul(&r[0], &r[2], &r[2]);
2404         FETCH(&r[4], 0, CHAN_Y);
2405         micro_mul(&r[8], &r[4], &r[4]);
2406         micro_add(&r[0], &r[0], &r[8]);
2407         FETCH(&r[6], 0, CHAN_Z);
2408         micro_mul(&r[8], &r[6], &r[6]);
2409         micro_add(&r[0], &r[0], &r[8]);
2410
2411         /* r1 = dp3(src0, src1) */
2412         FETCH(&r[3], 1, CHAN_X);
2413         micro_mul(&r[1], &r[2], &r[3]);
2414         FETCH(&r[5], 1, CHAN_Y);
2415         micro_mul(&r[8], &r[4], &r[5]);
2416         micro_add(&r[1], &r[1], &r[8]);
2417         FETCH(&r[7], 1, CHAN_Z);
2418         micro_mul(&r[8], &r[6], &r[7]);
2419         micro_add(&r[1], &r[1], &r[8]);
2420
2421         /* r1 = 2 * r1 / r0 */
2422         micro_add(&r[1], &r[1], &r[1]);
2423         micro_div(&r[1], &r[1], &r[0]);
2424
2425         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2426            micro_mul(&r[2], &r[2], &r[1]);
2427            micro_sub(&r[2], &r[2], &r[3]);
2428            STORE(&r[2], 0, CHAN_X);
2429         }
2430         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2431            micro_mul(&r[4], &r[4], &r[1]);
2432            micro_sub(&r[4], &r[4], &r[5]);
2433            STORE(&r[4], 0, CHAN_Y);
2434         }
2435         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2436            micro_mul(&r[6], &r[6], &r[1]);
2437            micro_sub(&r[6], &r[6], &r[7]);
2438            STORE(&r[6], 0, CHAN_Z);
2439         }
2440      }
2441      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2442         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2443      }
2444      break;
2445
2446   case TGSI_OPCODE_SEQ:
2447      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2448         FETCH( &r[0], 0, chan_index );
2449         FETCH( &r[1], 1, chan_index );
2450         micro_eq( &r[0], &r[0], &r[1],
2451                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
2452                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2453         STORE( &r[0], 0, chan_index );
2454      }
2455      break;
2456
2457   case TGSI_OPCODE_SFL:
2458      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2459         STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
2460      }
2461      break;
2462
2463   case TGSI_OPCODE_SGT:
2464      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2465         FETCH( &r[0], 0, chan_index );
2466         FETCH( &r[1], 1, chan_index );
2467         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2468         STORE( &r[0], 0, chan_index );
2469      }
2470      break;
2471
2472   case TGSI_OPCODE_SIN:
2473      FETCH( &r[0], 0, CHAN_X );
2474      micro_sin( &r[0], &r[0] );
2475      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2476         STORE( &r[0], 0, chan_index );
2477      }
2478      break;
2479
2480   case TGSI_OPCODE_SLE:
2481      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2482         FETCH( &r[0], 0, chan_index );
2483         FETCH( &r[1], 1, chan_index );
2484         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
2485         STORE( &r[0], 0, chan_index );
2486      }
2487      break;
2488
2489   case TGSI_OPCODE_SNE:
2490      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2491         FETCH( &r[0], 0, chan_index );
2492         FETCH( &r[1], 1, chan_index );
2493         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
2494         STORE( &r[0], 0, chan_index );
2495      }
2496      break;
2497
2498   case TGSI_OPCODE_STR:
2499      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
2500         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
2501      }
2502      break;
2503
2504   case TGSI_OPCODE_TEX:
2505      /* simple texture lookup */
2506      /* src[0] = texcoord */
2507      /* src[1] = sampler unit */
2508      exec_tex(mach, inst, FALSE, FALSE);
2509      break;
2510
2511   case TGSI_OPCODE_TXB:
2512      /* Texture lookup with lod bias */
2513      /* src[0] = texcoord (src[0].w = LOD bias) */
2514      /* src[1] = sampler unit */
2515      exec_tex(mach, inst, TRUE, FALSE);
2516      break;
2517
2518   case TGSI_OPCODE_TXD:
2519      /* Texture lookup with explict partial derivatives */
2520      /* src[0] = texcoord */
2521      /* src[1] = d[strq]/dx */
2522      /* src[2] = d[strq]/dy */
2523      /* src[3] = sampler unit */
2524      assert (0);
2525      break;
2526
2527   case TGSI_OPCODE_TXL:
2528      /* Texture lookup with explit LOD */
2529      /* src[0] = texcoord (src[0].w = LOD) */
2530      /* src[1] = sampler unit */
2531      exec_tex(mach, inst, TRUE, FALSE);
2532      break;
2533
2534   case TGSI_OPCODE_TXP:
2535      /* Texture lookup with projection */
2536      /* src[0] = texcoord (src[0].w = projection) */
2537      /* src[1] = sampler unit */
2538      exec_tex(mach, inst, FALSE, TRUE);
2539      break;
2540
2541   case TGSI_OPCODE_UP2H:
2542      assert (0);
2543      break;
2544
2545   case TGSI_OPCODE_UP2US:
2546      assert (0);
2547      break;
2548
2549   case TGSI_OPCODE_UP4B:
2550      assert (0);
2551      break;
2552
2553   case TGSI_OPCODE_UP4UB:
2554      assert (0);
2555      break;
2556
2557   case TGSI_OPCODE_X2D:
2558      FETCH(&r[0], 1, CHAN_X);
2559      FETCH(&r[1], 1, CHAN_Y);
2560      if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2561          IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2562         FETCH(&r[2], 2, CHAN_X);
2563         micro_mul(&r[2], &r[2], &r[0]);
2564         FETCH(&r[3], 2, CHAN_Y);
2565         micro_mul(&r[3], &r[3], &r[1]);
2566         micro_add(&r[2], &r[2], &r[3]);
2567         FETCH(&r[3], 0, CHAN_X);
2568         micro_add(&r[2], &r[2], &r[3]);
2569         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2570            STORE(&r[2], 0, CHAN_X);
2571         }
2572         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2573            STORE(&r[2], 0, CHAN_Z);
2574         }
2575      }
2576      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2577          IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2578         FETCH(&r[2], 2, CHAN_Z);
2579         micro_mul(&r[2], &r[2], &r[0]);
2580         FETCH(&r[3], 2, CHAN_W);
2581         micro_mul(&r[3], &r[3], &r[1]);
2582         micro_add(&r[2], &r[2], &r[3]);
2583         FETCH(&r[3], 0, CHAN_Y);
2584         micro_add(&r[2], &r[2], &r[3]);
2585         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2586            STORE(&r[2], 0, CHAN_Y);
2587         }
2588         if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2589            STORE(&r[2], 0, CHAN_W);
2590         }
2591      }
2592      break;
2593
2594   case TGSI_OPCODE_ARA:
2595      assert (0);
2596      break;
2597
2598   case TGSI_OPCODE_BRA:
2599      assert (0);
2600      break;
2601
2602   case TGSI_OPCODE_CAL:
2603      /* skip the call if no execution channels are enabled */
2604      if (mach->ExecMask) {
2605         /* do the call */
2606
2607         /* push the Cond, Loop, Cont stacks */
2608         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2609         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2610         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2611         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2612         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2613         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2614
2615         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
2616         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
2617
2618         /* note that PC was already incremented above */
2619         mach->CallStack[mach->CallStackTop++] = *pc;
2620         *pc = inst->InstructionExtLabel.Label;
2621      }
2622      break;
2623
2624   case TGSI_OPCODE_RET:
2625      mach->FuncMask &= ~mach->ExecMask;
2626      UPDATE_EXEC_MASK(mach);
2627
2628      if (mach->FuncMask == 0x0) {
2629         /* really return now (otherwise, keep executing */
2630
2631         if (mach->CallStackTop == 0) {
2632            /* returning from main() */
2633            *pc = -1;
2634            return;
2635         }
2636         *pc = mach->CallStack[--mach->CallStackTop];
2637
2638         /* pop the Cond, Loop, Cont stacks */
2639         assert(mach->CondStackTop > 0);
2640         mach->CondMask = mach->CondStack[--mach->CondStackTop];
2641         assert(mach->LoopStackTop > 0);
2642         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2643         assert(mach->ContStackTop > 0);
2644         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2645         assert(mach->FuncStackTop > 0);
2646         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
2647
2648         UPDATE_EXEC_MASK(mach);
2649      }
2650      break;
2651
2652   case TGSI_OPCODE_SSG:
2653   /* TGSI_OPCODE_SGN */
2654      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2655         FETCH( &r[0], 0, chan_index );
2656         micro_sgn( &r[0], &r[0] );
2657         STORE( &r[0], 0, chan_index );
2658      }
2659      break;
2660
2661   case TGSI_OPCODE_CMP:
2662      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2663         FETCH(&r[0], 0, chan_index);
2664         FETCH(&r[1], 1, chan_index);
2665         FETCH(&r[2], 2, chan_index);
2666
2667         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
2668
2669         STORE(&r[0], 0, chan_index);
2670      }
2671      break;
2672
2673   case TGSI_OPCODE_SCS:
2674      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
2675         FETCH( &r[0], 0, CHAN_X );
2676         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2677            micro_cos(&r[1], &r[0]);
2678            STORE(&r[1], 0, CHAN_X);
2679         }
2680         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2681            micro_sin(&r[1], &r[0]);
2682            STORE(&r[1], 0, CHAN_Y);
2683         }
2684      }
2685      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
2686         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
2687      }
2688      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
2689         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
2690      }
2691      break;
2692
2693   case TGSI_OPCODE_NRM:
2694      /* 3-component vector normalize */
2695      if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
2696         IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
2697         IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2698         /* r3 = sqrt(dp3(src0, src0)) */
2699         FETCH(&r[0], 0, CHAN_X);
2700         micro_mul(&r[3], &r[0], &r[0]);
2701         FETCH(&r[1], 0, CHAN_Y);
2702         micro_mul(&r[4], &r[1], &r[1]);
2703         micro_add(&r[3], &r[3], &r[4]);
2704         FETCH(&r[2], 0, CHAN_Z);
2705         micro_mul(&r[4], &r[2], &r[2]);
2706         micro_add(&r[3], &r[3], &r[4]);
2707         micro_sqrt(&r[3], &r[3]);
2708
2709         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
2710            micro_div(&r[0], &r[0], &r[3]);
2711            STORE(&r[0], 0, CHAN_X);
2712         }
2713         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
2714            micro_div(&r[1], &r[1], &r[3]);
2715            STORE(&r[1], 0, CHAN_Y);
2716         }
2717         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
2718            micro_div(&r[2], &r[2], &r[3]);
2719            STORE(&r[2], 0, CHAN_Z);
2720         }
2721      }
2722      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
2723         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
2724      }
2725      break;
2726
2727   case TGSI_OPCODE_NRM4:
2728      /* 4-component vector normalize */
2729      {
2730         union tgsi_exec_channel tmp, dot;
2731
2732         /* tmp = dp4(src0, src0): */
2733         FETCH( &r[0], 0, CHAN_X );
2734         micro_mul( &tmp, &r[0], &r[0] );
2735
2736         FETCH( &r[1], 0, CHAN_Y );
2737         micro_mul( &dot, &r[1], &r[1] );
2738         micro_add( &tmp, &tmp, &dot );
2739
2740         FETCH( &r[2], 0, CHAN_Z );
2741         micro_mul( &dot, &r[2], &r[2] );
2742         micro_add( &tmp, &tmp, &dot );
2743
2744         FETCH( &r[3], 0, CHAN_W );
2745         micro_mul( &dot, &r[3], &r[3] );
2746         micro_add( &tmp, &tmp, &dot );
2747
2748         /* tmp = 1 / sqrt(tmp) */
2749         micro_sqrt( &tmp, &tmp );
2750         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
2751
2752         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2753            /* chan = chan * tmp */
2754            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
2755            STORE( &r[chan_index], 0, chan_index );
2756         }
2757      }
2758      break;
2759
2760   case TGSI_OPCODE_DIV:
2761      assert( 0 );
2762      break;
2763
2764   case TGSI_OPCODE_DP2:
2765      FETCH( &r[0], 0, CHAN_X );
2766      FETCH( &r[1], 1, CHAN_X );
2767      micro_mul( &r[0], &r[0], &r[1] );
2768
2769      FETCH( &r[1], 0, CHAN_Y );
2770      FETCH( &r[2], 1, CHAN_Y );
2771      micro_mul( &r[1], &r[1], &r[2] );
2772      micro_add( &r[0], &r[0], &r[1] );
2773
2774      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2775         STORE( &r[0], 0, chan_index );
2776      }
2777      break;
2778
2779   case TGSI_OPCODE_IF:
2780      /* push CondMask */
2781      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
2782      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
2783      FETCH( &r[0], 0, CHAN_X );
2784      /* update CondMask */
2785      if( ! r[0].u[0] ) {
2786         mach->CondMask &= ~0x1;
2787      }
2788      if( ! r[0].u[1] ) {
2789         mach->CondMask &= ~0x2;
2790      }
2791      if( ! r[0].u[2] ) {
2792         mach->CondMask &= ~0x4;
2793      }
2794      if( ! r[0].u[3] ) {
2795         mach->CondMask &= ~0x8;
2796      }
2797      UPDATE_EXEC_MASK(mach);
2798      /* Todo: If CondMask==0, jump to ELSE */
2799      break;
2800
2801   case TGSI_OPCODE_ELSE:
2802      /* invert CondMask wrt previous mask */
2803      {
2804         uint prevMask;
2805         assert(mach->CondStackTop > 0);
2806         prevMask = mach->CondStack[mach->CondStackTop - 1];
2807         mach->CondMask = ~mach->CondMask & prevMask;
2808         UPDATE_EXEC_MASK(mach);
2809         /* Todo: If CondMask==0, jump to ENDIF */
2810      }
2811      break;
2812
2813   case TGSI_OPCODE_ENDIF:
2814      /* pop CondMask */
2815      assert(mach->CondStackTop > 0);
2816      mach->CondMask = mach->CondStack[--mach->CondStackTop];
2817      UPDATE_EXEC_MASK(mach);
2818      break;
2819
2820   case TGSI_OPCODE_END:
2821      /* halt execution */
2822      *pc = -1;
2823      break;
2824
2825   case TGSI_OPCODE_REP:
2826      assert (0);
2827      break;
2828
2829   case TGSI_OPCODE_ENDREP:
2830       assert (0);
2831       break;
2832
2833   case TGSI_OPCODE_PUSHA:
2834      assert (0);
2835      break;
2836
2837   case TGSI_OPCODE_POPA:
2838      assert (0);
2839      break;
2840
2841   case TGSI_OPCODE_CEIL:
2842      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2843         FETCH( &r[0], 0, chan_index );
2844         micro_ceil( &r[0], &r[0] );
2845         STORE( &r[0], 0, chan_index );
2846      }
2847      break;
2848
2849   case TGSI_OPCODE_I2F:
2850      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2851         FETCH( &r[0], 0, chan_index );
2852         micro_i2f( &r[0], &r[0] );
2853         STORE( &r[0], 0, chan_index );
2854      }
2855      break;
2856
2857   case TGSI_OPCODE_NOT:
2858      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2859         FETCH( &r[0], 0, chan_index );
2860         micro_not( &r[0], &r[0] );
2861         STORE( &r[0], 0, chan_index );
2862      }
2863      break;
2864
2865   case TGSI_OPCODE_TRUNC:
2866      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2867         FETCH( &r[0], 0, chan_index );
2868         micro_trunc( &r[0], &r[0] );
2869         STORE( &r[0], 0, chan_index );
2870      }
2871      break;
2872
2873   case TGSI_OPCODE_SHL:
2874      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2875         FETCH( &r[0], 0, chan_index );
2876         FETCH( &r[1], 1, chan_index );
2877         micro_shl( &r[0], &r[0], &r[1] );
2878         STORE( &r[0], 0, chan_index );
2879      }
2880      break;
2881
2882   case TGSI_OPCODE_SHR:
2883      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2884         FETCH( &r[0], 0, chan_index );
2885         FETCH( &r[1], 1, chan_index );
2886         micro_ishr( &r[0], &r[0], &r[1] );
2887         STORE( &r[0], 0, chan_index );
2888      }
2889      break;
2890
2891   case TGSI_OPCODE_AND:
2892      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2893         FETCH( &r[0], 0, chan_index );
2894         FETCH( &r[1], 1, chan_index );
2895         micro_and( &r[0], &r[0], &r[1] );
2896         STORE( &r[0], 0, chan_index );
2897      }
2898      break;
2899
2900   case TGSI_OPCODE_OR:
2901      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2902         FETCH( &r[0], 0, chan_index );
2903         FETCH( &r[1], 1, chan_index );
2904         micro_or( &r[0], &r[0], &r[1] );
2905         STORE( &r[0], 0, chan_index );
2906      }
2907      break;
2908
2909   case TGSI_OPCODE_MOD:
2910      assert (0);
2911      break;
2912
2913   case TGSI_OPCODE_XOR:
2914      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
2915         FETCH( &r[0], 0, chan_index );
2916         FETCH( &r[1], 1, chan_index );
2917         micro_xor( &r[0], &r[0], &r[1] );
2918         STORE( &r[0], 0, chan_index );
2919      }
2920      break;
2921
2922   case TGSI_OPCODE_SAD:
2923      assert (0);
2924      break;
2925
2926   case TGSI_OPCODE_TXF:
2927      assert (0);
2928      break;
2929
2930   case TGSI_OPCODE_TXQ:
2931      assert (0);
2932      break;
2933
2934   case TGSI_OPCODE_EMIT:
2935      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
2936      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2937      break;
2938
2939   case TGSI_OPCODE_ENDPRIM:
2940      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
2941      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
2942      break;
2943
2944   case TGSI_OPCODE_LOOP:
2945      /* fall-through (for now) */
2946   case TGSI_OPCODE_BGNLOOP2:
2947      /* push LoopMask and ContMasks */
2948      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2949      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
2950      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
2951      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
2952      break;
2953
2954   case TGSI_OPCODE_ENDLOOP:
2955      /* fall-through (for now at least) */
2956   case TGSI_OPCODE_ENDLOOP2:
2957      /* Restore ContMask, but don't pop */
2958      assert(mach->ContStackTop > 0);
2959      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
2960      UPDATE_EXEC_MASK(mach);
2961      if (mach->ExecMask) {
2962         /* repeat loop: jump to instruction just past BGNLOOP */
2963         *pc = inst->InstructionExtLabel.Label + 1;
2964      }
2965      else {
2966         /* exit loop: pop LoopMask */
2967         assert(mach->LoopStackTop > 0);
2968         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
2969         /* pop ContMask */
2970         assert(mach->ContStackTop > 0);
2971         mach->ContMask = mach->ContStack[--mach->ContStackTop];
2972      }
2973      UPDATE_EXEC_MASK(mach);
2974      break;
2975
2976   case TGSI_OPCODE_BRK:
2977      /* turn off loop channels for each enabled exec channel */
2978      mach->LoopMask &= ~mach->ExecMask;
2979      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2980      UPDATE_EXEC_MASK(mach);
2981      break;
2982
2983   case TGSI_OPCODE_CONT:
2984      /* turn off cont channels for each enabled exec channel */
2985      mach->ContMask &= ~mach->ExecMask;
2986      /* Todo: if mach->LoopMask == 0, jump to end of loop */
2987      UPDATE_EXEC_MASK(mach);
2988      break;
2989
2990   case TGSI_OPCODE_BGNSUB:
2991      /* no-op */
2992      break;
2993
2994   case TGSI_OPCODE_ENDSUB:
2995      /* no-op */
2996      break;
2997
2998   case TGSI_OPCODE_NOISE1:
2999      assert( 0 );
3000      break;
3001
3002   case TGSI_OPCODE_NOISE2:
3003      assert( 0 );
3004      break;
3005
3006   case TGSI_OPCODE_NOISE3:
3007      assert( 0 );
3008      break;
3009
3010   case TGSI_OPCODE_NOISE4:
3011      assert( 0 );
3012      break;
3013
3014   case TGSI_OPCODE_NOP:
3015      break;
3016
3017   default:
3018      assert( 0 );
3019   }
3020}
3021
3022
3023/**
3024 * Run TGSI interpreter.
3025 * \return bitmask of "alive" quad components
3026 */
3027uint
3028tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
3029{
3030   uint i;
3031   int pc = 0;
3032
3033   mach->CondMask = 0xf;
3034   mach->LoopMask = 0xf;
3035   mach->ContMask = 0xf;
3036   mach->FuncMask = 0xf;
3037   mach->ExecMask = 0xf;
3038
3039   mach->CondStackTop = 0; /* temporarily subvert this assertion */
3040   assert(mach->CondStackTop == 0);
3041   assert(mach->LoopStackTop == 0);
3042   assert(mach->ContStackTop == 0);
3043   assert(mach->CallStackTop == 0);
3044
3045   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
3046   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
3047
3048   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
3049      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
3050      mach->Primitives[0] = 0;
3051   }
3052
3053   for (i = 0; i < QUAD_SIZE; i++) {
3054      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
3055         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
3056         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
3057         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
3058         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
3059   }
3060
3061   /* execute declarations (interpolants) */
3062   for (i = 0; i < mach->NumDeclarations; i++) {
3063      exec_declaration( mach, mach->Declarations+i );
3064   }
3065
3066   /* execute instructions, until pc is set to -1 */
3067   while (pc != -1) {
3068      assert(pc < (int) mach->NumInstructions);
3069      exec_instruction( mach, mach->Instructions + pc, &pc );
3070   }
3071
3072#if 0
3073   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
3074   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
3075      /*
3076       * Scale back depth component.
3077       */
3078      for (i = 0; i < 4; i++)
3079         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
3080   }
3081#endif
3082
3083   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3084}
3085