tgsi_exec.c revision 3673189326e348eb91e354017703fdfd9d6d8184
1600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/************************************************************************** 2600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 3600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * All Rights Reserved. 5600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 6600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Permission is hereby granted, free of charge, to any person obtaining a 7600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * copy of this software and associated documentation files (the 8600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * "Software"), to deal in the Software without restriction, including 9600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * without limitation the rights to use, copy, modify, merge, publish, 10600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * distribute, sub license, and/or sell copies of the Software, and to 11600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * permit persons to whom the Software is furnished to do so, subject to 12600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * the following conditions: 13600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 14600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * The above copyright notice and this permission notice (including the 15600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * next paragraph) shall be included in all copies or substantial portions 16600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * of the Software. 
17600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 18600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 26600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang **************************************************************************/ 27600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 28600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/** 29600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * TGSI interpreter/executor. 
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel),
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care, since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
 *
 * The ExecMask is the bitwise AND of four other masks (CondMask, LoopMask,
 * ContMask and FuncMask; see UPDATE_EXEC_MASK), which are controlled by the
 * flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
46600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 47600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * 48600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Authors: 49600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Michal Krol 50600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Brian Paul 51600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang */ 52600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 53600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_compiler.h" 54600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_state.h" 55600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "pipe/p_shader_tokens.h" 56600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi/tgsi_parse.h" 57600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi/tgsi_util.h" 58600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "tgsi_exec.h" 59600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "util/u_memory.h" 60600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#include "util/u_math.h" 61600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 62600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define FAST_MATH 1 63600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 64600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TILE_TOP_LEFT 0 65600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TILE_TOP_RIGHT 1 66600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TILE_BOTTOM_LEFT 2 67600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TILE_BOTTOM_RIGHT 3 68600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 69600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define CHAN_X 0 70600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define CHAN_Y 1 71600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define CHAN_Z 2 72600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define CHAN_W 3 
73600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 74600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/* 75600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang */ 77600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 
94600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 103600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define TEMP_R0 TGSI_EXEC_TEMP_R0 106600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 107600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 110600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 113600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang for (CHAN = 0; CHAN < 
NUM_CHANNELS; CHAN++)\ 115600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (IS_CHANNEL_ENABLED( INST, CHAN )) 116600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 117600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 121600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 122600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/** The execution mask depends on the conditional mask and the loop mask */ 123600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#define UPDATE_EXEC_MASK(MACH) \ 124600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 126600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 127600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic const union tgsi_exec_channel ZeroVec = 128600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang { { 0.0, 0.0, 0.0, 0.0 } }; 129600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 130600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang/** 131600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * Initialize machine state by expanding tokens to full instructions, 132600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * allocating temporary storage, setting up constants, etc. 133600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * After this, we can call tgsi_exec_machine_run() many times. 
134600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang */ 135600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid 136600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_bind_shader( 137600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_exec_machine *mach, 138600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const struct tgsi_token *tokens, 139600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint numSamplers, 140600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_sampler **samplers) 141600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 142600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint k; 143600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_parse_context parse; 144600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_exec_labels *labels = &mach->Labels; 145600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_full_instruction *instructions; 146600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_full_declaration *declarations; 147600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint maxInstructions = 10, numInstructions = 0; 148600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint maxDeclarations = 10, numDeclarations = 0; 149600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint instno = 0; 150600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 151600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 152600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang tgsi_dump(tokens, 0); 153600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 154600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 155600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang util_init_math(); 156600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 157600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Tokens = tokens; 158600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Samplers = samplers; 
159600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 160600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang k = tgsi_parse_init (&parse, mach->Tokens); 161600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (k != TGSI_PARSE_OK) { 162600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang debug_printf( "Problem parsing!\n" ); 163600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang return; 164600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 165600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 166600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Processor = parse.FullHeader.Processor.Processor; 167600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->ImmLimit = 0; 168600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang labels->count = 0; 169600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 170600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang declarations = (struct tgsi_full_declaration *) 171600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 172600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 173600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (!declarations) { 174600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang return; 175600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 176600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 177600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang instructions = (struct tgsi_full_instruction *) 178600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 179600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 180600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (!instructions) { 181600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang FREE( declarations ); 182600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang return; 183600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 
184600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 185600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang while( !tgsi_parse_end_of_tokens( &parse ) ) { 186600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint pointer = parse.Position; 187600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint i; 188600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 189600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang tgsi_parse_token( &parse ); 190600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang switch( parse.FullToken.Token.Type ) { 191600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang case TGSI_TOKEN_TYPE_DECLARATION: 192600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang /* save expanded declaration */ 193600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (numDeclarations == maxDeclarations) { 194600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang declarations = REALLOC(declarations, 195600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang maxDeclarations 196600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * sizeof(struct tgsi_full_declaration), 197600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang (maxDeclarations + 10) 198600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * sizeof(struct tgsi_full_declaration)); 199600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang maxDeclarations += 10; 200600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 201600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang memcpy(declarations + numDeclarations, 202600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang &parse.FullToken.FullDeclaration, 203600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang sizeof(declarations[0])); 204600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang numDeclarations++; 205600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang break; 206600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 207600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang case TGSI_TOKEN_TYPE_IMMEDIATE: 
208600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang { 209600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 210600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang assert( size % 4 == 0 ); 211600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 212600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 213600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang for( i = 0; i < size; i++ ) { 214600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Imms[mach->ImmLimit + i / 4][i % 4] = 215600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 216600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 217600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->ImmLimit += size / 4; 218600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 219600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang break; 220600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 221600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang case TGSI_TOKEN_TYPE_INSTRUCTION: 222600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang assert( labels->count < MAX_LABELS ); 223600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 224600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang labels->labels[labels->count][0] = instno; 225600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang labels->labels[labels->count][1] = pointer; 226600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang labels->count++; 227600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 228600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang /* save expanded instruction */ 229600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (numInstructions == maxInstructions) { 230600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang instructions = REALLOC(instructions, 
231600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang maxInstructions 232600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * sizeof(struct tgsi_full_instruction), 233600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang (maxInstructions + 10) 234600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang * sizeof(struct tgsi_full_instruction)); 235600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang maxInstructions += 10; 236600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 237600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang memcpy(instructions + numInstructions, 238600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang &parse.FullToken.FullInstruction, 239600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang sizeof(instructions[0])); 240600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang numInstructions++; 241600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang break; 242600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 243600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang default: 244600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang assert( 0 ); 245600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 246600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 247600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang tgsi_parse_free (&parse); 248600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 249600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (mach->Declarations) { 250600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang FREE( mach->Declarations ); 251600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 252600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Declarations = declarations; 253600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->NumDeclarations = numDeclarations; 254600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 255600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (mach->Instructions) { 256600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang FREE( 
mach->Instructions ); 257600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 258600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Instructions = instructions; 259600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->NumInstructions = numInstructions; 260600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 261600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 262600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 263600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid 264600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_init( 265600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang struct tgsi_exec_machine *mach ) 266600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 267600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint i; 268600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 269600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 270600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 271600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 272600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang /* Setup constants. 
*/ 273600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang for( i = 0; i < 4; i++ ) { 274600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 275600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 276600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 277600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 278600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 279600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 280600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 281600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 282600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 283600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 284600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 285600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 286600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 287600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 288600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangvoid 289600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangtgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 290600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 291600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (mach->Instructions) { 292600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang FREE(mach->Instructions); 293600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Instructions = NULL; 294600c7a4bbc7348167293eac928192e695b4ad5baChung-yih 
Wang mach->NumInstructions = 0; 295600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 296600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (mach->Declarations) { 297600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang FREE(mach->Declarations); 298600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->Declarations = NULL; 299600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang mach->NumDeclarations = 0; 300600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 301600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 302600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 303600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 304600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 305600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_abs( 306600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 307600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 308600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 309600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = fabsf( src->f[0] ); 310600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = fabsf( src->f[1] ); 311600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = fabsf( src->f[2] ); 312600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = fabsf( src->f[3] ); 313600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 314600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 315600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 316600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_add( 317600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 318600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 319600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 320600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 
321600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] + src1->f[0]; 322600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] + src1->f[1]; 323600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] + src1->f[2]; 324600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] + src1->f[3]; 325600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 326600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 327600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 328600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 329600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_iadd( 330600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 331600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 332600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 333600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 334600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[0] = src0->i[0] + src1->i[0]; 335600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[1] = src0->i[1] + src1->i[1]; 336600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[2] = src0->i[2] + src1->i[2]; 337600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[3] = src0->i[3] + src1->i[3]; 338600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 339600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 340600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 341600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 342600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_and( 343600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 344600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 345600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union 
tgsi_exec_channel *src1 ) 346600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 347600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] & src1->u[0]; 348600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] & src1->u[1]; 349600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] & src1->u[2]; 350600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] & src1->u[3]; 351600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 352600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 353600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 354600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ceil( 355600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 356600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 357600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 358600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = ceilf( src->f[0] ); 359600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = ceilf( src->f[1] ); 360600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = ceilf( src->f[2] ); 361600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = ceilf( src->f[3] ); 362600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 363600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 364600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 365600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_cos( 366600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 367600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 368600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 369600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = cosf( src->f[0] ); 370600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = cosf( src->f[1] ); 
371600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = cosf( src->f[2] ); 372600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = cosf( src->f[3] ); 373600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 374600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 375600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 376600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ddx( 377600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 378600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 379600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 380600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = 381600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = 382600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = 383600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 384600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 385600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 386600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 387600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ddy( 388600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 389600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 390600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 391600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = 392600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = 393600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = 394600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 395600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 396600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 
397600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 398600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_div( 399600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 400600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 401600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 402600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 403600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src1->f[0] != 0) { 404600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] / src1->f[0]; 405600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 406600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src1->f[1] != 0) { 407600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] / src1->f[1]; 408600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 409600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src1->f[2] != 0) { 410600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] / src1->f[2]; 411600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 412600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src1->f[3] != 0) { 413600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] / src1->f[3]; 414600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 415600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 416600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 417600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 418600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 419600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_udiv( 420600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 421600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 422600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union 
tgsi_exec_channel *src1 ) 423600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 424600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] / src1->u[0]; 425600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] / src1->u[1]; 426600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] / src1->u[2]; 427600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] / src1->u[3]; 428600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 429600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 430600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 431600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 432600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_eq( 433600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 434600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 435600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 436600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 437600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 438600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 439600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 440600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 441600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 442600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] == src1->f[3] ? 
src2->f[3] : src3->f[3]; 443600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 444600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 445600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 446600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 447600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ieq( 448600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 449600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 450600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 451600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 452600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 453600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 454600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 455600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 456600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 457600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[3] = src0->i[3] == src1->i[3] ? 
src2->i[3] : src3->i[3]; 458600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 459600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 460600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 461600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 462600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_exp2( 463600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 464600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src) 465600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 466600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if FAST_MATH 467600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = util_fast_exp2( src->f[0] ); 468600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = util_fast_exp2( src->f[1] ); 469600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = util_fast_exp2( src->f[2] ); 470600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = util_fast_exp2( src->f[3] ); 471600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#else 472600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = powf( 2.0f, src->f[0] ); 473600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = powf( 2.0f, src->f[1] ); 474600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = powf( 2.0f, src->f[2] ); 475600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = powf( 2.0f, src->f[3] ); 476600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 477600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 478600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 479600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 480600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 481600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_f2ut( 482600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 
483600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 484600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 485600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = (uint) src->f[0]; 486600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = (uint) src->f[1]; 487600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = (uint) src->f[2]; 488600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = (uint) src->f[3]; 489600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 490600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 491600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 492600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 493600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_float_clamp(union tgsi_exec_channel *dst, 494600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src) 495600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 496600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang uint i; 497600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 498600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang for (i = 0; i < 4; i++) { 499600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src->f[i] > 0.0f) { 500600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src->f[i] > 1.884467e+019f) 501600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = 1.884467e+019f; 502600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang else if (src->f[i] < 5.42101e-020f) 503600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = 5.42101e-020f; 504600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang else 505600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = src->f[i]; 506600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 507600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang else { 508600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang if (src->f[i] 
< -1.884467e+019f) 509600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = -1.884467e+019f; 510600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang else if (src->f[i] > -5.42101e-020f) 511600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = -5.42101e-020f; 512600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang else 513600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[i] = src->f[i]; 514600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 515600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang } 516600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 517600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 518600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 519600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_flr( 520600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 521600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 522600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 523600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = floorf( src->f[0] ); 524600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = floorf( src->f[1] ); 525600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = floorf( src->f[2] ); 526600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = floorf( src->f[3] ); 527600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 528600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 529600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 530600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_frc( 531600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 532600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 533600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 534600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src->f[0] - floorf( 
src->f[0] ); 535600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src->f[1] - floorf( src->f[1] ); 536600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src->f[2] - floorf( src->f[2] ); 537600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src->f[3] - floorf( src->f[3] ); 538600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 539600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 540600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 541600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_i2f( 542600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 543600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 544600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 545600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = (float) src->i[0]; 546600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = (float) src->i[1]; 547600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = (float) src->i[2]; 548600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = (float) src->i[3]; 549600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 550600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 551600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 552600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_lg2( 553600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 554600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src ) 555600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 556600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if FAST_MATH 557600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = util_fast_log2( src->f[0] ); 558600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = util_fast_log2( src->f[1] ); 559600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 
dst->f[2] = util_fast_log2( src->f[2] ); 560600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = util_fast_log2( src->f[3] ); 561600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#else 562600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = logf( src->f[0] ) * 1.442695f; 563600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = logf( src->f[1] ) * 1.442695f; 564600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = logf( src->f[2] ) * 1.442695f; 565600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = logf( src->f[3] ) * 1.442695f; 566600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 567600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 568600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 569600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 570600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_le( 571600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 572600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 573600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 574600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 575600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 576600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 577600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 578600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 579600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 580600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] <= src1->f[3] ? 
src2->f[3] : src3->f[3]; 581600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 582600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 583600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 584600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_lt( 585600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 586600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 587600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 588600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 589600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 590600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 591600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 592600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 593600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 594600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] < src1->f[3] ? 
src2->f[3] : src3->f[3]; 595600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 596600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 597600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 598600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 599600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ilt( 600600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 601600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 602600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 603600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 604600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 605600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 606600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 607600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 608600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 609600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[3] = src0->i[3] < src1->i[3] ? 
src2->i[3] : src3->i[3]; 610600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 611600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 612600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 613600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 614600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 615600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_ult( 616600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 617600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 618600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1, 619600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src2, 620600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src3 ) 621600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 622600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 623600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 624600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 625600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] < src1->u[3] ? 
src2->u[3] : src3->u[3]; 626600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 627600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 628600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 629600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 630600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_max( 631600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 632600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 633600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 634600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 635600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 636600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 637600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 638600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 639600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 640600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 641600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 642600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 643600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_imax( 644600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 645600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 646600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 647600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 648600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[0] = src0->i[0] > src1->i[0] ? 
src0->i[0] : src1->i[0]; 649600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 650600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 651600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 652600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 653600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 654600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 655600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 656600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 657600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_umax( 658600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 659600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 660600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 661600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 662600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 663600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 664600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 665600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] > src1->u[3] ? 
src0->u[3] : src1->u[3]; 666600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 667600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 668600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 669600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 670600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_min( 671600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 672600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 673600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 674600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 675600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 676600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 677600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 678600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 679600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 680600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 681600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 682600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 683600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_imin( 684600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 685600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 686600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 687600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 688600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[0] = src0->i[0] < src1->i[0] ? 
src0->i[0] : src1->i[0]; 689600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 690600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 691600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 692600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 693600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 694600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 695600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 696600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 697600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_umin( 698600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 699600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 700600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 701600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 702600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 703600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 704600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 705600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; 706600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 707600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 708600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 709600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 710600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 711600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_umod( 712600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 713600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 714600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 715600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 716600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[0] = src0->u[0] % src1->u[0]; 717600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[1] = src0->u[1] % src1->u[1]; 718600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[2] = src0->u[2] % src1->u[2]; 719600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->u[3] = src0->u[3] % src1->u[3]; 720600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 721600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#endif 722600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 723600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangstatic void 724600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wangmicro_mul( 725600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang union tgsi_exec_channel *dst, 726600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src0, 727600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang const union tgsi_exec_channel *src1 ) 728600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang{ 729600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[0] = src0->f[0] * src1->f[0]; 730600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[1] = src0->f[1] * src1->f[1]; 
731600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[2] = src0->f[2] * src1->f[2]; 732600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang dst->f[3] = src0->f[3] * src1->f[3]; 733600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang} 734600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang 735600c7a4bbc7348167293eac928192e695b4ad5baChung-yih Wang#if 0 736static void 737micro_imul( 738 union tgsi_exec_channel *dst, 739 const union tgsi_exec_channel *src0, 740 const union tgsi_exec_channel *src1 ) 741{ 742 dst->i[0] = src0->i[0] * src1->i[0]; 743 dst->i[1] = src0->i[1] * src1->i[1]; 744 dst->i[2] = src0->i[2] * src1->i[2]; 745 dst->i[3] = src0->i[3] * src1->i[3]; 746} 747#endif 748 749#if 0 750static void 751micro_imul64( 752 union tgsi_exec_channel *dst0, 753 union tgsi_exec_channel *dst1, 754 const union tgsi_exec_channel *src0, 755 const union tgsi_exec_channel *src1 ) 756{ 757 dst1->i[0] = src0->i[0] * src1->i[0]; 758 dst1->i[1] = src0->i[1] * src1->i[1]; 759 dst1->i[2] = src0->i[2] * src1->i[2]; 760 dst1->i[3] = src0->i[3] * src1->i[3]; 761 dst0->i[0] = 0; 762 dst0->i[1] = 0; 763 dst0->i[2] = 0; 764 dst0->i[3] = 0; 765} 766#endif 767 768#if 0 769static void 770micro_umul64( 771 union tgsi_exec_channel *dst0, 772 union tgsi_exec_channel *dst1, 773 const union tgsi_exec_channel *src0, 774 const union tgsi_exec_channel *src1 ) 775{ 776 dst1->u[0] = src0->u[0] * src1->u[0]; 777 dst1->u[1] = src0->u[1] * src1->u[1]; 778 dst1->u[2] = src0->u[2] * src1->u[2]; 779 dst1->u[3] = src0->u[3] * src1->u[3]; 780 dst0->u[0] = 0; 781 dst0->u[1] = 0; 782 dst0->u[2] = 0; 783 dst0->u[3] = 0; 784} 785#endif 786 787 788#if 0 789static void 790micro_movc( 791 union tgsi_exec_channel *dst, 792 const union tgsi_exec_channel *src0, 793 const union tgsi_exec_channel *src1, 794 const union tgsi_exec_channel *src2 ) 795{ 796 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 797 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 798 dst->u[2] = src0->u[2] ? 
src1->u[2] : src2->u[2]; 799 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 800} 801#endif 802 803static void 804micro_neg( 805 union tgsi_exec_channel *dst, 806 const union tgsi_exec_channel *src ) 807{ 808 dst->f[0] = -src->f[0]; 809 dst->f[1] = -src->f[1]; 810 dst->f[2] = -src->f[2]; 811 dst->f[3] = -src->f[3]; 812} 813 814#if 0 815static void 816micro_ineg( 817 union tgsi_exec_channel *dst, 818 const union tgsi_exec_channel *src ) 819{ 820 dst->i[0] = -src->i[0]; 821 dst->i[1] = -src->i[1]; 822 dst->i[2] = -src->i[2]; 823 dst->i[3] = -src->i[3]; 824} 825#endif 826 827static void 828micro_not( 829 union tgsi_exec_channel *dst, 830 const union tgsi_exec_channel *src ) 831{ 832 dst->u[0] = ~src->u[0]; 833 dst->u[1] = ~src->u[1]; 834 dst->u[2] = ~src->u[2]; 835 dst->u[3] = ~src->u[3]; 836} 837 838static void 839micro_or( 840 union tgsi_exec_channel *dst, 841 const union tgsi_exec_channel *src0, 842 const union tgsi_exec_channel *src1 ) 843{ 844 dst->u[0] = src0->u[0] | src1->u[0]; 845 dst->u[1] = src0->u[1] | src1->u[1]; 846 dst->u[2] = src0->u[2] | src1->u[2]; 847 dst->u[3] = src0->u[3] | src1->u[3]; 848} 849 850static void 851micro_pow( 852 union tgsi_exec_channel *dst, 853 const union tgsi_exec_channel *src0, 854 const union tgsi_exec_channel *src1 ) 855{ 856#if FAST_MATH 857 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 858 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 859 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 860 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 861#else 862 dst->f[0] = powf( src0->f[0], src1->f[0] ); 863 dst->f[1] = powf( src0->f[1], src1->f[1] ); 864 dst->f[2] = powf( src0->f[2], src1->f[2] ); 865 dst->f[3] = powf( src0->f[3], src1->f[3] ); 866#endif 867} 868 869static void 870micro_rnd( 871 union tgsi_exec_channel *dst, 872 const union tgsi_exec_channel *src ) 873{ 874 dst->f[0] = floorf( src->f[0] + 0.5f ); 875 dst->f[1] = floorf( src->f[1] + 0.5f ); 876 dst->f[2] = floorf( src->f[2] + 0.5f ); 877 
dst->f[3] = floorf( src->f[3] + 0.5f ); 878} 879 880static void 881micro_sgn( 882 union tgsi_exec_channel *dst, 883 const union tgsi_exec_channel *src ) 884{ 885 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 886 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 887 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 888 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 889} 890 891static void 892micro_shl( 893 union tgsi_exec_channel *dst, 894 const union tgsi_exec_channel *src0, 895 const union tgsi_exec_channel *src1 ) 896{ 897 dst->i[0] = src0->i[0] << src1->i[0]; 898 dst->i[1] = src0->i[1] << src1->i[1]; 899 dst->i[2] = src0->i[2] << src1->i[2]; 900 dst->i[3] = src0->i[3] << src1->i[3]; 901} 902 903static void 904micro_ishr( 905 union tgsi_exec_channel *dst, 906 const union tgsi_exec_channel *src0, 907 const union tgsi_exec_channel *src1 ) 908{ 909 dst->i[0] = src0->i[0] >> src1->i[0]; 910 dst->i[1] = src0->i[1] >> src1->i[1]; 911 dst->i[2] = src0->i[2] >> src1->i[2]; 912 dst->i[3] = src0->i[3] >> src1->i[3]; 913} 914 915static void 916micro_trunc( 917 union tgsi_exec_channel *dst, 918 const union tgsi_exec_channel *src0 ) 919{ 920 dst->f[0] = (float) (int) src0->f[0]; 921 dst->f[1] = (float) (int) src0->f[1]; 922 dst->f[2] = (float) (int) src0->f[2]; 923 dst->f[3] = (float) (int) src0->f[3]; 924} 925 926#if 0 927static void 928micro_ushr( 929 union tgsi_exec_channel *dst, 930 const union tgsi_exec_channel *src0, 931 const union tgsi_exec_channel *src1 ) 932{ 933 dst->u[0] = src0->u[0] >> src1->u[0]; 934 dst->u[1] = src0->u[1] >> src1->u[1]; 935 dst->u[2] = src0->u[2] >> src1->u[2]; 936 dst->u[3] = src0->u[3] >> src1->u[3]; 937} 938#endif 939 940static void 941micro_sin( 942 union tgsi_exec_channel *dst, 943 const union tgsi_exec_channel *src ) 944{ 945 dst->f[0] = sinf( src->f[0] ); 946 dst->f[1] = sinf( src->f[1] ); 947 dst->f[2] = sinf( src->f[2] ); 948 dst->f[3] = sinf( 
src->f[3] ); 949} 950 951static void 952micro_sqrt( union tgsi_exec_channel *dst, 953 const union tgsi_exec_channel *src ) 954{ 955 dst->f[0] = sqrtf( src->f[0] ); 956 dst->f[1] = sqrtf( src->f[1] ); 957 dst->f[2] = sqrtf( src->f[2] ); 958 dst->f[3] = sqrtf( src->f[3] ); 959} 960 961static void 962micro_sub( 963 union tgsi_exec_channel *dst, 964 const union tgsi_exec_channel *src0, 965 const union tgsi_exec_channel *src1 ) 966{ 967 dst->f[0] = src0->f[0] - src1->f[0]; 968 dst->f[1] = src0->f[1] - src1->f[1]; 969 dst->f[2] = src0->f[2] - src1->f[2]; 970 dst->f[3] = src0->f[3] - src1->f[3]; 971} 972 973#if 0 974static void 975micro_u2f( 976 union tgsi_exec_channel *dst, 977 const union tgsi_exec_channel *src ) 978{ 979 dst->f[0] = (float) src->u[0]; 980 dst->f[1] = (float) src->u[1]; 981 dst->f[2] = (float) src->u[2]; 982 dst->f[3] = (float) src->u[3]; 983} 984#endif 985 986static void 987micro_xor( 988 union tgsi_exec_channel *dst, 989 const union tgsi_exec_channel *src0, 990 const union tgsi_exec_channel *src1 ) 991{ 992 dst->u[0] = src0->u[0] ^ src1->u[0]; 993 dst->u[1] = src0->u[1] ^ src1->u[1]; 994 dst->u[2] = src0->u[2] ^ src1->u[2]; 995 dst->u[3] = src0->u[3] ^ src1->u[3]; 996} 997 998static void 999fetch_src_file_channel( 1000 const struct tgsi_exec_machine *mach, 1001 const uint file, 1002 const uint swizzle, 1003 const union tgsi_exec_channel *index, 1004 union tgsi_exec_channel *chan ) 1005{ 1006 switch( swizzle ) { 1007 case TGSI_EXTSWIZZLE_X: 1008 case TGSI_EXTSWIZZLE_Y: 1009 case TGSI_EXTSWIZZLE_Z: 1010 case TGSI_EXTSWIZZLE_W: 1011 switch( file ) { 1012 case TGSI_FILE_CONSTANT: 1013 assert(mach->Consts); 1014 if (index->i[0] < 0) 1015 chan->f[0] = 0.0f; 1016 else 1017 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1018 if (index->i[1] < 0) 1019 chan->f[1] = 0.0f; 1020 else 1021 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1022 if (index->i[2] < 0) 1023 chan->f[2] = 0.0f; 1024 else 1025 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1026 if 
(index->i[3] < 0) 1027 chan->f[3] = 0.0f; 1028 else 1029 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1030 break; 1031 1032 case TGSI_FILE_INPUT: 1033 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1034 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1035 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1036 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1037 break; 1038 1039 case TGSI_FILE_TEMPORARY: 1040 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1041 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1042 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1043 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1044 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1045 break; 1046 1047 case TGSI_FILE_IMMEDIATE: 1048 assert( index->i[0] < (int) mach->ImmLimit ); 1049 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1050 assert( index->i[1] < (int) mach->ImmLimit ); 1051 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1052 assert( index->i[2] < (int) mach->ImmLimit ); 1053 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1054 assert( index->i[3] < (int) mach->ImmLimit ); 1055 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1056 break; 1057 1058 case TGSI_FILE_ADDRESS: 1059 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1060 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1061 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1062 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1063 break; 1064 1065 case TGSI_FILE_OUTPUT: 1066 /* vertex/fragment output vars can be read too */ 1067 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1068 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1069 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1070 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1071 break; 1072 1073 default: 1074 assert( 0 ); 1075 } 1076 break; 1077 1078 case TGSI_EXTSWIZZLE_ZERO: 1079 *chan = 
mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1080 break; 1081 1082 case TGSI_EXTSWIZZLE_ONE: 1083 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1084 break; 1085 1086 default: 1087 assert( 0 ); 1088 } 1089} 1090 1091static void 1092fetch_source( 1093 const struct tgsi_exec_machine *mach, 1094 union tgsi_exec_channel *chan, 1095 const struct tgsi_full_src_register *reg, 1096 const uint chan_index ) 1097{ 1098 union tgsi_exec_channel index; 1099 uint swizzle; 1100 1101 /* We start with a direct index into a register file. 1102 * 1103 * file[1], 1104 * where: 1105 * file = SrcRegister.File 1106 * [1] = SrcRegister.Index 1107 */ 1108 index.i[0] = 1109 index.i[1] = 1110 index.i[2] = 1111 index.i[3] = reg->SrcRegister.Index; 1112 1113 /* There is an extra source register that indirectly subscripts 1114 * a register file. The direct index now becomes an offset 1115 * that is being added to the indirect register. 1116 * 1117 * file[ind[2].x+1], 1118 * where: 1119 * ind = SrcRegisterInd.File 1120 * [2] = SrcRegisterInd.Index 1121 * .x = SrcRegisterInd.SwizzleX 1122 */ 1123 if (reg->SrcRegister.Indirect) { 1124 union tgsi_exec_channel index2; 1125 union tgsi_exec_channel indir_index; 1126 const uint execmask = mach->ExecMask; 1127 uint i; 1128 1129 /* which address register (always zero now) */ 1130 index2.i[0] = 1131 index2.i[1] = 1132 index2.i[2] = 1133 index2.i[3] = reg->SrcRegisterInd.Index; 1134 1135 /* get current value of address register[swizzle] */ 1136 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1137 fetch_src_file_channel( 1138 mach, 1139 reg->SrcRegisterInd.File, 1140 swizzle, 1141 &index2, 1142 &indir_index ); 1143 1144 /* add value of address register to the offset */ 1145 index.i[0] += (int) indir_index.f[0]; 1146 index.i[1] += (int) indir_index.f[1]; 1147 index.i[2] += (int) indir_index.f[2]; 1148 index.i[3] += (int) indir_index.f[3]; 1149 1150 /* for disabled execution channels, zero-out the index to 1151 * avoid using a potential 
garbage value. 1152 */ 1153 for (i = 0; i < QUAD_SIZE; i++) { 1154 if ((execmask & (1 << i)) == 0) 1155 index.i[i] = 0; 1156 } 1157 } 1158 1159 /* There is an extra source register that is a second 1160 * subscript to a register file. Effectively it means that 1161 * the register file is actually a 2D array of registers. 1162 * 1163 * file[1][3] == file[1*sizeof(file[1])+3], 1164 * where: 1165 * [3] = SrcRegisterDim.Index 1166 */ 1167 if (reg->SrcRegister.Dimension) { 1168 /* The size of the first-order array depends on the register file type. 1169 * We need to multiply the index to the first array to get an effective, 1170 * "flat" index that points to the beginning of the second-order array. 1171 */ 1172 switch (reg->SrcRegister.File) { 1173 case TGSI_FILE_INPUT: 1174 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1175 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1176 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1177 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1178 break; 1179 case TGSI_FILE_CONSTANT: 1180 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1181 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1182 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1183 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1184 break; 1185 default: 1186 assert( 0 ); 1187 } 1188 1189 index.i[0] += reg->SrcRegisterDim.Index; 1190 index.i[1] += reg->SrcRegisterDim.Index; 1191 index.i[2] += reg->SrcRegisterDim.Index; 1192 index.i[3] += reg->SrcRegisterDim.Index; 1193 1194 /* Again, the second subscript index can be addressed indirectly 1195 * identically to the first one. 1196 * Nothing stops us from indirectly addressing the indirect register, 1197 * but there is no need for that, so we won't exercise it. 
1198 * 1199 * file[1][ind[4].y+3], 1200 * where: 1201 * ind = SrcRegisterDimInd.File 1202 * [4] = SrcRegisterDimInd.Index 1203 * .y = SrcRegisterDimInd.SwizzleX 1204 */ 1205 if (reg->SrcRegisterDim.Indirect) { 1206 union tgsi_exec_channel index2; 1207 union tgsi_exec_channel indir_index; 1208 const uint execmask = mach->ExecMask; 1209 uint i; 1210 1211 index2.i[0] = 1212 index2.i[1] = 1213 index2.i[2] = 1214 index2.i[3] = reg->SrcRegisterDimInd.Index; 1215 1216 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1217 fetch_src_file_channel( 1218 mach, 1219 reg->SrcRegisterDimInd.File, 1220 swizzle, 1221 &index2, 1222 &indir_index ); 1223 1224 index.i[0] += (int) indir_index.f[0]; 1225 index.i[1] += (int) indir_index.f[1]; 1226 index.i[2] += (int) indir_index.f[2]; 1227 index.i[3] += (int) indir_index.f[3]; 1228 1229 /* for disabled execution channels, zero-out the index to 1230 * avoid using a potential garbage value. 1231 */ 1232 for (i = 0; i < QUAD_SIZE; i++) { 1233 if ((execmask & (1 << i)) == 0) 1234 index.i[i] = 0; 1235 } 1236 } 1237 1238 /* If by any chance there was a need for a 3D array of register 1239 * files, we would have to check whether SrcRegisterDim is followed 1240 * by a dimension register and continue the saga. 
1241 */ 1242 } 1243 1244 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1245 fetch_src_file_channel( 1246 mach, 1247 reg->SrcRegister.File, 1248 swizzle, 1249 &index, 1250 chan ); 1251 1252 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1253 case TGSI_UTIL_SIGN_CLEAR: 1254 micro_abs( chan, chan ); 1255 break; 1256 1257 case TGSI_UTIL_SIGN_SET: 1258 micro_abs( chan, chan ); 1259 micro_neg( chan, chan ); 1260 break; 1261 1262 case TGSI_UTIL_SIGN_TOGGLE: 1263 micro_neg( chan, chan ); 1264 break; 1265 1266 case TGSI_UTIL_SIGN_KEEP: 1267 break; 1268 } 1269 1270 if (reg->SrcRegisterExtMod.Complement) { 1271 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1272 } 1273} 1274 1275static void 1276store_dest( 1277 struct tgsi_exec_machine *mach, 1278 const union tgsi_exec_channel *chan, 1279 const struct tgsi_full_dst_register *reg, 1280 const struct tgsi_full_instruction *inst, 1281 uint chan_index ) 1282{ 1283 uint i; 1284 union tgsi_exec_channel null; 1285 union tgsi_exec_channel *dst; 1286 uint execmask = mach->ExecMask; 1287 1288 switch (reg->DstRegister.File) { 1289 case TGSI_FILE_NULL: 1290 dst = &null; 1291 break; 1292 1293 case TGSI_FILE_OUTPUT: 1294 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1295 + reg->DstRegister.Index].xyzw[chan_index]; 1296 break; 1297 1298 case TGSI_FILE_TEMPORARY: 1299 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1300 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1301 break; 1302 1303 case TGSI_FILE_ADDRESS: 1304 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1305 break; 1306 1307 default: 1308 assert( 0 ); 1309 return; 1310 } 1311 1312 if (inst->InstructionExtNv.CondFlowEnable) { 1313 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1314 uint swizzle; 1315 uint shift; 1316 uint mask; 1317 uint test; 1318 1319 /* Only CC0 supported. 
1320 */ 1321 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1322 1323 switch (chan_index) { 1324 case CHAN_X: 1325 swizzle = inst->InstructionExtNv.CondSwizzleX; 1326 break; 1327 case CHAN_Y: 1328 swizzle = inst->InstructionExtNv.CondSwizzleY; 1329 break; 1330 case CHAN_Z: 1331 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1332 break; 1333 case CHAN_W: 1334 swizzle = inst->InstructionExtNv.CondSwizzleW; 1335 break; 1336 default: 1337 assert( 0 ); 1338 return; 1339 } 1340 1341 switch (swizzle) { 1342 case TGSI_SWIZZLE_X: 1343 shift = TGSI_EXEC_CC_X_SHIFT; 1344 mask = TGSI_EXEC_CC_X_MASK; 1345 break; 1346 case TGSI_SWIZZLE_Y: 1347 shift = TGSI_EXEC_CC_Y_SHIFT; 1348 mask = TGSI_EXEC_CC_Y_MASK; 1349 break; 1350 case TGSI_SWIZZLE_Z: 1351 shift = TGSI_EXEC_CC_Z_SHIFT; 1352 mask = TGSI_EXEC_CC_Z_MASK; 1353 break; 1354 case TGSI_SWIZZLE_W: 1355 shift = TGSI_EXEC_CC_W_SHIFT; 1356 mask = TGSI_EXEC_CC_W_MASK; 1357 break; 1358 default: 1359 assert( 0 ); 1360 return; 1361 } 1362 1363 switch (inst->InstructionExtNv.CondMask) { 1364 case TGSI_CC_GT: 1365 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1366 for (i = 0; i < QUAD_SIZE; i++) 1367 if (cc->u[i] & test) 1368 execmask &= ~(1 << i); 1369 break; 1370 1371 case TGSI_CC_EQ: 1372 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1373 for (i = 0; i < QUAD_SIZE; i++) 1374 if (cc->u[i] & test) 1375 execmask &= ~(1 << i); 1376 break; 1377 1378 case TGSI_CC_LT: 1379 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1380 for (i = 0; i < QUAD_SIZE; i++) 1381 if (cc->u[i] & test) 1382 execmask &= ~(1 << i); 1383 break; 1384 1385 case TGSI_CC_GE: 1386 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1387 for (i = 0; i < QUAD_SIZE; i++) 1388 if (cc->u[i] & test) 1389 execmask &= ~(1 << i); 1390 break; 1391 1392 case TGSI_CC_LE: 1393 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1394 for (i = 0; i < QUAD_SIZE; i++) 1395 if (cc->u[i] & test) 1396 execmask &= ~(1 << i); 1397 break; 1398 1399 case TGSI_CC_NE: 1400 test = 
~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1401 for (i = 0; i < QUAD_SIZE; i++) 1402 if (cc->u[i] & test) 1403 execmask &= ~(1 << i); 1404 break; 1405 1406 case TGSI_CC_TR: 1407 break; 1408 1409 case TGSI_CC_FL: 1410 for (i = 0; i < QUAD_SIZE; i++) 1411 execmask &= ~(1 << i); 1412 break; 1413 1414 default: 1415 assert( 0 ); 1416 return; 1417 } 1418 } 1419 1420 switch (inst->Instruction.Saturate) { 1421 case TGSI_SAT_NONE: 1422 for (i = 0; i < QUAD_SIZE; i++) 1423 if (execmask & (1 << i)) 1424 dst->i[i] = chan->i[i]; 1425 break; 1426 1427 case TGSI_SAT_ZERO_ONE: 1428 for (i = 0; i < QUAD_SIZE; i++) 1429 if (execmask & (1 << i)) { 1430 if (chan->f[i] < 0.0f) 1431 dst->f[i] = 0.0f; 1432 else if (chan->f[i] > 1.0f) 1433 dst->f[i] = 1.0f; 1434 else 1435 dst->i[i] = chan->i[i]; 1436 } 1437 break; 1438 1439 case TGSI_SAT_MINUS_PLUS_ONE: 1440 for (i = 0; i < QUAD_SIZE; i++) 1441 if (execmask & (1 << i)) { 1442 if (chan->f[i] < -1.0f) 1443 dst->f[i] = -1.0f; 1444 else if (chan->f[i] > 1.0f) 1445 dst->f[i] = 1.0f; 1446 else 1447 dst->i[i] = chan->i[i]; 1448 } 1449 break; 1450 1451 default: 1452 assert( 0 ); 1453 } 1454 1455 if (inst->InstructionExtNv.CondDstUpdate) { 1456 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1457 uint shift; 1458 uint mask; 1459 1460 /* Only CC0 supported. 
1461 */ 1462 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1463 1464 switch (chan_index) { 1465 case CHAN_X: 1466 shift = TGSI_EXEC_CC_X_SHIFT; 1467 mask = ~TGSI_EXEC_CC_X_MASK; 1468 break; 1469 case CHAN_Y: 1470 shift = TGSI_EXEC_CC_Y_SHIFT; 1471 mask = ~TGSI_EXEC_CC_Y_MASK; 1472 break; 1473 case CHAN_Z: 1474 shift = TGSI_EXEC_CC_Z_SHIFT; 1475 mask = ~TGSI_EXEC_CC_Z_MASK; 1476 break; 1477 case CHAN_W: 1478 shift = TGSI_EXEC_CC_W_SHIFT; 1479 mask = ~TGSI_EXEC_CC_W_MASK; 1480 break; 1481 default: 1482 assert( 0 ); 1483 return; 1484 } 1485 1486 for (i = 0; i < QUAD_SIZE; i++) 1487 if (execmask & (1 << i)) { 1488 cc->u[i] &= mask; 1489 if (dst->f[i] < 0.0f) 1490 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1491 else if (dst->f[i] > 0.0f) 1492 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1493 else if (dst->f[i] == 0.0f) 1494 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1495 else 1496 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1497 } 1498 } 1499} 1500 1501#define FETCH(VAL,INDEX,CHAN)\ 1502 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1503 1504#define STORE(VAL,INDEX,CHAN)\ 1505 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1506 1507 1508/** 1509 * Execute ARB-style KIL which is predicated by a src register. 1510 * Kill fragment if any of the four values is less than zero. 1511 */ 1512static void 1513exec_kil(struct tgsi_exec_machine *mach, 1514 const struct tgsi_full_instruction *inst) 1515{ 1516 uint uniquemask; 1517 uint chan_index; 1518 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1519 union tgsi_exec_channel r[1]; 1520 1521 /* This mask stores component bits that were already tested. Note that 1522 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1523 * tested. 
*/ 1524 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1525 1526 for (chan_index = 0; chan_index < 4; chan_index++) 1527 { 1528 uint swizzle; 1529 uint i; 1530 1531 /* unswizzle channel */ 1532 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1533 &inst->FullSrcRegisters[0], 1534 chan_index); 1535 1536 /* check if the component has not been already tested */ 1537 if (uniquemask & (1 << swizzle)) 1538 continue; 1539 uniquemask |= 1 << swizzle; 1540 1541 FETCH(&r[0], 0, chan_index); 1542 for (i = 0; i < 4; i++) 1543 if (r[0].f[i] < 0.0f) 1544 kilmask |= 1 << i; 1545 } 1546 1547 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1548} 1549 1550/** 1551 * Execute NVIDIA-style KIL which is predicated by a condition code. 1552 * Kill fragment if the condition code is TRUE. 1553 */ 1554static void 1555exec_kilp(struct tgsi_exec_machine *mach, 1556 const struct tgsi_full_instruction *inst) 1557{ 1558 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1559 1560 if (inst->InstructionExtNv.CondFlowEnable) { 1561 uint swizzle[4]; 1562 uint chan_index; 1563 1564 kilmask = 0x0; 1565 1566 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1567 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1568 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1569 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1570 1571 for (chan_index = 0; chan_index < 4; chan_index++) 1572 { 1573 uint i; 1574 1575 for (i = 0; i < 4; i++) { 1576 /* TODO: evaluate the condition code */ 1577 if (0) 1578 kilmask |= 1 << i; 1579 } 1580 } 1581 } 1582 else { 1583 /* "unconditional" kil */ 1584 kilmask = mach->ExecMask; 1585 } 1586 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1587} 1588 1589 1590/* 1591 * Fetch a four texture samples using STR texture coordinates. 
1592 */ 1593static void 1594fetch_texel( struct tgsi_sampler *sampler, 1595 const union tgsi_exec_channel *s, 1596 const union tgsi_exec_channel *t, 1597 const union tgsi_exec_channel *p, 1598 float lodbias, /* XXX should be float[4] */ 1599 union tgsi_exec_channel *r, 1600 union tgsi_exec_channel *g, 1601 union tgsi_exec_channel *b, 1602 union tgsi_exec_channel *a ) 1603{ 1604 uint j; 1605 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1606 1607 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1608 1609 for (j = 0; j < 4; j++) { 1610 r->f[j] = rgba[0][j]; 1611 g->f[j] = rgba[1][j]; 1612 b->f[j] = rgba[2][j]; 1613 a->f[j] = rgba[3][j]; 1614 } 1615} 1616 1617 1618static void 1619exec_tex(struct tgsi_exec_machine *mach, 1620 const struct tgsi_full_instruction *inst, 1621 boolean biasLod, 1622 boolean projected) 1623{ 1624 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1625 union tgsi_exec_channel r[4]; 1626 uint chan_index; 1627 float lodBias; 1628 1629 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1630 1631 switch (inst->InstructionExtTexture.Texture) { 1632 case TGSI_TEXTURE_1D: 1633 case TGSI_TEXTURE_SHADOW1D: 1634 1635 FETCH(&r[0], 0, CHAN_X); 1636 1637 if (projected) { 1638 FETCH(&r[1], 0, CHAN_W); 1639 micro_div( &r[0], &r[0], &r[1] ); 1640 } 1641 1642 if (biasLod) { 1643 FETCH(&r[1], 0, CHAN_W); 1644 lodBias = r[2].f[0]; 1645 } 1646 else 1647 lodBias = 0.0; 1648 1649 fetch_texel(mach->Samplers[unit], 1650 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1651 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1652 break; 1653 1654 case TGSI_TEXTURE_2D: 1655 case TGSI_TEXTURE_RECT: 1656 case TGSI_TEXTURE_SHADOW2D: 1657 case TGSI_TEXTURE_SHADOWRECT: 1658 1659 FETCH(&r[0], 0, CHAN_X); 1660 FETCH(&r[1], 0, CHAN_Y); 1661 FETCH(&r[2], 0, CHAN_Z); 1662 1663 if (projected) { 1664 FETCH(&r[3], 0, CHAN_W); 1665 micro_div( &r[0], &r[0], &r[3] ); 1666 micro_div( &r[1], &r[1], &r[3] ); 1667 micro_div( &r[2], &r[2], &r[3] ); 1668 } 1669 
1670 if (biasLod) { 1671 FETCH(&r[3], 0, CHAN_W); 1672 lodBias = r[3].f[0]; 1673 } 1674 else 1675 lodBias = 0.0; 1676 1677 fetch_texel(mach->Samplers[unit], 1678 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1679 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1680 break; 1681 1682 case TGSI_TEXTURE_3D: 1683 case TGSI_TEXTURE_CUBE: 1684 1685 FETCH(&r[0], 0, CHAN_X); 1686 FETCH(&r[1], 0, CHAN_Y); 1687 FETCH(&r[2], 0, CHAN_Z); 1688 1689 if (projected) { 1690 FETCH(&r[3], 0, CHAN_W); 1691 micro_div( &r[0], &r[0], &r[3] ); 1692 micro_div( &r[1], &r[1], &r[3] ); 1693 micro_div( &r[2], &r[2], &r[3] ); 1694 } 1695 1696 if (biasLod) { 1697 FETCH(&r[3], 0, CHAN_W); 1698 lodBias = r[3].f[0]; 1699 } 1700 else 1701 lodBias = 0.0; 1702 1703 fetch_texel(mach->Samplers[unit], 1704 &r[0], &r[1], &r[2], lodBias, 1705 &r[0], &r[1], &r[2], &r[3]); 1706 break; 1707 1708 default: 1709 assert (0); 1710 } 1711 1712 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1713 STORE( &r[chan_index], 0, chan_index ); 1714 } 1715} 1716 1717 1718/** 1719 * Evaluate a constant-valued coefficient at the position of the 1720 * current quad. 1721 */ 1722static void 1723eval_constant_coef( 1724 struct tgsi_exec_machine *mach, 1725 unsigned attrib, 1726 unsigned chan ) 1727{ 1728 unsigned i; 1729 1730 for( i = 0; i < QUAD_SIZE; i++ ) { 1731 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1732 } 1733} 1734 1735/** 1736 * Evaluate a linear-valued coefficient at the position of the 1737 * current quad. 
1738 */ 1739static void 1740eval_linear_coef( 1741 struct tgsi_exec_machine *mach, 1742 unsigned attrib, 1743 unsigned chan ) 1744{ 1745 const float x = mach->QuadPos.xyzw[0].f[0]; 1746 const float y = mach->QuadPos.xyzw[1].f[0]; 1747 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1748 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1749 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1750 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1751 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1752 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1753 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1754} 1755 1756/** 1757 * Evaluate a perspective-valued coefficient at the position of the 1758 * current quad. 1759 */ 1760static void 1761eval_perspective_coef( 1762 struct tgsi_exec_machine *mach, 1763 unsigned attrib, 1764 unsigned chan ) 1765{ 1766 const float x = mach->QuadPos.xyzw[0].f[0]; 1767 const float y = mach->QuadPos.xyzw[1].f[0]; 1768 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1769 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1770 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1771 const float *w = mach->QuadPos.xyzw[3].f; 1772 /* divide by W here */ 1773 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1774 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1775 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1776 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1777} 1778 1779 1780typedef void (* eval_coef_func)( 1781 struct tgsi_exec_machine *mach, 1782 unsigned attrib, 1783 unsigned chan ); 1784 1785static void 1786exec_declaration( 1787 struct tgsi_exec_machine *mach, 1788 const struct tgsi_full_declaration *decl ) 1789{ 1790 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1791 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1792 unsigned first, last, mask; 1793 eval_coef_func eval; 1794 1795 first = 
decl->DeclarationRange.First; 1796 last = decl->DeclarationRange.Last; 1797 mask = decl->Declaration.UsageMask; 1798 1799 switch( decl->Declaration.Interpolate ) { 1800 case TGSI_INTERPOLATE_CONSTANT: 1801 eval = eval_constant_coef; 1802 break; 1803 1804 case TGSI_INTERPOLATE_LINEAR: 1805 eval = eval_linear_coef; 1806 break; 1807 1808 case TGSI_INTERPOLATE_PERSPECTIVE: 1809 eval = eval_perspective_coef; 1810 break; 1811 1812 default: 1813 eval = NULL; 1814 assert( 0 ); 1815 } 1816 1817 if( mask == TGSI_WRITEMASK_XYZW ) { 1818 unsigned i, j; 1819 1820 for( i = first; i <= last; i++ ) { 1821 for( j = 0; j < NUM_CHANNELS; j++ ) { 1822 eval( mach, i, j ); 1823 } 1824 } 1825 } 1826 else { 1827 unsigned i, j; 1828 1829 for( j = 0; j < NUM_CHANNELS; j++ ) { 1830 if( mask & (1 << j) ) { 1831 for( i = first; i <= last; i++ ) { 1832 eval( mach, i, j ); 1833 } 1834 } 1835 } 1836 } 1837 } 1838 } 1839} 1840 1841static void 1842exec_instruction( 1843 struct tgsi_exec_machine *mach, 1844 const struct tgsi_full_instruction *inst, 1845 int *pc ) 1846{ 1847 uint chan_index; 1848 union tgsi_exec_channel r[10]; 1849 1850 (*pc)++; 1851 1852 switch (inst->Instruction.Opcode) { 1853 case TGSI_OPCODE_ARL: 1854 /* TGSI_OPCODE_FLOOR */ 1855 /* TGSI_OPCODE_FLR */ 1856 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1857 FETCH( &r[0], 0, chan_index ); 1858 micro_flr( &r[0], &r[0] ); 1859 STORE( &r[0], 0, chan_index ); 1860 } 1861 break; 1862 1863 case TGSI_OPCODE_MOV: 1864 case TGSI_OPCODE_SWZ: 1865 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1866 FETCH( &r[0], 0, chan_index ); 1867 STORE( &r[0], 0, chan_index ); 1868 } 1869 break; 1870 1871 case TGSI_OPCODE_LIT: 1872 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1873 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1874 } 1875 1876 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1877 FETCH( &r[0], 0, CHAN_X ); 1878 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1879 micro_max( &r[0], &r[0], 
&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1880 STORE( &r[0], 0, CHAN_Y ); 1881 } 1882 1883 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1884 FETCH( &r[1], 0, CHAN_Y ); 1885 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1886 1887 FETCH( &r[2], 0, CHAN_W ); 1888 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1889 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1890 micro_pow( &r[1], &r[1], &r[2] ); 1891 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1892 STORE( &r[0], 0, CHAN_Z ); 1893 } 1894 } 1895 1896 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1897 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1898 } 1899 break; 1900 1901 case TGSI_OPCODE_RCP: 1902 /* TGSI_OPCODE_RECIP */ 1903 FETCH( &r[0], 0, CHAN_X ); 1904 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1905 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1906 STORE( &r[0], 0, chan_index ); 1907 } 1908 break; 1909 1910 case TGSI_OPCODE_RSQ: 1911 /* TGSI_OPCODE_RECIPSQRT */ 1912 FETCH( &r[0], 0, CHAN_X ); 1913 micro_abs( &r[0], &r[0] ); 1914 micro_sqrt( &r[0], &r[0] ); 1915 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1917 STORE( &r[0], 0, chan_index ); 1918 } 1919 break; 1920 1921 case TGSI_OPCODE_EXP: 1922 FETCH( &r[0], 0, CHAN_X ); 1923 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1925 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1926 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1927 } 1928 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1929 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1930 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1931 } 1932 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1933 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1934 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1935 } 1936 if (IS_CHANNEL_ENABLED( *inst, 
CHAN_W )) { 1937 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1938 } 1939 break; 1940 1941 case TGSI_OPCODE_LOG: 1942 FETCH( &r[0], 0, CHAN_X ); 1943 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1944 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1945 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1946 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1947 STORE( &r[0], 0, CHAN_X ); 1948 } 1949 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1950 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1951 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1952 STORE( &r[0], 0, CHAN_Y ); 1953 } 1954 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1955 STORE( &r[1], 0, CHAN_Z ); 1956 } 1957 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1958 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1959 } 1960 break; 1961 1962 case TGSI_OPCODE_MUL: 1963 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1964 { 1965 FETCH(&r[0], 0, chan_index); 1966 FETCH(&r[1], 1, chan_index); 1967 1968 micro_mul( &r[0], &r[0], &r[1] ); 1969 1970 STORE(&r[0], 0, chan_index); 1971 } 1972 break; 1973 1974 case TGSI_OPCODE_ADD: 1975 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1976 FETCH( &r[0], 0, chan_index ); 1977 FETCH( &r[1], 1, chan_index ); 1978 micro_add( &r[0], &r[0], &r[1] ); 1979 STORE( &r[0], 0, chan_index ); 1980 } 1981 break; 1982 1983 case TGSI_OPCODE_DP3: 1984 /* TGSI_OPCODE_DOT3 */ 1985 FETCH( &r[0], 0, CHAN_X ); 1986 FETCH( &r[1], 1, CHAN_X ); 1987 micro_mul( &r[0], &r[0], &r[1] ); 1988 1989 FETCH( &r[1], 0, CHAN_Y ); 1990 FETCH( &r[2], 1, CHAN_Y ); 1991 micro_mul( &r[1], &r[1], &r[2] ); 1992 micro_add( &r[0], &r[0], &r[1] ); 1993 1994 FETCH( &r[1], 0, CHAN_Z ); 1995 FETCH( &r[2], 1, CHAN_Z ); 1996 micro_mul( &r[1], &r[1], &r[2] ); 1997 micro_add( &r[0], &r[0], &r[1] ); 1998 1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2000 STORE( &r[0], 0, chan_index ); 2001 } 2002 break; 2003 2004 case TGSI_OPCODE_DP4: 2005 /* TGSI_OPCODE_DOT4 */ 2006 FETCH(&r[0], 0, CHAN_X); 2007 
FETCH(&r[1], 1, CHAN_X); 2008 2009 micro_mul( &r[0], &r[0], &r[1] ); 2010 2011 FETCH(&r[1], 0, CHAN_Y); 2012 FETCH(&r[2], 1, CHAN_Y); 2013 2014 micro_mul( &r[1], &r[1], &r[2] ); 2015 micro_add( &r[0], &r[0], &r[1] ); 2016 2017 FETCH(&r[1], 0, CHAN_Z); 2018 FETCH(&r[2], 1, CHAN_Z); 2019 2020 micro_mul( &r[1], &r[1], &r[2] ); 2021 micro_add( &r[0], &r[0], &r[1] ); 2022 2023 FETCH(&r[1], 0, CHAN_W); 2024 FETCH(&r[2], 1, CHAN_W); 2025 2026 micro_mul( &r[1], &r[1], &r[2] ); 2027 micro_add( &r[0], &r[0], &r[1] ); 2028 2029 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2030 STORE( &r[0], 0, chan_index ); 2031 } 2032 break; 2033 2034 case TGSI_OPCODE_DST: 2035 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2036 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2037 } 2038 2039 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2040 FETCH( &r[0], 0, CHAN_Y ); 2041 FETCH( &r[1], 1, CHAN_Y); 2042 micro_mul( &r[0], &r[0], &r[1] ); 2043 STORE( &r[0], 0, CHAN_Y ); 2044 } 2045 2046 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2047 FETCH( &r[0], 0, CHAN_Z ); 2048 STORE( &r[0], 0, CHAN_Z ); 2049 } 2050 2051 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2052 FETCH( &r[0], 1, CHAN_W ); 2053 STORE( &r[0], 0, CHAN_W ); 2054 } 2055 break; 2056 2057 case TGSI_OPCODE_MIN: 2058 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2059 FETCH(&r[0], 0, chan_index); 2060 FETCH(&r[1], 1, chan_index); 2061 2062 /* XXX use micro_min()?? */ 2063 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2064 2065 STORE(&r[0], 0, chan_index); 2066 } 2067 break; 2068 2069 case TGSI_OPCODE_MAX: 2070 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2071 FETCH(&r[0], 0, chan_index); 2072 FETCH(&r[1], 1, chan_index); 2073 2074 /* XXX use micro_max()?? 
*/ 2075 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2076 2077 STORE(&r[0], 0, chan_index ); 2078 } 2079 break; 2080 2081 case TGSI_OPCODE_SLT: 2082 /* TGSI_OPCODE_SETLT */ 2083 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2084 FETCH( &r[0], 0, chan_index ); 2085 FETCH( &r[1], 1, chan_index ); 2086 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2087 STORE( &r[0], 0, chan_index ); 2088 } 2089 break; 2090 2091 case TGSI_OPCODE_SGE: 2092 /* TGSI_OPCODE_SETGE */ 2093 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2094 FETCH( &r[0], 0, chan_index ); 2095 FETCH( &r[1], 1, chan_index ); 2096 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2097 STORE( &r[0], 0, chan_index ); 2098 } 2099 break; 2100 2101 case TGSI_OPCODE_MAD: 2102 /* TGSI_OPCODE_MADD */ 2103 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2104 FETCH( &r[0], 0, chan_index ); 2105 FETCH( &r[1], 1, chan_index ); 2106 micro_mul( &r[0], &r[0], &r[1] ); 2107 FETCH( &r[1], 2, chan_index ); 2108 micro_add( &r[0], &r[0], &r[1] ); 2109 STORE( &r[0], 0, chan_index ); 2110 } 2111 break; 2112 2113 case TGSI_OPCODE_SUB: 2114 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2115 FETCH(&r[0], 0, chan_index); 2116 FETCH(&r[1], 1, chan_index); 2117 2118 micro_sub( &r[0], &r[0], &r[1] ); 2119 2120 STORE(&r[0], 0, chan_index); 2121 } 2122 break; 2123 2124 case TGSI_OPCODE_LERP: 2125 /* TGSI_OPCODE_LRP */ 2126 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2127 FETCH(&r[0], 0, chan_index); 2128 FETCH(&r[1], 1, chan_index); 2129 FETCH(&r[2], 2, chan_index); 2130 2131 micro_sub( &r[1], &r[1], &r[2] ); 2132 micro_mul( &r[0], &r[0], &r[1] ); 2133 micro_add( &r[0], &r[0], &r[2] ); 2134 2135 STORE(&r[0], 0, chan_index); 2136 } 2137 break; 2138 2139 case TGSI_OPCODE_CND: 2140 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2141 FETCH(&r[0], 0, chan_index); 2142 FETCH(&r[1], 1, chan_index); 2143 
FETCH(&r[2], 2, chan_index); 2144 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2145 STORE(&r[0], 0, chan_index); 2146 } 2147 break; 2148 2149 case TGSI_OPCODE_CND0: 2150 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2151 FETCH(&r[0], 0, chan_index); 2152 FETCH(&r[1], 1, chan_index); 2153 FETCH(&r[2], 2, chan_index); 2154 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); 2155 STORE(&r[0], 0, chan_index); 2156 } 2157 break; 2158 2159 case TGSI_OPCODE_DOT2ADD: 2160 /* TGSI_OPCODE_DP2A */ 2161 FETCH( &r[0], 0, CHAN_X ); 2162 FETCH( &r[1], 1, CHAN_X ); 2163 micro_mul( &r[0], &r[0], &r[1] ); 2164 2165 FETCH( &r[1], 0, CHAN_Y ); 2166 FETCH( &r[2], 1, CHAN_Y ); 2167 micro_mul( &r[1], &r[1], &r[2] ); 2168 micro_add( &r[0], &r[0], &r[1] ); 2169 2170 FETCH( &r[2], 2, CHAN_X ); 2171 micro_add( &r[0], &r[0], &r[2] ); 2172 2173 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2174 STORE( &r[0], 0, chan_index ); 2175 } 2176 break; 2177 2178 case TGSI_OPCODE_INDEX: 2179 /* XXX: considered for removal */ 2180 assert (0); 2181 break; 2182 2183 case TGSI_OPCODE_NEGATE: 2184 /* XXX: considered for removal */ 2185 assert (0); 2186 break; 2187 2188 case TGSI_OPCODE_FRAC: 2189 /* TGSI_OPCODE_FRC */ 2190 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2191 FETCH( &r[0], 0, chan_index ); 2192 micro_frc( &r[0], &r[0] ); 2193 STORE( &r[0], 0, chan_index ); 2194 } 2195 break; 2196 2197 case TGSI_OPCODE_CLAMP: 2198 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2199 FETCH(&r[0], 0, chan_index); 2200 FETCH(&r[1], 1, chan_index); 2201 micro_max(&r[0], &r[0], &r[1]); 2202 FETCH(&r[1], 2, chan_index); 2203 micro_min(&r[0], &r[0], &r[1]); 2204 STORE(&r[0], 0, chan_index); 2205 } 2206 break; 2207 2208 case TGSI_OPCODE_ROUND: 2209 case TGSI_OPCODE_ARR: 2210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2211 FETCH( &r[0], 0, chan_index ); 2212 micro_rnd( &r[0], &r[0] ); 2213 STORE( &r[0], 0, chan_index ); 2214 } 2215 break; 2216 2217 
case TGSI_OPCODE_EXPBASE2: 2218 /* TGSI_OPCODE_EX2 */ 2219 FETCH(&r[0], 0, CHAN_X); 2220 2221#if FAST_MATH 2222 micro_exp2( &r[0], &r[0] ); 2223#else 2224 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2225#endif 2226 2227 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2228 STORE( &r[0], 0, chan_index ); 2229 } 2230 break; 2231 2232 case TGSI_OPCODE_LOGBASE2: 2233 /* TGSI_OPCODE_LG2 */ 2234 FETCH( &r[0], 0, CHAN_X ); 2235 micro_lg2( &r[0], &r[0] ); 2236 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2237 STORE( &r[0], 0, chan_index ); 2238 } 2239 break; 2240 2241 case TGSI_OPCODE_POWER: 2242 /* TGSI_OPCODE_POW */ 2243 FETCH(&r[0], 0, CHAN_X); 2244 FETCH(&r[1], 1, CHAN_X); 2245 2246 micro_pow( &r[0], &r[0], &r[1] ); 2247 2248 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2249 STORE( &r[0], 0, chan_index ); 2250 } 2251 break; 2252 2253 case TGSI_OPCODE_CROSSPRODUCT: 2254 /* TGSI_OPCODE_XPD */ 2255 FETCH(&r[0], 0, CHAN_Y); 2256 FETCH(&r[1], 1, CHAN_Z); 2257 2258 micro_mul( &r[2], &r[0], &r[1] ); 2259 2260 FETCH(&r[3], 0, CHAN_Z); 2261 FETCH(&r[4], 1, CHAN_Y); 2262 2263 micro_mul( &r[5], &r[3], &r[4] ); 2264 micro_sub( &r[2], &r[2], &r[5] ); 2265 2266 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2267 STORE( &r[2], 0, CHAN_X ); 2268 } 2269 2270 FETCH(&r[2], 1, CHAN_X); 2271 2272 micro_mul( &r[3], &r[3], &r[2] ); 2273 2274 FETCH(&r[5], 0, CHAN_X); 2275 2276 micro_mul( &r[1], &r[1], &r[5] ); 2277 micro_sub( &r[3], &r[3], &r[1] ); 2278 2279 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2280 STORE( &r[3], 0, CHAN_Y ); 2281 } 2282 2283 micro_mul( &r[5], &r[5], &r[4] ); 2284 micro_mul( &r[0], &r[0], &r[2] ); 2285 micro_sub( &r[5], &r[5], &r[0] ); 2286 2287 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2288 STORE( &r[5], 0, CHAN_Z ); 2289 } 2290 2291 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2292 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2293 } 2294 break; 2295 2296 case TGSI_OPCODE_MULTIPLYMATRIX: 2297 /* XXX: considered for removal */ 2298 
assert (0); 2299 break; 2300 2301 case TGSI_OPCODE_ABS: 2302 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2303 FETCH(&r[0], 0, chan_index); 2304 2305 micro_abs( &r[0], &r[0] ); 2306 2307 STORE(&r[0], 0, chan_index); 2308 } 2309 break; 2310 2311 case TGSI_OPCODE_RCC: 2312 FETCH(&r[0], 0, CHAN_X); 2313 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2314 micro_float_clamp(&r[0], &r[0]); 2315 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2316 STORE(&r[0], 0, chan_index); 2317 } 2318 break; 2319 2320 case TGSI_OPCODE_DPH: 2321 FETCH(&r[0], 0, CHAN_X); 2322 FETCH(&r[1], 1, CHAN_X); 2323 2324 micro_mul( &r[0], &r[0], &r[1] ); 2325 2326 FETCH(&r[1], 0, CHAN_Y); 2327 FETCH(&r[2], 1, CHAN_Y); 2328 2329 micro_mul( &r[1], &r[1], &r[2] ); 2330 micro_add( &r[0], &r[0], &r[1] ); 2331 2332 FETCH(&r[1], 0, CHAN_Z); 2333 FETCH(&r[2], 1, CHAN_Z); 2334 2335 micro_mul( &r[1], &r[1], &r[2] ); 2336 micro_add( &r[0], &r[0], &r[1] ); 2337 2338 FETCH(&r[1], 1, CHAN_W); 2339 2340 micro_add( &r[0], &r[0], &r[1] ); 2341 2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2343 STORE( &r[0], 0, chan_index ); 2344 } 2345 break; 2346 2347 case TGSI_OPCODE_COS: 2348 FETCH(&r[0], 0, CHAN_X); 2349 2350 micro_cos( &r[0], &r[0] ); 2351 2352 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2353 STORE( &r[0], 0, chan_index ); 2354 } 2355 break; 2356 2357 case TGSI_OPCODE_DDX: 2358 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2359 FETCH( &r[0], 0, chan_index ); 2360 micro_ddx( &r[0], &r[0] ); 2361 STORE( &r[0], 0, chan_index ); 2362 } 2363 break; 2364 2365 case TGSI_OPCODE_DDY: 2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2367 FETCH( &r[0], 0, chan_index ); 2368 micro_ddy( &r[0], &r[0] ); 2369 STORE( &r[0], 0, chan_index ); 2370 } 2371 break; 2372 2373 case TGSI_OPCODE_KILP: 2374 exec_kilp (mach, inst); 2375 break; 2376 2377 case TGSI_OPCODE_KIL: 2378 exec_kil (mach, inst); 2379 break; 2380 2381 case TGSI_OPCODE_PK2H: 2382 assert (0); 2383 break; 2384 2385 case 
TGSI_OPCODE_PK2US: 2386 assert (0); 2387 break; 2388 2389 case TGSI_OPCODE_PK4B: 2390 assert (0); 2391 break; 2392 2393 case TGSI_OPCODE_PK4UB: 2394 assert (0); 2395 break; 2396 2397 case TGSI_OPCODE_RFL: 2398 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2399 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2400 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2401 /* r0 = dp3(src0, src0) */ 2402 FETCH(&r[2], 0, CHAN_X); 2403 micro_mul(&r[0], &r[2], &r[2]); 2404 FETCH(&r[4], 0, CHAN_Y); 2405 micro_mul(&r[8], &r[4], &r[4]); 2406 micro_add(&r[0], &r[0], &r[8]); 2407 FETCH(&r[6], 0, CHAN_Z); 2408 micro_mul(&r[8], &r[6], &r[6]); 2409 micro_add(&r[0], &r[0], &r[8]); 2410 2411 /* r1 = dp3(src0, src1) */ 2412 FETCH(&r[3], 1, CHAN_X); 2413 micro_mul(&r[1], &r[2], &r[3]); 2414 FETCH(&r[5], 1, CHAN_Y); 2415 micro_mul(&r[8], &r[4], &r[5]); 2416 micro_add(&r[1], &r[1], &r[8]); 2417 FETCH(&r[7], 1, CHAN_Z); 2418 micro_mul(&r[8], &r[6], &r[7]); 2419 micro_add(&r[1], &r[1], &r[8]); 2420 2421 /* r1 = 2 * r1 / r0 */ 2422 micro_add(&r[1], &r[1], &r[1]); 2423 micro_div(&r[1], &r[1], &r[0]); 2424 2425 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2426 micro_mul(&r[2], &r[2], &r[1]); 2427 micro_sub(&r[2], &r[2], &r[3]); 2428 STORE(&r[2], 0, CHAN_X); 2429 } 2430 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2431 micro_mul(&r[4], &r[4], &r[1]); 2432 micro_sub(&r[4], &r[4], &r[5]); 2433 STORE(&r[4], 0, CHAN_Y); 2434 } 2435 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2436 micro_mul(&r[6], &r[6], &r[1]); 2437 micro_sub(&r[6], &r[6], &r[7]); 2438 STORE(&r[6], 0, CHAN_Z); 2439 } 2440 } 2441 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2442 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2443 } 2444 break; 2445 2446 case TGSI_OPCODE_SEQ: 2447 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2448 FETCH( &r[0], 0, chan_index ); 2449 FETCH( &r[1], 1, chan_index ); 2450 micro_eq( &r[0], &r[0], &r[1], 2451 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2452 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2453 STORE( &r[0], 0, chan_index ); 2454 } 
2455 break; 2456 2457 case TGSI_OPCODE_SFL: 2458 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2459 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2460 } 2461 break; 2462 2463 case TGSI_OPCODE_SGT: 2464 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2465 FETCH( &r[0], 0, chan_index ); 2466 FETCH( &r[1], 1, chan_index ); 2467 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2468 STORE( &r[0], 0, chan_index ); 2469 } 2470 break; 2471 2472 case TGSI_OPCODE_SIN: 2473 FETCH( &r[0], 0, CHAN_X ); 2474 micro_sin( &r[0], &r[0] ); 2475 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2476 STORE( &r[0], 0, chan_index ); 2477 } 2478 break; 2479 2480 case TGSI_OPCODE_SLE: 2481 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2482 FETCH( &r[0], 0, chan_index ); 2483 FETCH( &r[1], 1, chan_index ); 2484 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2485 STORE( &r[0], 0, chan_index ); 2486 } 2487 break; 2488 2489 case TGSI_OPCODE_SNE: 2490 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2491 FETCH( &r[0], 0, chan_index ); 2492 FETCH( &r[1], 1, chan_index ); 2493 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2494 STORE( &r[0], 0, chan_index ); 2495 } 2496 break; 2497 2498 case TGSI_OPCODE_STR: 2499 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2500 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2501 } 2502 break; 2503 2504 case TGSI_OPCODE_TEX: 2505 /* simple texture lookup */ 2506 /* src[0] = texcoord */ 2507 /* src[1] = sampler unit */ 2508 exec_tex(mach, inst, FALSE, FALSE); 2509 break; 2510 2511 case TGSI_OPCODE_TXB: 2512 /* Texture lookup with lod bias */ 2513 /* src[0] = texcoord (src[0].w = LOD bias) */ 2514 /* src[1] = sampler unit */ 2515 exec_tex(mach, inst, TRUE, FALSE); 2516 break; 2517 2518 case TGSI_OPCODE_TXD: 2519 /* Texture lookup with explict partial 
derivatives */ 2520 /* src[0] = texcoord */ 2521 /* src[1] = d[strq]/dx */ 2522 /* src[2] = d[strq]/dy */ 2523 /* src[3] = sampler unit */ 2524 assert (0); 2525 break; 2526 2527 case TGSI_OPCODE_TXL: 2528 /* Texture lookup with explit LOD */ 2529 /* src[0] = texcoord (src[0].w = LOD) */ 2530 /* src[1] = sampler unit */ 2531 exec_tex(mach, inst, TRUE, FALSE); 2532 break; 2533 2534 case TGSI_OPCODE_TXP: 2535 /* Texture lookup with projection */ 2536 /* src[0] = texcoord (src[0].w = projection) */ 2537 /* src[1] = sampler unit */ 2538 exec_tex(mach, inst, FALSE, TRUE); 2539 break; 2540 2541 case TGSI_OPCODE_UP2H: 2542 assert (0); 2543 break; 2544 2545 case TGSI_OPCODE_UP2US: 2546 assert (0); 2547 break; 2548 2549 case TGSI_OPCODE_UP4B: 2550 assert (0); 2551 break; 2552 2553 case TGSI_OPCODE_UP4UB: 2554 assert (0); 2555 break; 2556 2557 case TGSI_OPCODE_X2D: 2558 FETCH(&r[0], 1, CHAN_X); 2559 FETCH(&r[1], 1, CHAN_Y); 2560 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2561 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2562 FETCH(&r[2], 2, CHAN_X); 2563 micro_mul(&r[2], &r[2], &r[0]); 2564 FETCH(&r[3], 2, CHAN_Y); 2565 micro_mul(&r[3], &r[3], &r[1]); 2566 micro_add(&r[2], &r[2], &r[3]); 2567 FETCH(&r[3], 0, CHAN_X); 2568 micro_add(&r[2], &r[2], &r[3]); 2569 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2570 STORE(&r[2], 0, CHAN_X); 2571 } 2572 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2573 STORE(&r[2], 0, CHAN_Z); 2574 } 2575 } 2576 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2577 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2578 FETCH(&r[2], 2, CHAN_Z); 2579 micro_mul(&r[2], &r[2], &r[0]); 2580 FETCH(&r[3], 2, CHAN_W); 2581 micro_mul(&r[3], &r[3], &r[1]); 2582 micro_add(&r[2], &r[2], &r[3]); 2583 FETCH(&r[3], 0, CHAN_Y); 2584 micro_add(&r[2], &r[2], &r[3]); 2585 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2586 STORE(&r[2], 0, CHAN_Y); 2587 } 2588 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2589 STORE(&r[2], 0, CHAN_W); 2590 } 2591 } 2592 break; 2593 2594 case TGSI_OPCODE_ARA: 2595 assert (0); 2596 break; 
2597 2598 case TGSI_OPCODE_BRA: 2599 assert (0); 2600 break; 2601 2602 case TGSI_OPCODE_CAL: 2603 /* skip the call if no execution channels are enabled */ 2604 if (mach->ExecMask) { 2605 /* do the call */ 2606 2607 /* push the Cond, Loop, Cont stacks */ 2608 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2609 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2610 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2611 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2612 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2613 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2614 2615 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2616 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2617 2618 /* note that PC was already incremented above */ 2619 mach->CallStack[mach->CallStackTop++] = *pc; 2620 *pc = inst->InstructionExtLabel.Label; 2621 } 2622 break; 2623 2624 case TGSI_OPCODE_RET: 2625 mach->FuncMask &= ~mach->ExecMask; 2626 UPDATE_EXEC_MASK(mach); 2627 2628 if (mach->FuncMask == 0x0) { 2629 /* really return now (otherwise, keep executing */ 2630 2631 if (mach->CallStackTop == 0) { 2632 /* returning from main() */ 2633 *pc = -1; 2634 return; 2635 } 2636 *pc = mach->CallStack[--mach->CallStackTop]; 2637 2638 /* pop the Cond, Loop, Cont stacks */ 2639 assert(mach->CondStackTop > 0); 2640 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2641 assert(mach->LoopStackTop > 0); 2642 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2643 assert(mach->ContStackTop > 0); 2644 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2645 assert(mach->FuncStackTop > 0); 2646 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2647 2648 UPDATE_EXEC_MASK(mach); 2649 } 2650 break; 2651 2652 case TGSI_OPCODE_SSG: 2653 /* TGSI_OPCODE_SGN */ 2654 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2655 FETCH( &r[0], 0, chan_index ); 2656 micro_sgn( &r[0], &r[0] ); 2657 STORE( &r[0], 0, chan_index ); 2658 } 2659 
break; 2660 2661 case TGSI_OPCODE_CMP: 2662 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2663 FETCH(&r[0], 0, chan_index); 2664 FETCH(&r[1], 1, chan_index); 2665 FETCH(&r[2], 2, chan_index); 2666 2667 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2668 2669 STORE(&r[0], 0, chan_index); 2670 } 2671 break; 2672 2673 case TGSI_OPCODE_SCS: 2674 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2675 FETCH( &r[0], 0, CHAN_X ); 2676 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2677 micro_cos(&r[1], &r[0]); 2678 STORE(&r[1], 0, CHAN_X); 2679 } 2680 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2681 micro_sin(&r[1], &r[0]); 2682 STORE(&r[1], 0, CHAN_Y); 2683 } 2684 } 2685 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2686 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2687 } 2688 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2689 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2690 } 2691 break; 2692 2693 case TGSI_OPCODE_NRM: 2694 /* 3-component vector normalize */ 2695 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2696 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2697 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2698 /* r3 = sqrt(dp3(src0, src0)) */ 2699 FETCH(&r[0], 0, CHAN_X); 2700 micro_mul(&r[3], &r[0], &r[0]); 2701 FETCH(&r[1], 0, CHAN_Y); 2702 micro_mul(&r[4], &r[1], &r[1]); 2703 micro_add(&r[3], &r[3], &r[4]); 2704 FETCH(&r[2], 0, CHAN_Z); 2705 micro_mul(&r[4], &r[2], &r[2]); 2706 micro_add(&r[3], &r[3], &r[4]); 2707 micro_sqrt(&r[3], &r[3]); 2708 2709 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2710 micro_div(&r[0], &r[0], &r[3]); 2711 STORE(&r[0], 0, CHAN_X); 2712 } 2713 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2714 micro_div(&r[1], &r[1], &r[3]); 2715 STORE(&r[1], 0, CHAN_Y); 2716 } 2717 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2718 micro_div(&r[2], &r[2], &r[3]); 2719 STORE(&r[2], 0, CHAN_Z); 2720 } 2721 } 2722 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2723 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, 
CHAN_W); 2724 } 2725 break; 2726 2727 case TGSI_OPCODE_NRM4: 2728 /* 4-component vector normalize */ 2729 { 2730 union tgsi_exec_channel tmp, dot; 2731 2732 /* tmp = dp4(src0, src0): */ 2733 FETCH( &r[0], 0, CHAN_X ); 2734 micro_mul( &tmp, &r[0], &r[0] ); 2735 2736 FETCH( &r[1], 0, CHAN_Y ); 2737 micro_mul( &dot, &r[1], &r[1] ); 2738 micro_add( &tmp, &tmp, &dot ); 2739 2740 FETCH( &r[2], 0, CHAN_Z ); 2741 micro_mul( &dot, &r[2], &r[2] ); 2742 micro_add( &tmp, &tmp, &dot ); 2743 2744 FETCH( &r[3], 0, CHAN_W ); 2745 micro_mul( &dot, &r[3], &r[3] ); 2746 micro_add( &tmp, &tmp, &dot ); 2747 2748 /* tmp = 1 / sqrt(tmp) */ 2749 micro_sqrt( &tmp, &tmp ); 2750 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2751 2752 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2753 /* chan = chan * tmp */ 2754 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2755 STORE( &r[chan_index], 0, chan_index ); 2756 } 2757 } 2758 break; 2759 2760 case TGSI_OPCODE_DIV: 2761 assert( 0 ); 2762 break; 2763 2764 case TGSI_OPCODE_DP2: 2765 FETCH( &r[0], 0, CHAN_X ); 2766 FETCH( &r[1], 1, CHAN_X ); 2767 micro_mul( &r[0], &r[0], &r[1] ); 2768 2769 FETCH( &r[1], 0, CHAN_Y ); 2770 FETCH( &r[2], 1, CHAN_Y ); 2771 micro_mul( &r[1], &r[1], &r[2] ); 2772 micro_add( &r[0], &r[0], &r[1] ); 2773 2774 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2775 STORE( &r[0], 0, chan_index ); 2776 } 2777 break; 2778 2779 case TGSI_OPCODE_IF: 2780 /* push CondMask */ 2781 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2782 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2783 FETCH( &r[0], 0, CHAN_X ); 2784 /* update CondMask */ 2785 if( ! r[0].u[0] ) { 2786 mach->CondMask &= ~0x1; 2787 } 2788 if( ! r[0].u[1] ) { 2789 mach->CondMask &= ~0x2; 2790 } 2791 if( ! r[0].u[2] ) { 2792 mach->CondMask &= ~0x4; 2793 } 2794 if( ! 
r[0].u[3] ) { 2795 mach->CondMask &= ~0x8; 2796 } 2797 UPDATE_EXEC_MASK(mach); 2798 /* Todo: If CondMask==0, jump to ELSE */ 2799 break; 2800 2801 case TGSI_OPCODE_ELSE: 2802 /* invert CondMask wrt previous mask */ 2803 { 2804 uint prevMask; 2805 assert(mach->CondStackTop > 0); 2806 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2807 mach->CondMask = ~mach->CondMask & prevMask; 2808 UPDATE_EXEC_MASK(mach); 2809 /* Todo: If CondMask==0, jump to ENDIF */ 2810 } 2811 break; 2812 2813 case TGSI_OPCODE_ENDIF: 2814 /* pop CondMask */ 2815 assert(mach->CondStackTop > 0); 2816 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2817 UPDATE_EXEC_MASK(mach); 2818 break; 2819 2820 case TGSI_OPCODE_END: 2821 /* halt execution */ 2822 *pc = -1; 2823 break; 2824 2825 case TGSI_OPCODE_REP: 2826 assert (0); 2827 break; 2828 2829 case TGSI_OPCODE_ENDREP: 2830 assert (0); 2831 break; 2832 2833 case TGSI_OPCODE_PUSHA: 2834 assert (0); 2835 break; 2836 2837 case TGSI_OPCODE_POPA: 2838 assert (0); 2839 break; 2840 2841 case TGSI_OPCODE_CEIL: 2842 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2843 FETCH( &r[0], 0, chan_index ); 2844 micro_ceil( &r[0], &r[0] ); 2845 STORE( &r[0], 0, chan_index ); 2846 } 2847 break; 2848 2849 case TGSI_OPCODE_I2F: 2850 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2851 FETCH( &r[0], 0, chan_index ); 2852 micro_i2f( &r[0], &r[0] ); 2853 STORE( &r[0], 0, chan_index ); 2854 } 2855 break; 2856 2857 case TGSI_OPCODE_NOT: 2858 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2859 FETCH( &r[0], 0, chan_index ); 2860 micro_not( &r[0], &r[0] ); 2861 STORE( &r[0], 0, chan_index ); 2862 } 2863 break; 2864 2865 case TGSI_OPCODE_TRUNC: 2866 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2867 FETCH( &r[0], 0, chan_index ); 2868 micro_trunc( &r[0], &r[0] ); 2869 STORE( &r[0], 0, chan_index ); 2870 } 2871 break; 2872 2873 case TGSI_OPCODE_SHL: 2874 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2875 FETCH( &r[0], 0, chan_index ); 2876 FETCH( &r[1], 1, 
chan_index ); 2877 micro_shl( &r[0], &r[0], &r[1] ); 2878 STORE( &r[0], 0, chan_index ); 2879 } 2880 break; 2881 2882 case TGSI_OPCODE_SHR: 2883 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2884 FETCH( &r[0], 0, chan_index ); 2885 FETCH( &r[1], 1, chan_index ); 2886 micro_ishr( &r[0], &r[0], &r[1] ); 2887 STORE( &r[0], 0, chan_index ); 2888 } 2889 break; 2890 2891 case TGSI_OPCODE_AND: 2892 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2893 FETCH( &r[0], 0, chan_index ); 2894 FETCH( &r[1], 1, chan_index ); 2895 micro_and( &r[0], &r[0], &r[1] ); 2896 STORE( &r[0], 0, chan_index ); 2897 } 2898 break; 2899 2900 case TGSI_OPCODE_OR: 2901 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2902 FETCH( &r[0], 0, chan_index ); 2903 FETCH( &r[1], 1, chan_index ); 2904 micro_or( &r[0], &r[0], &r[1] ); 2905 STORE( &r[0], 0, chan_index ); 2906 } 2907 break; 2908 2909 case TGSI_OPCODE_MOD: 2910 assert (0); 2911 break; 2912 2913 case TGSI_OPCODE_XOR: 2914 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2915 FETCH( &r[0], 0, chan_index ); 2916 FETCH( &r[1], 1, chan_index ); 2917 micro_xor( &r[0], &r[0], &r[1] ); 2918 STORE( &r[0], 0, chan_index ); 2919 } 2920 break; 2921 2922 case TGSI_OPCODE_SAD: 2923 assert (0); 2924 break; 2925 2926 case TGSI_OPCODE_TXF: 2927 assert (0); 2928 break; 2929 2930 case TGSI_OPCODE_TXQ: 2931 assert (0); 2932 break; 2933 2934 case TGSI_OPCODE_EMIT: 2935 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2936 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2937 break; 2938 2939 case TGSI_OPCODE_ENDPRIM: 2940 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2941 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2942 break; 2943 2944 case TGSI_OPCODE_LOOP: 2945 /* fall-through (for now) */ 2946 case TGSI_OPCODE_BGNLOOP2: 2947 /* push LoopMask and ContMasks */ 2948 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2949 mach->LoopStack[mach->LoopStackTop++] = 
mach->LoopMask; 2950 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2951 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2952 break; 2953 2954 case TGSI_OPCODE_ENDLOOP: 2955 /* fall-through (for now at least) */ 2956 case TGSI_OPCODE_ENDLOOP2: 2957 /* Restore ContMask, but don't pop */ 2958 assert(mach->ContStackTop > 0); 2959 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2960 UPDATE_EXEC_MASK(mach); 2961 if (mach->ExecMask) { 2962 /* repeat loop: jump to instruction just past BGNLOOP */ 2963 *pc = inst->InstructionExtLabel.Label + 1; 2964 } 2965 else { 2966 /* exit loop: pop LoopMask */ 2967 assert(mach->LoopStackTop > 0); 2968 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2969 /* pop ContMask */ 2970 assert(mach->ContStackTop > 0); 2971 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2972 } 2973 UPDATE_EXEC_MASK(mach); 2974 break; 2975 2976 case TGSI_OPCODE_BRK: 2977 /* turn off loop channels for each enabled exec channel */ 2978 mach->LoopMask &= ~mach->ExecMask; 2979 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2980 UPDATE_EXEC_MASK(mach); 2981 break; 2982 2983 case TGSI_OPCODE_CONT: 2984 /* turn off cont channels for each enabled exec channel */ 2985 mach->ContMask &= ~mach->ExecMask; 2986 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2987 UPDATE_EXEC_MASK(mach); 2988 break; 2989 2990 case TGSI_OPCODE_BGNSUB: 2991 /* no-op */ 2992 break; 2993 2994 case TGSI_OPCODE_ENDSUB: 2995 /* no-op */ 2996 break; 2997 2998 case TGSI_OPCODE_NOISE1: 2999 assert( 0 ); 3000 break; 3001 3002 case TGSI_OPCODE_NOISE2: 3003 assert( 0 ); 3004 break; 3005 3006 case TGSI_OPCODE_NOISE3: 3007 assert( 0 ); 3008 break; 3009 3010 case TGSI_OPCODE_NOISE4: 3011 assert( 0 ); 3012 break; 3013 3014 case TGSI_OPCODE_NOP: 3015 break; 3016 3017 default: 3018 assert( 0 ); 3019 } 3020} 3021 3022 3023/** 3024 * Run TGSI interpreter. 
3025 * \return bitmask of "alive" quad components 3026 */ 3027uint 3028tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3029{ 3030 uint i; 3031 int pc = 0; 3032 3033 mach->CondMask = 0xf; 3034 mach->LoopMask = 0xf; 3035 mach->ContMask = 0xf; 3036 mach->FuncMask = 0xf; 3037 mach->ExecMask = 0xf; 3038 3039 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 3040 assert(mach->CondStackTop == 0); 3041 assert(mach->LoopStackTop == 0); 3042 assert(mach->ContStackTop == 0); 3043 assert(mach->CallStackTop == 0); 3044 3045 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3046 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3047 3048 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3049 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3050 mach->Primitives[0] = 0; 3051 } 3052 3053 for (i = 0; i < QUAD_SIZE; i++) { 3054 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3055 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3056 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3057 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3058 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3059 } 3060 3061 /* execute declarations (interpolants) */ 3062 for (i = 0; i < mach->NumDeclarations; i++) { 3063 exec_declaration( mach, mach->Declarations+i ); 3064 } 3065 3066 /* execute instructions, until pc is set to -1 */ 3067 while (pc != -1) { 3068 assert(pc < (int) mach->NumInstructions); 3069 exec_instruction( mach, mach->Instructions + pc, &pc ); 3070 } 3071 3072#if 0 3073 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3074 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3075 /* 3076 * Scale back depth component. 3077 */ 3078 for (i = 0; i < 4; i++) 3079 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3080 } 3081#endif 3082 3083 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3084} 3085