tgsi_exec.c revision 848ab8be8c34b00b2afe6120882f8c29f047ced5
1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************************************************************** 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Permission is hereby granted, free of charge, to any person obtaining a 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * copy of this software and associated documentation files (the 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "Software"), to deal in the Software without restriction, including 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * without limitation the rights to use, copy, modify, merge, publish, 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * distribute, sub license, and/or sell copies of the Software, and to 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * permit persons to whom the Software is furnished to do so, subject to 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the following conditions: 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The above copyright notice and this permission notice (including the 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * next paragraph) shall be included in all copies or substantial portions 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the Software. 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru **************************************************************************/ 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TGSI interpreter/executor. 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Flow control information: 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * care since a condition may be true for some quad components but false 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for other components. 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We basically execute all statements (even if they're in the part of 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an IF/ELSE clause that's "not taken") and use a special mask to 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * control writing to destination registers. This is the ExecMask. 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * See store_dest(). 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The ExecMask is computed from three other masks (CondMask, LoopMask and 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ContMask) which are controlled by the flow control instructions (namely: 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Authors: 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Michal Krol 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Brian Paul 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "pipe/p_compiler.h" 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "pipe/p_state.h" 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "pipe/p_shader_tokens.h" 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tgsi/tgsi_dump.h" 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tgsi/tgsi_parse.h" 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tgsi/tgsi_util.h" 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tgsi_exec.h" 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util/u_memory.h" 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util/u_math.h" 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define FAST_MATH 1 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** for tgsi_full_instruction::Flags */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SOA_DEPENDENCY_FLAG 0x1 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TILE_TOP_LEFT 0 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TILE_TOP_RIGHT 1 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TILE_BOTTOM_LEFT 2 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TILE_BOTTOM_RIGHT 3 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CHAN_X 0 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CHAN_Y 1 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CHAN_Z 2 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CHAN_W 3 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Shorthand locations of various utility registers (_I = Index, _C = Channel) 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_128_I TGSI_EXEC_TEMP_128_I 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_128_C TGSI_EXEC_TEMP_128_C 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TEMP_R0 TGSI_EXEC_TEMP_R0 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_CHANNEL_ENABLED(INST, CHAN)\ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (IS_CHANNEL_ENABLED( INST, CHAN )) 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (IS_CHANNEL_ENABLED2( INST, CHAN )) 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** The execution mask depends on the conditional mask and the loop mask */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UPDATE_EXEC_MASK(MACH) \ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const union tgsi_exec_channel ZeroVec = 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { { 0.0, 0.0, 0.0, 0.0 } }; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef DEBUG 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hocheck_inf_or_nan(const union tgsi_exec_channel *chan) 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert(!util_is_inf_or_nan(chan->f[0])); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert(!util_is_inf_or_nan(chan->f[1])); 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert(!util_is_inf_or_nan(chan->f[2])); 14285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho assert(!util_is_inf_or_nan(chan->f[3])); 14385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef DEBUG 14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprint_chan(const char *msg, const union tgsi_exec_channel *chan) 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru debug_printf("%s = {%f, %f, %f, %f}\n", 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef DEBUG 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprint_temp(const struct tgsi_exec_machine *mach, uint index) 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru debug_printf("Temp[%u] =\n", index); 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i = 0; i < 4; i++) { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru debug_printf(" %c: { %f, %f, %f, %f }\n", 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "XYZW"[i], 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp->xyzw[i].f[0], 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp->xyzw[i].f[1], 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp->xyzw[i].f[2], 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tmp->xyzw[i].f[3]); 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Check if there's a potential src/dst register data dependency when 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * using SOA execution. 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Example: 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MOV T, T.yxwz; 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This would expand into: 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MOV t0, t1; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MOV t1, t0; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MOV t2, t3; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MOV t3, t2; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The second instruction will have the wrong value for t0 if executed as-is. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruboolean 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint i, chan; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (writemask == TGSI_WRITEMASK_X || 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru writemask == TGSI_WRITEMASK_Y || 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru writemask == TGSI_WRITEMASK_Z || 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru writemask == TGSI_WRITEMASK_W || 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru writemask == TGSI_WRITEMASK_NONE) { 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no chance of data dependency */ 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* loop over src regs */ 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((inst->FullSrcRegisters[i].SrcRegister.File == 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inst->FullDstRegisters[0].DstRegister.File) && 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (inst->FullSrcRegisters[i].SrcRegister.Index == 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inst->FullDstRegisters[0].DstRegister.Index)) { 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* loop over dest channels */ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint channelsWritten = 0x0; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if we're reading a channel that's been written */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (swizzle <= TGSI_SWIZZLE_W && 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (channelsWritten & (1 << swizzle))) { 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru channelsWritten |= (1 << chan); 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Initialize machine state by expanding tokens to full instructions, 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocating temporary storage, setting up constants, etc. 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * After this, we can call tgsi_exec_machine_run() many times. 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutgsi_exec_machine_bind_shader( 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_exec_machine *mach, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const struct tgsi_token *tokens, 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint numSamplers, 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_sampler **samplers) 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint k; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_parse_context parse; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_exec_labels *labels = &mach->Labels; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_full_instruction *instructions; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_full_declaration *declarations; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint maxInstructions = 10, numInstructions = 0; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint maxDeclarations = 10, numDeclarations = 0; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint instno = 0; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tgsi_dump(tokens, 0); 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru util_init_math(); 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Tokens = tokens; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Samplers = samplers; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru k = tgsi_parse_init (&parse, mach->Tokens); 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (k != TGSI_PARSE_OK) { 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru debug_printf( "Problem parsing!\n" ); 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Processor = parse.FullHeader.Processor.Processor; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->ImmLimit = 0; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru labels->count = 0; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru declarations = (struct tgsi_full_declaration *) 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!declarations) { 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru instructions = (struct tgsi_full_instruction *) 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!instructions) { 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FREE( declarations ); 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while( !tgsi_parse_end_of_tokens( &parse ) ) { 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint pointer = parse.Position; 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint i; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tgsi_parse_token( &parse ); 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch( parse.FullToken.Token.Type ) { 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case TGSI_TOKEN_TYPE_DECLARATION: 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save expanded declaration */ 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (numDeclarations == maxDeclarations) { 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru declarations = REALLOC(declarations, 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxDeclarations 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sizeof(struct tgsi_full_declaration), 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (maxDeclarations + 10) 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sizeof(struct tgsi_full_declaration)); 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxDeclarations += 10; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(declarations + numDeclarations, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &parse.FullToken.FullDeclaration, 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(declarations[0])); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numDeclarations++; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case TGSI_TOKEN_TYPE_IMMEDIATE: 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert( size <= 4 ); 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for( i = 0; i < size; i++ ) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Imms[mach->ImmLimit][i] = 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru parse.FullToken.FullImmediate.u[i].Float; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->ImmLimit += 1; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case TGSI_TOKEN_TYPE_INSTRUCTION: 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert( labels->count < MAX_LABELS ); 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru labels->labels[labels->count][0] = instno; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru labels->labels[labels->count][1] = pointer; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru labels->count++; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save expanded instruction */ 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (numInstructions == maxInstructions) { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru instructions = REALLOC(instructions, 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInstructions 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sizeof(struct tgsi_full_instruction), 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (maxInstructions + 10) 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sizeof(struct tgsi_full_instruction)); 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInstructions += 10; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* XXX we only handle SOA dependencies properly for MOV/SWZ 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * at this time! 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opcode != TGSI_OPCODE_MOV && opcode != TGSI_OPCODE_SWZ) { 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru debug_printf("Warning: SOA dependency in instruction" 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru " is not handled:\n"); 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tgsi_dump_instruction(&parse.FullToken.FullInstruction, 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numInstructions); 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(instructions + numInstructions, 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &parse.FullToken.FullInstruction, 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(instructions[0])); 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numInstructions++; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru assert( 0 ); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tgsi_parse_free (&parse); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mach->Declarations) { 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FREE( mach->Declarations ); 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Declarations = declarations; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->NumDeclarations = numDeclarations; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mach->Instructions) { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FREE( mach->Instructions ); 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Instructions = instructions; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->NumInstructions = numInstructions; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct tgsi_exec_machine * 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutgsi_exec_machine_create( void ) 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct tgsi_exec_machine *mach; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint i; 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach = align_malloc( sizeof *mach, 16 ); 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!mach) 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto fail; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memset(mach, 0, sizeof(*mach)); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Setup constants. */ 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for( i = 0; i < 4; i++ ) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef DEBUG 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* silence warnings */ 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void) print_chan; 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void) print_temp; 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return mach; 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querufail: 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru align_free(mach); 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mach) { 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FREE(mach->Instructions); 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FREE(mach->Declarations); 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru align_free(mach); 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumicro_abs( 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru union tgsi_exec_channel *dst, 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const union tgsi_exec_channel *src ) 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[0] = fabsf( src->f[0] ); 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[1] = fabsf( src->f[1] ); 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[2] = fabsf( src->f[2] ); 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[3] = fabsf( src->f[3] ); 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumicro_add( 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru union tgsi_exec_channel *dst, 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const union tgsi_exec_channel *src0, 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const union tgsi_exec_channel *src1 ) 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[0] = src0->f[0] + src1->f[0]; 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[1] = src0->f[1] + src1->f[1]; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[2] = src0->f[2] + src1->f[2]; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->f[3] = src0->f[3] + src1->f[3]; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumicro_iadd( 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru union tgsi_exec_channel *dst, 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const union tgsi_exec_channel *src0, 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const union tgsi_exec_channel *src1 ) 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->i[0] = src0->i[0] + src1->i[0]; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->i[1] = src0->i[1] + src1->i[1]; 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->i[2] = src0->i[2] + src1->i[2]; 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dst->i[3] = src0->i[3] + src1->i[3]; 464} 465#endif 466 467static void 468micro_and( 469 union tgsi_exec_channel *dst, 470 const union tgsi_exec_channel *src0, 471 const union tgsi_exec_channel *src1 ) 472{ 473 dst->u[0] = src0->u[0] & src1->u[0]; 474 dst->u[1] = src0->u[1] & src1->u[1]; 475 dst->u[2] = src0->u[2] & src1->u[2]; 476 dst->u[3] = src0->u[3] & src1->u[3]; 477} 478 479static void 480micro_ceil( 481 union tgsi_exec_channel *dst, 482 const union tgsi_exec_channel *src ) 483{ 484 dst->f[0] = ceilf( src->f[0] ); 485 dst->f[1] = ceilf( src->f[1] ); 486 dst->f[2] = ceilf( src->f[2] ); 487 dst->f[3] = ceilf( src->f[3] ); 488} 489 490static void 491micro_cos( 492 union tgsi_exec_channel *dst, 493 const union tgsi_exec_channel *src ) 494{ 495 dst->f[0] = cosf( src->f[0] ); 496 dst->f[1] = cosf( src->f[1] ); 497 dst->f[2] = cosf( src->f[2] ); 498 dst->f[3] = cosf( src->f[3] ); 499} 500 501static void 502micro_ddx( 503 union tgsi_exec_channel *dst, 504 const union tgsi_exec_channel *src ) 505{ 506 dst->f[0] = 507 dst->f[1] = 508 dst->f[2] = 509 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 510} 511 512static void 513micro_ddy( 514 union tgsi_exec_channel *dst, 515 const union tgsi_exec_channel *src ) 516{ 517 dst->f[0] = 518 dst->f[1] = 519 dst->f[2] = 520 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 521} 522 523static void 524micro_div( 525 union tgsi_exec_channel *dst, 526 const union tgsi_exec_channel *src0, 527 const union tgsi_exec_channel *src1 ) 528{ 529 if (src1->f[0] != 0) { 530 dst->f[0] = src0->f[0] / src1->f[0]; 531 } 532 if (src1->f[1] != 0) { 533 dst->f[1] = src0->f[1] / src1->f[1]; 534 } 535 if (src1->f[2] != 0) { 536 dst->f[2] = src0->f[2] / src1->f[2]; 537 } 538 if (src1->f[3] != 0) { 539 dst->f[3] = src0->f[3] / src1->f[3]; 540 } 541} 542 543#if 0 544static void 545micro_udiv( 546 union tgsi_exec_channel *dst, 547 const union tgsi_exec_channel *src0, 548 const union tgsi_exec_channel *src1 ) 549{ 550 dst->u[0] = src0->u[0] / src1->u[0]; 551 dst->u[1] = src0->u[1] / src1->u[1]; 552 dst->u[2] = src0->u[2] / src1->u[2]; 553 dst->u[3] = src0->u[3] / src1->u[3]; 554} 555#endif 556 557static void 558micro_eq( 559 union tgsi_exec_channel *dst, 560 const union tgsi_exec_channel *src0, 561 const union tgsi_exec_channel *src1, 562 const union tgsi_exec_channel *src2, 563 const union tgsi_exec_channel *src3 ) 564{ 565 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 566 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 567 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 568 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 569} 570 571#if 0 572static void 573micro_ieq( 574 union tgsi_exec_channel *dst, 575 const union tgsi_exec_channel *src0, 576 const union tgsi_exec_channel *src1, 577 const union tgsi_exec_channel *src2, 578 const union tgsi_exec_channel *src3 ) 579{ 580 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 581 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 582 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 583 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 584} 585#endif 586 587static void 588micro_exp2( 589 union tgsi_exec_channel *dst, 590 const union tgsi_exec_channel *src) 591{ 592#if FAST_MATH 593 dst->f[0] = util_fast_exp2( src->f[0] ); 594 dst->f[1] = util_fast_exp2( src->f[1] ); 595 dst->f[2] = util_fast_exp2( src->f[2] ); 596 dst->f[3] = util_fast_exp2( src->f[3] ); 597#else 598 dst->f[0] = powf( 2.0f, src->f[0] ); 599 dst->f[1] = powf( 2.0f, src->f[1] ); 600 dst->f[2] = powf( 2.0f, src->f[2] ); 601 dst->f[3] = powf( 2.0f, src->f[3] ); 602#endif 603} 604 605#if 0 606static void 607micro_f2ut( 608 union tgsi_exec_channel *dst, 609 const union tgsi_exec_channel *src ) 610{ 611 dst->u[0] = (uint) src->f[0]; 612 dst->u[1] = (uint) src->f[1]; 613 dst->u[2] = (uint) src->f[2]; 614 dst->u[3] = (uint) src->f[3]; 615} 616#endif 617 618static void 619micro_float_clamp(union tgsi_exec_channel *dst, 620 const union tgsi_exec_channel *src) 621{ 622 uint i; 623 624 for (i = 0; i < 4; i++) { 625 if (src->f[i] > 0.0f) { 626 if (src->f[i] > 1.884467e+019f) 627 dst->f[i] = 1.884467e+019f; 628 else if (src->f[i] < 5.42101e-020f) 629 dst->f[i] = 5.42101e-020f; 630 else 631 dst->f[i] = src->f[i]; 632 } 633 else { 634 if (src->f[i] < -1.884467e+019f) 635 dst->f[i] = -1.884467e+019f; 636 else if (src->f[i] > -5.42101e-020f) 637 dst->f[i] = -5.42101e-020f; 638 else 639 dst->f[i] = src->f[i]; 640 } 641 } 642} 643 644static void 645micro_flr( 646 union tgsi_exec_channel *dst, 647 const union tgsi_exec_channel *src ) 648{ 649 dst->f[0] = floorf( src->f[0] ); 650 dst->f[1] = floorf( src->f[1] ); 651 dst->f[2] = floorf( src->f[2] ); 652 dst->f[3] = floorf( src->f[3] ); 653} 654 655static void 656micro_frc( 657 union tgsi_exec_channel *dst, 658 const union tgsi_exec_channel *src ) 659{ 660 dst->f[0] = src->f[0] - floorf( src->f[0] ); 661 dst->f[1] = src->f[1] - floorf( src->f[1] ); 662 dst->f[2] = src->f[2] - floorf( src->f[2] ); 663 dst->f[3] = src->f[3] - floorf( src->f[3] ); 664} 665 666static void 667micro_i2f( 668 union tgsi_exec_channel *dst, 669 const union tgsi_exec_channel *src ) 670{ 671 dst->f[0] = (float) src->i[0]; 672 dst->f[1] = (float) src->i[1]; 673 dst->f[2] = (float) src->i[2]; 674 dst->f[3] = (float) src->i[3]; 675} 676 677static void 678micro_lg2( 679 union tgsi_exec_channel *dst, 680 const union tgsi_exec_channel *src ) 681{ 682#if FAST_MATH 683 dst->f[0] = util_fast_log2( src->f[0] ); 684 dst->f[1] = util_fast_log2( src->f[1] ); 685 dst->f[2] = util_fast_log2( src->f[2] ); 686 dst->f[3] = util_fast_log2( src->f[3] ); 687#else 688 dst->f[0] = logf( src->f[0] ) * 1.442695f; 689 dst->f[1] = logf( src->f[1] ) * 1.442695f; 690 dst->f[2] = logf( src->f[2] ) * 1.442695f; 691 dst->f[3] = logf( src->f[3] ) * 1.442695f; 692#endif 693} 694 695static void 696micro_le( 697 union tgsi_exec_channel *dst, 698 const union tgsi_exec_channel *src0, 699 const union tgsi_exec_channel *src1, 700 const union tgsi_exec_channel *src2, 701 const union tgsi_exec_channel *src3 ) 702{ 703 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 704 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 705 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 706 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 707} 708 709static void 710micro_lt( 711 union tgsi_exec_channel *dst, 712 const union tgsi_exec_channel *src0, 713 const union tgsi_exec_channel *src1, 714 const union tgsi_exec_channel *src2, 715 const union tgsi_exec_channel *src3 ) 716{ 717 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 718 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 719 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 720 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 721} 722 723#if 0 724static void 725micro_ilt( 726 union tgsi_exec_channel *dst, 727 const union tgsi_exec_channel *src0, 728 const union tgsi_exec_channel *src1, 729 const union tgsi_exec_channel *src2, 730 const union tgsi_exec_channel *src3 ) 731{ 732 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 733 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 734 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 735 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 736} 737#endif 738 739#if 0 740static void 741micro_ult( 742 union tgsi_exec_channel *dst, 743 const union tgsi_exec_channel *src0, 744 const union tgsi_exec_channel *src1, 745 const union tgsi_exec_channel *src2, 746 const union tgsi_exec_channel *src3 ) 747{ 748 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 749 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 750 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 751 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 752} 753#endif 754 755static void 756micro_max( 757 union tgsi_exec_channel *dst, 758 const union tgsi_exec_channel *src0, 759 const union tgsi_exec_channel *src1 ) 760{ 761 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 762 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 763 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 764 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 765} 766 767#if 0 768static void 769micro_imax( 770 union tgsi_exec_channel *dst, 771 const union tgsi_exec_channel *src0, 772 const union tgsi_exec_channel *src1 ) 773{ 774 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 775 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 776 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 777 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 778} 779#endif 780 781#if 0 782static void 783micro_umax( 784 union tgsi_exec_channel *dst, 785 const union tgsi_exec_channel *src0, 786 const union tgsi_exec_channel *src1 ) 787{ 788 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 789 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 790 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 791 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 792} 793#endif 794 795static void 796micro_min( 797 union tgsi_exec_channel *dst, 798 const union tgsi_exec_channel *src0, 799 const union tgsi_exec_channel *src1 ) 800{ 801 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 802 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 803 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 804 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 805} 806 807#if 0 808static void 809micro_imin( 810 union tgsi_exec_channel *dst, 811 const union tgsi_exec_channel *src0, 812 const union tgsi_exec_channel *src1 ) 813{ 814 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 815 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 816 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 817 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 818} 819#endif 820 821#if 0 822static void 823micro_umin( 824 union tgsi_exec_channel *dst, 825 const union tgsi_exec_channel *src0, 826 const union tgsi_exec_channel *src1 ) 827{ 828 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 829 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 830 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 831 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 832} 833#endif 834 835#if 0 836static void 837micro_umod( 838 union tgsi_exec_channel *dst, 839 const union tgsi_exec_channel *src0, 840 const union tgsi_exec_channel *src1 ) 841{ 842 dst->u[0] = src0->u[0] % src1->u[0]; 843 dst->u[1] = src0->u[1] % src1->u[1]; 844 dst->u[2] = src0->u[2] % src1->u[2]; 845 dst->u[3] = src0->u[3] % src1->u[3]; 846} 847#endif 848 849static void 850micro_mul( 851 union tgsi_exec_channel *dst, 852 const union tgsi_exec_channel *src0, 853 const union tgsi_exec_channel *src1 ) 854{ 855 dst->f[0] = src0->f[0] * src1->f[0]; 856 dst->f[1] = src0->f[1] * src1->f[1]; 857 dst->f[2] = src0->f[2] * src1->f[2]; 858 dst->f[3] = src0->f[3] * src1->f[3]; 859} 860 861#if 0 862static void 863micro_imul( 864 union tgsi_exec_channel *dst, 865 const union tgsi_exec_channel *src0, 866 const union tgsi_exec_channel *src1 ) 867{ 868 dst->i[0] = src0->i[0] * src1->i[0]; 869 dst->i[1] = src0->i[1] * src1->i[1]; 870 dst->i[2] = src0->i[2] * src1->i[2]; 871 dst->i[3] = src0->i[3] * src1->i[3]; 872} 873#endif 874 875#if 0 876static void 877micro_imul64( 878 union tgsi_exec_channel *dst0, 879 union tgsi_exec_channel *dst1, 880 const union tgsi_exec_channel *src0, 881 const union tgsi_exec_channel *src1 ) 882{ 883 dst1->i[0] = src0->i[0] * src1->i[0]; 884 dst1->i[1] = src0->i[1] * src1->i[1]; 885 dst1->i[2] = src0->i[2] * src1->i[2]; 886 dst1->i[3] = src0->i[3] * src1->i[3]; 887 dst0->i[0] = 0; 888 dst0->i[1] = 0; 889 dst0->i[2] = 0; 890 dst0->i[3] = 0; 891} 892#endif 893 894#if 0 895static void 896micro_umul64( 897 union tgsi_exec_channel *dst0, 898 union tgsi_exec_channel *dst1, 899 const union tgsi_exec_channel *src0, 900 const union tgsi_exec_channel *src1 ) 901{ 902 dst1->u[0] = src0->u[0] * src1->u[0]; 903 dst1->u[1] = src0->u[1] * src1->u[1]; 904 dst1->u[2] = src0->u[2] * src1->u[2]; 905 dst1->u[3] = src0->u[3] * src1->u[3]; 906 dst0->u[0] = 0; 907 dst0->u[1] = 0; 908 dst0->u[2] = 0; 909 dst0->u[3] = 0; 910} 911#endif 912 913 914#if 0 915static void 916micro_movc( 917 union tgsi_exec_channel *dst, 918 const union tgsi_exec_channel *src0, 919 const union tgsi_exec_channel *src1, 920 const union tgsi_exec_channel *src2 ) 921{ 922 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 923 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 924 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 925 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 926} 927#endif 928 929static void 930micro_neg( 931 union tgsi_exec_channel *dst, 932 const union tgsi_exec_channel *src ) 933{ 934 dst->f[0] = -src->f[0]; 935 dst->f[1] = -src->f[1]; 936 dst->f[2] = -src->f[2]; 937 dst->f[3] = -src->f[3]; 938} 939 940#if 0 941static void 942micro_ineg( 943 union tgsi_exec_channel *dst, 944 const union tgsi_exec_channel *src ) 945{ 946 dst->i[0] = -src->i[0]; 947 dst->i[1] = -src->i[1]; 948 dst->i[2] = -src->i[2]; 949 dst->i[3] = -src->i[3]; 950} 951#endif 952 953static void 954micro_not( 955 union tgsi_exec_channel *dst, 956 const union tgsi_exec_channel *src ) 957{ 958 dst->u[0] = ~src->u[0]; 959 dst->u[1] = ~src->u[1]; 960 dst->u[2] = ~src->u[2]; 961 dst->u[3] = ~src->u[3]; 962} 963 964static void 965micro_or( 966 union tgsi_exec_channel *dst, 967 const union tgsi_exec_channel *src0, 968 const union tgsi_exec_channel *src1 ) 969{ 970 dst->u[0] = src0->u[0] | src1->u[0]; 971 dst->u[1] = src0->u[1] | src1->u[1]; 972 dst->u[2] = src0->u[2] | src1->u[2]; 973 dst->u[3] = src0->u[3] | src1->u[3]; 974} 975 976static void 977micro_pow( 978 union tgsi_exec_channel *dst, 979 const union tgsi_exec_channel *src0, 980 const union tgsi_exec_channel *src1 ) 981{ 982#if FAST_MATH 983 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 984 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 985 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 986 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 987#else 988 dst->f[0] = powf( src0->f[0], src1->f[0] ); 989 dst->f[1] = powf( src0->f[1], src1->f[1] ); 990 dst->f[2] = powf( src0->f[2], src1->f[2] ); 991 dst->f[3] = powf( src0->f[3], src1->f[3] ); 992#endif 993} 994 995static void 996micro_rnd( 997 union tgsi_exec_channel *dst, 998 const union tgsi_exec_channel *src ) 999{ 1000 dst->f[0] = floorf( src->f[0] + 0.5f ); 1001 dst->f[1] = floorf( src->f[1] + 0.5f ); 1002 dst->f[2] = floorf( src->f[2] + 0.5f ); 1003 dst->f[3] = floorf( src->f[3] + 0.5f ); 1004} 1005 1006static void 1007micro_sgn( 1008 union tgsi_exec_channel *dst, 1009 const union tgsi_exec_channel *src ) 1010{ 1011 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1012 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1013 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1014 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1015} 1016 1017static void 1018micro_shl( 1019 union tgsi_exec_channel *dst, 1020 const union tgsi_exec_channel *src0, 1021 const union tgsi_exec_channel *src1 ) 1022{ 1023 dst->i[0] = src0->i[0] << src1->i[0]; 1024 dst->i[1] = src0->i[1] << src1->i[1]; 1025 dst->i[2] = src0->i[2] << src1->i[2]; 1026 dst->i[3] = src0->i[3] << src1->i[3]; 1027} 1028 1029static void 1030micro_ishr( 1031 union tgsi_exec_channel *dst, 1032 const union tgsi_exec_channel *src0, 1033 const union tgsi_exec_channel *src1 ) 1034{ 1035 dst->i[0] = src0->i[0] >> src1->i[0]; 1036 dst->i[1] = src0->i[1] >> src1->i[1]; 1037 dst->i[2] = src0->i[2] >> src1->i[2]; 1038 dst->i[3] = src0->i[3] >> src1->i[3]; 1039} 1040 1041static void 1042micro_trunc( 1043 union tgsi_exec_channel *dst, 1044 const union tgsi_exec_channel *src0 ) 1045{ 1046 dst->f[0] = (float) (int) src0->f[0]; 1047 dst->f[1] = (float) (int) src0->f[1]; 1048 dst->f[2] = (float) (int) src0->f[2]; 1049 dst->f[3] = (float) (int) src0->f[3]; 1050} 1051 1052#if 0 1053static void 1054micro_ushr( 1055 union tgsi_exec_channel *dst, 1056 const union tgsi_exec_channel *src0, 1057 const union tgsi_exec_channel *src1 ) 1058{ 1059 dst->u[0] = src0->u[0] >> src1->u[0]; 1060 dst->u[1] = src0->u[1] >> src1->u[1]; 1061 dst->u[2] = src0->u[2] >> src1->u[2]; 1062 dst->u[3] = src0->u[3] >> src1->u[3]; 1063} 1064#endif 1065 1066static void 1067micro_sin( 1068 union tgsi_exec_channel *dst, 1069 const union tgsi_exec_channel *src ) 1070{ 1071 dst->f[0] = sinf( src->f[0] ); 1072 dst->f[1] = sinf( src->f[1] ); 1073 dst->f[2] = sinf( src->f[2] ); 1074 dst->f[3] = sinf( src->f[3] ); 1075} 1076 1077static void 1078micro_sqrt( union tgsi_exec_channel *dst, 1079 const union tgsi_exec_channel *src ) 1080{ 1081 dst->f[0] = sqrtf( src->f[0] ); 1082 dst->f[1] = sqrtf( src->f[1] ); 1083 dst->f[2] = sqrtf( src->f[2] ); 1084 dst->f[3] = sqrtf( src->f[3] ); 1085} 1086 1087static void 1088micro_sub( 1089 union tgsi_exec_channel *dst, 1090 const union tgsi_exec_channel *src0, 1091 const union tgsi_exec_channel *src1 ) 1092{ 1093 dst->f[0] = src0->f[0] - src1->f[0]; 1094 dst->f[1] = src0->f[1] - src1->f[1]; 1095 dst->f[2] = src0->f[2] - src1->f[2]; 1096 dst->f[3] = src0->f[3] - src1->f[3]; 1097} 1098 1099#if 0 1100static void 1101micro_u2f( 1102 union tgsi_exec_channel *dst, 1103 const union tgsi_exec_channel *src ) 1104{ 1105 dst->f[0] = (float) src->u[0]; 1106 dst->f[1] = (float) src->u[1]; 1107 dst->f[2] = (float) src->u[2]; 1108 dst->f[3] = (float) src->u[3]; 1109} 1110#endif 1111 1112static void 1113micro_xor( 1114 union tgsi_exec_channel *dst, 1115 const union tgsi_exec_channel *src0, 1116 const union tgsi_exec_channel *src1 ) 1117{ 1118 dst->u[0] = src0->u[0] ^ src1->u[0]; 1119 dst->u[1] = src0->u[1] ^ src1->u[1]; 1120 dst->u[2] = src0->u[2] ^ src1->u[2]; 1121 dst->u[3] = src0->u[3] ^ src1->u[3]; 1122} 1123 1124static void 1125fetch_src_file_channel( 1126 const struct tgsi_exec_machine *mach, 1127 const uint file, 1128 const uint swizzle, 1129 const union tgsi_exec_channel *index, 1130 union tgsi_exec_channel *chan ) 1131{ 1132 switch( swizzle ) { 1133 case TGSI_EXTSWIZZLE_X: 1134 case TGSI_EXTSWIZZLE_Y: 1135 case TGSI_EXTSWIZZLE_Z: 1136 case TGSI_EXTSWIZZLE_W: 1137 switch( file ) { 1138 case TGSI_FILE_CONSTANT: 1139 assert(mach->Consts); 1140 if (index->i[0] < 0) 1141 chan->f[0] = 0.0f; 1142 else 1143 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1144 if (index->i[1] < 0) 1145 chan->f[1] = 0.0f; 1146 else 1147 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1148 if (index->i[2] < 0) 1149 chan->f[2] = 0.0f; 1150 else 1151 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1152 if (index->i[3] < 0) 1153 chan->f[3] = 0.0f; 1154 else 1155 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1156 break; 1157 1158 case TGSI_FILE_INPUT: 1159 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1160 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1161 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1162 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1163 break; 1164 1165 case TGSI_FILE_TEMPORARY: 1166 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1167 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1168 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1169 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1170 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1171 break; 1172 1173 case TGSI_FILE_IMMEDIATE: 1174 assert( index->i[0] < (int) mach->ImmLimit ); 1175 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1176 assert( index->i[1] < (int) mach->ImmLimit ); 1177 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1178 assert( index->i[2] < (int) mach->ImmLimit ); 1179 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1180 assert( index->i[3] < (int) mach->ImmLimit ); 1181 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1182 break; 1183 1184 case TGSI_FILE_ADDRESS: 1185 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1186 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1187 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1188 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1189 break; 1190 1191 case TGSI_FILE_OUTPUT: 1192 /* vertex/fragment output vars can be read too */ 1193 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1194 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1195 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1196 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1197 break; 1198 1199 default: 1200 assert( 0 ); 1201 } 1202 break; 1203 1204 case TGSI_EXTSWIZZLE_ZERO: 1205 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1206 break; 1207 1208 case TGSI_EXTSWIZZLE_ONE: 1209 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1210 break; 1211 1212 default: 1213 assert( 0 ); 1214 } 1215} 1216 1217static void 1218fetch_source( 1219 const struct tgsi_exec_machine *mach, 1220 union tgsi_exec_channel *chan, 1221 const struct tgsi_full_src_register *reg, 1222 const uint chan_index ) 1223{ 1224 union tgsi_exec_channel index; 1225 uint swizzle; 1226 1227 /* We start with a direct index into a register file. 1228 * 1229 * file[1], 1230 * where: 1231 * file = SrcRegister.File 1232 * [1] = SrcRegister.Index 1233 */ 1234 index.i[0] = 1235 index.i[1] = 1236 index.i[2] = 1237 index.i[3] = reg->SrcRegister.Index; 1238 1239 /* There is an extra source register that indirectly subscripts 1240 * a register file. The direct index now becomes an offset 1241 * that is being added to the indirect register. 1242 * 1243 * file[ind[2].x+1], 1244 * where: 1245 * ind = SrcRegisterInd.File 1246 * [2] = SrcRegisterInd.Index 1247 * .x = SrcRegisterInd.SwizzleX 1248 */ 1249 if (reg->SrcRegister.Indirect) { 1250 union tgsi_exec_channel index2; 1251 union tgsi_exec_channel indir_index; 1252 const uint execmask = mach->ExecMask; 1253 uint i; 1254 1255 /* which address register (always zero now) */ 1256 index2.i[0] = 1257 index2.i[1] = 1258 index2.i[2] = 1259 index2.i[3] = reg->SrcRegisterInd.Index; 1260 1261 /* get current value of address register[swizzle] */ 1262 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1263 fetch_src_file_channel( 1264 mach, 1265 reg->SrcRegisterInd.File, 1266 swizzle, 1267 &index2, 1268 &indir_index ); 1269 1270 /* add value of address register to the offset */ 1271 index.i[0] += (int) indir_index.f[0]; 1272 index.i[1] += (int) indir_index.f[1]; 1273 index.i[2] += (int) indir_index.f[2]; 1274 index.i[3] += (int) indir_index.f[3]; 1275 1276 /* for disabled execution channels, zero-out the index to 1277 * avoid using a potential garbage value. 1278 */ 1279 for (i = 0; i < QUAD_SIZE; i++) { 1280 if ((execmask & (1 << i)) == 0) 1281 index.i[i] = 0; 1282 } 1283 } 1284 1285 /* There is an extra source register that is a second 1286 * subscript to a register file. Effectively it means that 1287 * the register file is actually a 2D array of registers. 1288 * 1289 * file[1][3] == file[1*sizeof(file[1])+3], 1290 * where: 1291 * [3] = SrcRegisterDim.Index 1292 */ 1293 if (reg->SrcRegister.Dimension) { 1294 /* The size of the first-order array depends on the register file type. 1295 * We need to multiply the index to the first array to get an effective, 1296 * "flat" index that points to the beginning of the second-order array. 1297 */ 1298 switch (reg->SrcRegister.File) { 1299 case TGSI_FILE_INPUT: 1300 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1301 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1302 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1303 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1304 break; 1305 case TGSI_FILE_CONSTANT: 1306 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1307 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1308 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1309 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1310 break; 1311 default: 1312 assert( 0 ); 1313 } 1314 1315 index.i[0] += reg->SrcRegisterDim.Index; 1316 index.i[1] += reg->SrcRegisterDim.Index; 1317 index.i[2] += reg->SrcRegisterDim.Index; 1318 index.i[3] += reg->SrcRegisterDim.Index; 1319 1320 /* Again, the second subscript index can be addressed indirectly 1321 * identically to the first one. 1322 * Nothing stops us from indirectly addressing the indirect register, 1323 * but there is no need for that, so we won't exercise it. 1324 * 1325 * file[1][ind[4].y+3], 1326 * where: 1327 * ind = SrcRegisterDimInd.File 1328 * [4] = SrcRegisterDimInd.Index 1329 * .y = SrcRegisterDimInd.SwizzleX 1330 */ 1331 if (reg->SrcRegisterDim.Indirect) { 1332 union tgsi_exec_channel index2; 1333 union tgsi_exec_channel indir_index; 1334 const uint execmask = mach->ExecMask; 1335 uint i; 1336 1337 index2.i[0] = 1338 index2.i[1] = 1339 index2.i[2] = 1340 index2.i[3] = reg->SrcRegisterDimInd.Index; 1341 1342 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1343 fetch_src_file_channel( 1344 mach, 1345 reg->SrcRegisterDimInd.File, 1346 swizzle, 1347 &index2, 1348 &indir_index ); 1349 1350 index.i[0] += (int) indir_index.f[0]; 1351 index.i[1] += (int) indir_index.f[1]; 1352 index.i[2] += (int) indir_index.f[2]; 1353 index.i[3] += (int) indir_index.f[3]; 1354 1355 /* for disabled execution channels, zero-out the index to 1356 * avoid using a potential garbage value. 1357 */ 1358 for (i = 0; i < QUAD_SIZE; i++) { 1359 if ((execmask & (1 << i)) == 0) 1360 index.i[i] = 0; 1361 } 1362 } 1363 1364 /* If by any chance there was a need for a 3D array of register 1365 * files, we would have to check whether SrcRegisterDim is followed 1366 * by a dimension register and continue the saga. 1367 */ 1368 } 1369 1370 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1371 fetch_src_file_channel( 1372 mach, 1373 reg->SrcRegister.File, 1374 swizzle, 1375 &index, 1376 chan ); 1377 1378 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1379 case TGSI_UTIL_SIGN_CLEAR: 1380 micro_abs( chan, chan ); 1381 break; 1382 1383 case TGSI_UTIL_SIGN_SET: 1384 micro_abs( chan, chan ); 1385 micro_neg( chan, chan ); 1386 break; 1387 1388 case TGSI_UTIL_SIGN_TOGGLE: 1389 micro_neg( chan, chan ); 1390 break; 1391 1392 case TGSI_UTIL_SIGN_KEEP: 1393 break; 1394 } 1395 1396 if (reg->SrcRegisterExtMod.Complement) { 1397 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1398 } 1399} 1400 1401static void 1402store_dest( 1403 struct tgsi_exec_machine *mach, 1404 const union tgsi_exec_channel *chan, 1405 const struct tgsi_full_dst_register *reg, 1406 const struct tgsi_full_instruction *inst, 1407 uint chan_index ) 1408{ 1409 uint i; 1410 union tgsi_exec_channel null; 1411 union tgsi_exec_channel *dst; 1412 uint execmask = mach->ExecMask; 1413 int offset = 0; /* indirection offset */ 1414 int index; 1415 1416#ifdef DEBUG 1417 check_inf_or_nan(chan); 1418#endif 1419 1420 /* There is an extra source register that indirectly subscripts 1421 * a register file. The direct index now becomes an offset 1422 * that is being added to the indirect register. 1423 * 1424 * file[ind[2].x+1], 1425 * where: 1426 * ind = DstRegisterInd.File 1427 * [2] = DstRegisterInd.Index 1428 * .x = DstRegisterInd.SwizzleX 1429 */ 1430 if (reg->DstRegister.Indirect) { 1431 union tgsi_exec_channel index; 1432 union tgsi_exec_channel indir_index; 1433 uint swizzle; 1434 1435 /* which address register (always zero for now) */ 1436 index.i[0] = 1437 index.i[1] = 1438 index.i[2] = 1439 index.i[3] = reg->DstRegisterInd.Index; 1440 1441 /* get current value of address register[swizzle] */ 1442 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1443 1444 /* fetch values from the address/indirection register */ 1445 fetch_src_file_channel( 1446 mach, 1447 reg->DstRegisterInd.File, 1448 swizzle, 1449 &index, 1450 &indir_index ); 1451 1452 /* save indirection offset */ 1453 offset = (int) indir_index.f[0]; 1454 } 1455 1456 switch (reg->DstRegister.File) { 1457 case TGSI_FILE_NULL: 1458 dst = &null; 1459 break; 1460 1461 case TGSI_FILE_OUTPUT: 1462 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1463 + reg->DstRegister.Index; 1464 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1465 break; 1466 1467 case TGSI_FILE_TEMPORARY: 1468 index = reg->DstRegister.Index; 1469 assert( index < TGSI_EXEC_NUM_TEMPS ); 1470 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1471 break; 1472 1473 case TGSI_FILE_ADDRESS: 1474 index = reg->DstRegister.Index; 1475 dst = &mach->Addrs[index].xyzw[chan_index]; 1476 break; 1477 1478 default: 1479 assert( 0 ); 1480 return; 1481 } 1482 1483 if (inst->InstructionExtNv.CondFlowEnable) { 1484 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1485 uint swizzle; 1486 uint shift; 1487 uint mask; 1488 uint test; 1489 1490 /* Only CC0 supported. 1491 */ 1492 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1493 1494 switch (chan_index) { 1495 case CHAN_X: 1496 swizzle = inst->InstructionExtNv.CondSwizzleX; 1497 break; 1498 case CHAN_Y: 1499 swizzle = inst->InstructionExtNv.CondSwizzleY; 1500 break; 1501 case CHAN_Z: 1502 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1503 break; 1504 case CHAN_W: 1505 swizzle = inst->InstructionExtNv.CondSwizzleW; 1506 break; 1507 default: 1508 assert( 0 ); 1509 return; 1510 } 1511 1512 switch (swizzle) { 1513 case TGSI_SWIZZLE_X: 1514 shift = TGSI_EXEC_CC_X_SHIFT; 1515 mask = TGSI_EXEC_CC_X_MASK; 1516 break; 1517 case TGSI_SWIZZLE_Y: 1518 shift = TGSI_EXEC_CC_Y_SHIFT; 1519 mask = TGSI_EXEC_CC_Y_MASK; 1520 break; 1521 case TGSI_SWIZZLE_Z: 1522 shift = TGSI_EXEC_CC_Z_SHIFT; 1523 mask = TGSI_EXEC_CC_Z_MASK; 1524 break; 1525 case TGSI_SWIZZLE_W: 1526 shift = TGSI_EXEC_CC_W_SHIFT; 1527 mask = TGSI_EXEC_CC_W_MASK; 1528 break; 1529 default: 1530 assert( 0 ); 1531 return; 1532 } 1533 1534 switch (inst->InstructionExtNv.CondMask) { 1535 case TGSI_CC_GT: 1536 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1537 for (i = 0; i < QUAD_SIZE; i++) 1538 if (cc->u[i] & test) 1539 execmask &= ~(1 << i); 1540 break; 1541 1542 case TGSI_CC_EQ: 1543 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1544 for (i = 0; i < QUAD_SIZE; i++) 1545 if (cc->u[i] & test) 1546 execmask &= ~(1 << i); 1547 break; 1548 1549 case TGSI_CC_LT: 1550 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1551 for (i = 0; i < QUAD_SIZE; i++) 1552 if (cc->u[i] & test) 1553 execmask &= ~(1 << i); 1554 break; 1555 1556 case TGSI_CC_GE: 1557 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1558 for (i = 0; i < QUAD_SIZE; i++) 1559 if (cc->u[i] & test) 1560 execmask &= ~(1 << i); 1561 break; 1562 1563 case TGSI_CC_LE: 1564 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1565 for (i = 0; i < QUAD_SIZE; i++) 1566 if (cc->u[i] & test) 1567 execmask &= ~(1 << i); 1568 break; 1569 1570 case TGSI_CC_NE: 1571 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1572 for (i = 0; i < QUAD_SIZE; i++) 1573 if (cc->u[i] & test) 1574 execmask &= ~(1 << i); 1575 break; 1576 1577 case TGSI_CC_TR: 1578 break; 1579 1580 case TGSI_CC_FL: 1581 for (i = 0; i < QUAD_SIZE; i++) 1582 execmask &= ~(1 << i); 1583 break; 1584 1585 default: 1586 assert( 0 ); 1587 return; 1588 } 1589 } 1590 1591 switch (inst->Instruction.Saturate) { 1592 case TGSI_SAT_NONE: 1593 for (i = 0; i < QUAD_SIZE; i++) 1594 if (execmask & (1 << i)) 1595 dst->i[i] = chan->i[i]; 1596 break; 1597 1598 case TGSI_SAT_ZERO_ONE: 1599 for (i = 0; i < QUAD_SIZE; i++) 1600 if (execmask & (1 << i)) { 1601 if (chan->f[i] < 0.0f) 1602 dst->f[i] = 0.0f; 1603 else if (chan->f[i] > 1.0f) 1604 dst->f[i] = 1.0f; 1605 else 1606 dst->i[i] = chan->i[i]; 1607 } 1608 break; 1609 1610 case TGSI_SAT_MINUS_PLUS_ONE: 1611 for (i = 0; i < QUAD_SIZE; i++) 1612 if (execmask & (1 << i)) { 1613 if (chan->f[i] < -1.0f) 1614 dst->f[i] = -1.0f; 1615 else if (chan->f[i] > 1.0f) 1616 dst->f[i] = 1.0f; 1617 else 1618 dst->i[i] = chan->i[i]; 1619 } 1620 break; 1621 1622 default: 1623 assert( 0 ); 1624 } 1625 1626 if (inst->InstructionExtNv.CondDstUpdate) { 1627 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1628 uint shift; 1629 uint mask; 1630 1631 /* Only CC0 supported. 1632 */ 1633 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1634 1635 switch (chan_index) { 1636 case CHAN_X: 1637 shift = TGSI_EXEC_CC_X_SHIFT; 1638 mask = ~TGSI_EXEC_CC_X_MASK; 1639 break; 1640 case CHAN_Y: 1641 shift = TGSI_EXEC_CC_Y_SHIFT; 1642 mask = ~TGSI_EXEC_CC_Y_MASK; 1643 break; 1644 case CHAN_Z: 1645 shift = TGSI_EXEC_CC_Z_SHIFT; 1646 mask = ~TGSI_EXEC_CC_Z_MASK; 1647 break; 1648 case CHAN_W: 1649 shift = TGSI_EXEC_CC_W_SHIFT; 1650 mask = ~TGSI_EXEC_CC_W_MASK; 1651 break; 1652 default: 1653 assert( 0 ); 1654 return; 1655 } 1656 1657 for (i = 0; i < QUAD_SIZE; i++) 1658 if (execmask & (1 << i)) { 1659 cc->u[i] &= mask; 1660 if (dst->f[i] < 0.0f) 1661 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1662 else if (dst->f[i] > 0.0f) 1663 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1664 else if (dst->f[i] == 0.0f) 1665 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1666 else 1667 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1668 } 1669 } 1670} 1671 1672#define FETCH(VAL,INDEX,CHAN)\ 1673 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1674 1675#define STORE(VAL,INDEX,CHAN)\ 1676 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1677 1678 1679/** 1680 * Execute ARB-style KIL which is predicated by a src register. 1681 * Kill fragment if any of the four values is less than zero. 1682 */ 1683static void 1684exec_kil(struct tgsi_exec_machine *mach, 1685 const struct tgsi_full_instruction *inst) 1686{ 1687 uint uniquemask; 1688 uint chan_index; 1689 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1690 union tgsi_exec_channel r[1]; 1691 1692 /* This mask stores component bits that were already tested. Note that 1693 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1694 * tested. */ 1695 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1696 1697 for (chan_index = 0; chan_index < 4; chan_index++) 1698 { 1699 uint swizzle; 1700 uint i; 1701 1702 /* unswizzle channel */ 1703 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1704 &inst->FullSrcRegisters[0], 1705 chan_index); 1706 1707 /* check if the component has not been already tested */ 1708 if (uniquemask & (1 << swizzle)) 1709 continue; 1710 uniquemask |= 1 << swizzle; 1711 1712 FETCH(&r[0], 0, chan_index); 1713 for (i = 0; i < 4; i++) 1714 if (r[0].f[i] < 0.0f) 1715 kilmask |= 1 << i; 1716 } 1717 1718 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1719} 1720 1721/** 1722 * Execute NVIDIA-style KIL which is predicated by a condition code. 1723 * Kill fragment if the condition code is TRUE. 1724 */ 1725static void 1726exec_kilp(struct tgsi_exec_machine *mach, 1727 const struct tgsi_full_instruction *inst) 1728{ 1729 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1730 1731 if (inst->InstructionExtNv.CondFlowEnable) { 1732 uint swizzle[4]; 1733 uint chan_index; 1734 1735 kilmask = 0x0; 1736 1737 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1738 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1739 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1740 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1741 1742 for (chan_index = 0; chan_index < 4; chan_index++) 1743 { 1744 uint i; 1745 1746 for (i = 0; i < 4; i++) { 1747 /* TODO: evaluate the condition code */ 1748 if (0) 1749 kilmask |= 1 << i; 1750 } 1751 } 1752 } 1753 else { 1754 /* "unconditional" kil */ 1755 kilmask = mach->ExecMask; 1756 } 1757 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1758} 1759 1760 1761/* 1762 * Fetch a four texture samples using STR texture coordinates. 1763 */ 1764static void 1765fetch_texel( struct tgsi_sampler *sampler, 1766 const union tgsi_exec_channel *s, 1767 const union tgsi_exec_channel *t, 1768 const union tgsi_exec_channel *p, 1769 float lodbias, /* XXX should be float[4] */ 1770 union tgsi_exec_channel *r, 1771 union tgsi_exec_channel *g, 1772 union tgsi_exec_channel *b, 1773 union tgsi_exec_channel *a ) 1774{ 1775 uint j; 1776 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1777 1778 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1779 1780 for (j = 0; j < 4; j++) { 1781 r->f[j] = rgba[0][j]; 1782 g->f[j] = rgba[1][j]; 1783 b->f[j] = rgba[2][j]; 1784 a->f[j] = rgba[3][j]; 1785 } 1786} 1787 1788 1789static void 1790exec_tex(struct tgsi_exec_machine *mach, 1791 const struct tgsi_full_instruction *inst, 1792 boolean biasLod, 1793 boolean projected) 1794{ 1795 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1796 union tgsi_exec_channel r[4]; 1797 uint chan_index; 1798 float lodBias; 1799 1800 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1801 1802 switch (inst->InstructionExtTexture.Texture) { 1803 case TGSI_TEXTURE_1D: 1804 case TGSI_TEXTURE_SHADOW1D: 1805 1806 FETCH(&r[0], 0, CHAN_X); 1807 1808 if (projected) { 1809 FETCH(&r[1], 0, CHAN_W); 1810 micro_div( &r[0], &r[0], &r[1] ); 1811 } 1812 1813 if (biasLod) { 1814 FETCH(&r[1], 0, CHAN_W); 1815 lodBias = r[2].f[0]; 1816 } 1817 else 1818 lodBias = 0.0; 1819 1820 fetch_texel(mach->Samplers[unit], 1821 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1822 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1823 break; 1824 1825 case TGSI_TEXTURE_2D: 1826 case TGSI_TEXTURE_RECT: 1827 case TGSI_TEXTURE_SHADOW2D: 1828 case TGSI_TEXTURE_SHADOWRECT: 1829 1830 FETCH(&r[0], 0, CHAN_X); 1831 FETCH(&r[1], 0, CHAN_Y); 1832 FETCH(&r[2], 0, CHAN_Z); 1833 1834 if (projected) { 1835 FETCH(&r[3], 0, CHAN_W); 1836 micro_div( &r[0], &r[0], &r[3] ); 1837 micro_div( &r[1], &r[1], &r[3] ); 1838 micro_div( &r[2], &r[2], &r[3] ); 1839 } 1840 1841 if (biasLod) { 1842 FETCH(&r[3], 0, CHAN_W); 1843 lodBias = r[3].f[0]; 1844 } 1845 else 1846 lodBias = 0.0; 1847 1848 fetch_texel(mach->Samplers[unit], 1849 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1850 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1851 break; 1852 1853 case TGSI_TEXTURE_3D: 1854 case TGSI_TEXTURE_CUBE: 1855 1856 FETCH(&r[0], 0, CHAN_X); 1857 FETCH(&r[1], 0, CHAN_Y); 1858 FETCH(&r[2], 0, CHAN_Z); 1859 1860 if (projected) { 1861 FETCH(&r[3], 0, CHAN_W); 1862 micro_div( &r[0], &r[0], &r[3] ); 1863 micro_div( &r[1], &r[1], &r[3] ); 1864 micro_div( &r[2], &r[2], &r[3] ); 1865 } 1866 1867 if (biasLod) { 1868 FETCH(&r[3], 0, CHAN_W); 1869 lodBias = r[3].f[0]; 1870 } 1871 else 1872 lodBias = 0.0; 1873 1874 fetch_texel(mach->Samplers[unit], 1875 &r[0], &r[1], &r[2], lodBias, 1876 &r[0], &r[1], &r[2], &r[3]); 1877 break; 1878 1879 default: 1880 assert (0); 1881 } 1882 1883 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1884 STORE( &r[chan_index], 0, chan_index ); 1885 } 1886} 1887 1888 1889/** 1890 * Evaluate a constant-valued coefficient at the position of the 1891 * current quad. 1892 */ 1893static void 1894eval_constant_coef( 1895 struct tgsi_exec_machine *mach, 1896 unsigned attrib, 1897 unsigned chan ) 1898{ 1899 unsigned i; 1900 1901 for( i = 0; i < QUAD_SIZE; i++ ) { 1902 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1903 } 1904} 1905 1906/** 1907 * Evaluate a linear-valued coefficient at the position of the 1908 * current quad. 1909 */ 1910static void 1911eval_linear_coef( 1912 struct tgsi_exec_machine *mach, 1913 unsigned attrib, 1914 unsigned chan ) 1915{ 1916 const float x = mach->QuadPos.xyzw[0].f[0]; 1917 const float y = mach->QuadPos.xyzw[1].f[0]; 1918 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1919 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1920 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1921 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1922 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1923 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1924 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1925} 1926 1927/** 1928 * Evaluate a perspective-valued coefficient at the position of the 1929 * current quad. 1930 */ 1931static void 1932eval_perspective_coef( 1933 struct tgsi_exec_machine *mach, 1934 unsigned attrib, 1935 unsigned chan ) 1936{ 1937 const float x = mach->QuadPos.xyzw[0].f[0]; 1938 const float y = mach->QuadPos.xyzw[1].f[0]; 1939 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1940 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1941 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1942 const float *w = mach->QuadPos.xyzw[3].f; 1943 /* divide by W here */ 1944 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1945 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1946 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1947 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1948} 1949 1950 1951typedef void (* eval_coef_func)( 1952 struct tgsi_exec_machine *mach, 1953 unsigned attrib, 1954 unsigned chan ); 1955 1956static void 1957exec_declaration( 1958 struct tgsi_exec_machine *mach, 1959 const struct tgsi_full_declaration *decl ) 1960{ 1961 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1962 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1963 unsigned first, last, mask; 1964 eval_coef_func eval; 1965 1966 first = decl->DeclarationRange.First; 1967 last = decl->DeclarationRange.Last; 1968 mask = decl->Declaration.UsageMask; 1969 1970 switch( decl->Declaration.Interpolate ) { 1971 case TGSI_INTERPOLATE_CONSTANT: 1972 eval = eval_constant_coef; 1973 break; 1974 1975 case TGSI_INTERPOLATE_LINEAR: 1976 eval = eval_linear_coef; 1977 break; 1978 1979 case TGSI_INTERPOLATE_PERSPECTIVE: 1980 eval = eval_perspective_coef; 1981 break; 1982 1983 default: 1984 eval = NULL; 1985 assert( 0 ); 1986 } 1987 1988 if( mask == TGSI_WRITEMASK_XYZW ) { 1989 unsigned i, j; 1990 1991 for( i = first; i <= last; i++ ) { 1992 for( j = 0; j < NUM_CHANNELS; j++ ) { 1993 eval( mach, i, j ); 1994 } 1995 } 1996 } 1997 else { 1998 unsigned i, j; 1999 2000 for( j = 0; j < NUM_CHANNELS; j++ ) { 2001 if( mask & (1 << j) ) { 2002 for( i = first; i <= last; i++ ) { 2003 eval( mach, i, j ); 2004 } 2005 } 2006 } 2007 } 2008 } 2009 } 2010} 2011 2012static void 2013exec_instruction( 2014 struct tgsi_exec_machine *mach, 2015 const struct tgsi_full_instruction *inst, 2016 int *pc ) 2017{ 2018 uint chan_index; 2019 union tgsi_exec_channel r[10]; 2020 2021 (*pc)++; 2022 2023 switch (inst->Instruction.Opcode) { 2024 case TGSI_OPCODE_ARL: 2025 case TGSI_OPCODE_FLR: 2026 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2027 FETCH( &r[0], 0, chan_index ); 2028 micro_flr( &r[0], &r[0] ); 2029 STORE( &r[0], 0, chan_index ); 2030 } 2031 break; 2032 2033 case TGSI_OPCODE_MOV: 2034 case TGSI_OPCODE_SWZ: 2035 if (inst->Flags & SOA_DEPENDENCY_FLAG) { 2036 /* Do all fetches into temp regs, then do all stores to avoid 2037 * intermediate/accidental clobbering. This could be done all the 2038 * time for MOV but for other instructions we'll need more temps... 2039 */ 2040 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2041 FETCH( &r[chan_index], 0, chan_index ); 2042 } 2043 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2044 STORE( &r[chan_index], 0, chan_index ); 2045 } 2046 } 2047 else { 2048 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2049 FETCH( &r[0], 0, chan_index ); 2050 STORE( &r[0], 0, chan_index ); 2051 } 2052 } 2053 break; 2054 2055 case TGSI_OPCODE_LIT: 2056 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2057 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2058 } 2059 2060 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2061 FETCH( &r[0], 0, CHAN_X ); 2062 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2063 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2064 STORE( &r[0], 0, CHAN_Y ); 2065 } 2066 2067 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2068 FETCH( &r[1], 0, CHAN_Y ); 2069 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2070 2071 FETCH( &r[2], 0, CHAN_W ); 2072 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2073 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2074 micro_pow( &r[1], &r[1], &r[2] ); 2075 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2076 STORE( &r[0], 0, CHAN_Z ); 2077 } 2078 } 2079 2080 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2081 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2082 } 2083 break; 2084 2085 case TGSI_OPCODE_RCP: 2086 /* TGSI_OPCODE_RECIP */ 2087 FETCH( &r[0], 0, CHAN_X ); 2088 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2089 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2090 STORE( &r[0], 0, chan_index ); 2091 } 2092 break; 2093 2094 case TGSI_OPCODE_RSQ: 2095 /* TGSI_OPCODE_RECIPSQRT */ 2096 FETCH( &r[0], 0, CHAN_X ); 2097 micro_abs( &r[0], &r[0] ); 2098 micro_sqrt( &r[0], &r[0] ); 2099 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2100 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2101 STORE( &r[0], 0, chan_index ); 2102 } 2103 break; 2104 2105 case TGSI_OPCODE_EXP: 2106 FETCH( &r[0], 0, CHAN_X ); 2107 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2108 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2109 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2110 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2111 } 2112 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2113 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2114 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2115 } 2116 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2117 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2118 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2119 } 2120 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2121 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2122 } 2123 break; 2124 2125 case TGSI_OPCODE_LOG: 2126 FETCH( &r[0], 0, CHAN_X ); 2127 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2128 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2129 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2130 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2131 STORE( &r[0], 0, CHAN_X ); 2132 } 2133 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2134 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2135 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2136 STORE( &r[0], 0, CHAN_Y ); 2137 } 2138 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2139 STORE( &r[1], 0, CHAN_Z ); 2140 } 2141 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2142 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2143 } 2144 break; 2145 2146 case TGSI_OPCODE_MUL: 2147 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 2148 { 2149 FETCH(&r[0], 0, chan_index); 2150 FETCH(&r[1], 1, chan_index); 2151 2152 micro_mul( &r[0], &r[0], &r[1] ); 2153 2154 STORE(&r[0], 0, chan_index); 2155 } 2156 break; 2157 2158 case TGSI_OPCODE_ADD: 2159 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2160 FETCH( &r[0], 0, chan_index ); 2161 FETCH( &r[1], 1, chan_index ); 2162 micro_add( &r[0], &r[0], &r[1] ); 2163 STORE( &r[0], 0, chan_index ); 2164 } 2165 break; 2166 2167 case TGSI_OPCODE_DP3: 2168 /* TGSI_OPCODE_DOT3 */ 2169 FETCH( &r[0], 0, CHAN_X ); 2170 FETCH( &r[1], 1, CHAN_X ); 2171 micro_mul( &r[0], &r[0], &r[1] ); 2172 2173 FETCH( &r[1], 0, CHAN_Y ); 2174 FETCH( &r[2], 1, CHAN_Y ); 2175 micro_mul( &r[1], &r[1], &r[2] ); 2176 micro_add( &r[0], &r[0], &r[1] ); 2177 2178 FETCH( &r[1], 0, CHAN_Z ); 2179 FETCH( &r[2], 1, CHAN_Z ); 2180 micro_mul( &r[1], &r[1], &r[2] ); 2181 micro_add( &r[0], &r[0], &r[1] ); 2182 2183 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2184 STORE( &r[0], 0, chan_index ); 2185 } 2186 break; 2187 2188 case TGSI_OPCODE_DP4: 2189 /* TGSI_OPCODE_DOT4 */ 2190 FETCH(&r[0], 0, CHAN_X); 2191 FETCH(&r[1], 1, CHAN_X); 2192 2193 micro_mul( &r[0], &r[0], &r[1] ); 2194 2195 FETCH(&r[1], 0, CHAN_Y); 2196 FETCH(&r[2], 1, CHAN_Y); 2197 2198 micro_mul( &r[1], &r[1], &r[2] ); 2199 micro_add( &r[0], &r[0], &r[1] ); 2200 2201 FETCH(&r[1], 0, CHAN_Z); 2202 FETCH(&r[2], 1, CHAN_Z); 2203 2204 micro_mul( &r[1], &r[1], &r[2] ); 2205 micro_add( &r[0], &r[0], &r[1] ); 2206 2207 FETCH(&r[1], 0, CHAN_W); 2208 FETCH(&r[2], 1, CHAN_W); 2209 2210 micro_mul( &r[1], &r[1], &r[2] ); 2211 micro_add( &r[0], &r[0], &r[1] ); 2212 2213 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2214 STORE( &r[0], 0, chan_index ); 2215 } 2216 break; 2217 2218 case TGSI_OPCODE_DST: 2219 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2220 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2221 } 2222 2223 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2224 FETCH( &r[0], 0, CHAN_Y ); 2225 FETCH( &r[1], 1, CHAN_Y); 2226 micro_mul( &r[0], &r[0], &r[1] ); 2227 STORE( &r[0], 0, CHAN_Y ); 2228 } 2229 2230 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2231 FETCH( &r[0], 0, CHAN_Z ); 2232 STORE( &r[0], 0, CHAN_Z ); 2233 } 2234 2235 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2236 FETCH( &r[0], 1, CHAN_W ); 2237 STORE( &r[0], 0, CHAN_W ); 2238 } 2239 break; 2240 2241 case TGSI_OPCODE_MIN: 2242 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2243 FETCH(&r[0], 0, chan_index); 2244 FETCH(&r[1], 1, chan_index); 2245 2246 /* XXX use micro_min()?? */ 2247 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2248 2249 STORE(&r[0], 0, chan_index); 2250 } 2251 break; 2252 2253 case TGSI_OPCODE_MAX: 2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2255 FETCH(&r[0], 0, chan_index); 2256 FETCH(&r[1], 1, chan_index); 2257 2258 /* XXX use micro_max()?? */ 2259 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2260 2261 STORE(&r[0], 0, chan_index ); 2262 } 2263 break; 2264 2265 case TGSI_OPCODE_SLT: 2266 /* TGSI_OPCODE_SETLT */ 2267 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2268 FETCH( &r[0], 0, chan_index ); 2269 FETCH( &r[1], 1, chan_index ); 2270 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2271 STORE( &r[0], 0, chan_index ); 2272 } 2273 break; 2274 2275 case TGSI_OPCODE_SGE: 2276 /* TGSI_OPCODE_SETGE */ 2277 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2278 FETCH( &r[0], 0, chan_index ); 2279 FETCH( &r[1], 1, chan_index ); 2280 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2281 STORE( &r[0], 0, chan_index ); 2282 } 2283 break; 2284 2285 case TGSI_OPCODE_MAD: 2286 /* TGSI_OPCODE_MADD */ 2287 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2288 FETCH( &r[0], 0, chan_index ); 2289 FETCH( &r[1], 1, chan_index ); 2290 micro_mul( &r[0], &r[0], &r[1] ); 2291 FETCH( &r[1], 2, chan_index ); 2292 micro_add( &r[0], &r[0], &r[1] ); 2293 STORE( &r[0], 0, chan_index ); 2294 } 2295 break; 2296 2297 case TGSI_OPCODE_SUB: 2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2299 FETCH(&r[0], 0, chan_index); 2300 FETCH(&r[1], 1, chan_index); 2301 2302 micro_sub( &r[0], &r[0], &r[1] ); 2303 2304 STORE(&r[0], 0, chan_index); 2305 } 2306 break; 2307 2308 case TGSI_OPCODE_LRP: 2309 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2310 FETCH(&r[0], 0, chan_index); 2311 FETCH(&r[1], 1, chan_index); 2312 FETCH(&r[2], 2, chan_index); 2313 2314 micro_sub( &r[1], &r[1], &r[2] ); 2315 micro_mul( &r[0], &r[0], &r[1] ); 2316 micro_add( &r[0], &r[0], &r[2] ); 2317 2318 STORE(&r[0], 0, chan_index); 2319 } 2320 break; 2321 2322 case TGSI_OPCODE_CND: 2323 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2324 FETCH(&r[0], 0, chan_index); 2325 FETCH(&r[1], 1, chan_index); 2326 FETCH(&r[2], 2, chan_index); 2327 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2328 STORE(&r[0], 0, chan_index); 2329 } 2330 break; 2331 2332 case TGSI_OPCODE_DP2A: 2333 FETCH( &r[0], 0, CHAN_X ); 2334 FETCH( &r[1], 1, CHAN_X ); 2335 micro_mul( &r[0], &r[0], &r[1] ); 2336 2337 FETCH( &r[1], 0, CHAN_Y ); 2338 FETCH( &r[2], 1, CHAN_Y ); 2339 micro_mul( &r[1], &r[1], &r[2] ); 2340 micro_add( &r[0], &r[0], &r[1] ); 2341 2342 FETCH( &r[2], 2, CHAN_X ); 2343 micro_add( &r[0], &r[0], &r[2] ); 2344 2345 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2346 STORE( &r[0], 0, chan_index ); 2347 } 2348 break; 2349 2350 case TGSI_OPCODE_FRC: 2351 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2352 FETCH( &r[0], 0, chan_index ); 2353 micro_frc( &r[0], &r[0] ); 2354 STORE( &r[0], 0, chan_index ); 2355 } 2356 break; 2357 2358 case TGSI_OPCODE_CLAMP: 2359 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2360 FETCH(&r[0], 0, chan_index); 2361 FETCH(&r[1], 1, chan_index); 2362 micro_max(&r[0], &r[0], &r[1]); 2363 FETCH(&r[1], 2, chan_index); 2364 micro_min(&r[0], &r[0], &r[1]); 2365 STORE(&r[0], 0, chan_index); 2366 } 2367 break; 2368 2369 case TGSI_OPCODE_ROUND: 2370 case TGSI_OPCODE_ARR: 2371 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2372 FETCH( &r[0], 0, chan_index ); 2373 micro_rnd( &r[0], &r[0] ); 2374 STORE( &r[0], 0, chan_index ); 2375 } 2376 break; 2377 2378 case TGSI_OPCODE_EX2: 2379 FETCH(&r[0], 0, CHAN_X); 2380 2381#if FAST_MATH 2382 micro_exp2( &r[0], &r[0] ); 2383#else 2384 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2385#endif 2386 2387 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2388 STORE( &r[0], 0, chan_index ); 2389 } 2390 break; 2391 2392 case TGSI_OPCODE_LG2: 2393 FETCH( &r[0], 0, CHAN_X ); 2394 micro_lg2( &r[0], &r[0] ); 2395 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2396 STORE( &r[0], 0, chan_index ); 2397 } 2398 break; 2399 2400 case TGSI_OPCODE_POW: 2401 FETCH(&r[0], 0, CHAN_X); 2402 FETCH(&r[1], 1, CHAN_X); 2403 2404 micro_pow( &r[0], &r[0], &r[1] ); 2405 2406 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2407 STORE( &r[0], 0, chan_index ); 2408 } 2409 break; 2410 2411 case TGSI_OPCODE_XPD: 2412 FETCH(&r[0], 0, CHAN_Y); 2413 FETCH(&r[1], 1, CHAN_Z); 2414 2415 micro_mul( &r[2], &r[0], &r[1] ); 2416 2417 FETCH(&r[3], 0, CHAN_Z); 2418 FETCH(&r[4], 1, CHAN_Y); 2419 2420 micro_mul( &r[5], &r[3], &r[4] ); 2421 micro_sub( &r[2], &r[2], &r[5] ); 2422 2423 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2424 STORE( &r[2], 0, CHAN_X ); 2425 } 2426 2427 FETCH(&r[2], 1, CHAN_X); 2428 2429 micro_mul( &r[3], &r[3], &r[2] ); 2430 2431 FETCH(&r[5], 0, CHAN_X); 2432 2433 micro_mul( &r[1], &r[1], &r[5] ); 2434 micro_sub( &r[3], &r[3], &r[1] ); 2435 2436 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2437 STORE( &r[3], 0, CHAN_Y ); 2438 } 2439 2440 micro_mul( &r[5], &r[5], &r[4] ); 2441 micro_mul( &r[0], &r[0], &r[2] ); 2442 micro_sub( &r[5], &r[5], &r[0] ); 2443 2444 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2445 STORE( &r[5], 0, CHAN_Z ); 2446 } 2447 2448 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2449 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2450 } 2451 break; 2452 2453 case TGSI_OPCODE_ABS: 2454 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2455 FETCH(&r[0], 0, chan_index); 2456 2457 micro_abs( &r[0], &r[0] ); 2458 2459 STORE(&r[0], 0, chan_index); 2460 } 2461 break; 2462 2463 case TGSI_OPCODE_RCC: 2464 FETCH(&r[0], 0, CHAN_X); 2465 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2466 micro_float_clamp(&r[0], &r[0]); 2467 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2468 STORE(&r[0], 0, chan_index); 2469 } 2470 break; 2471 2472 case TGSI_OPCODE_DPH: 2473 FETCH(&r[0], 0, CHAN_X); 2474 FETCH(&r[1], 1, CHAN_X); 2475 2476 micro_mul( &r[0], &r[0], &r[1] ); 2477 2478 FETCH(&r[1], 0, CHAN_Y); 2479 FETCH(&r[2], 1, CHAN_Y); 2480 2481 micro_mul( &r[1], &r[1], &r[2] ); 2482 micro_add( &r[0], &r[0], &r[1] ); 2483 2484 FETCH(&r[1], 0, CHAN_Z); 2485 FETCH(&r[2], 1, CHAN_Z); 2486 2487 micro_mul( &r[1], &r[1], &r[2] ); 2488 micro_add( &r[0], &r[0], &r[1] ); 2489 2490 FETCH(&r[1], 1, CHAN_W); 2491 2492 micro_add( &r[0], &r[0], &r[1] ); 2493 2494 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2495 STORE( &r[0], 0, chan_index ); 2496 } 2497 break; 2498 2499 case TGSI_OPCODE_COS: 2500 FETCH(&r[0], 0, CHAN_X); 2501 2502 micro_cos( &r[0], &r[0] ); 2503 2504 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2505 STORE( &r[0], 0, chan_index ); 2506 } 2507 break; 2508 2509 case TGSI_OPCODE_DDX: 2510 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2511 FETCH( &r[0], 0, chan_index ); 2512 micro_ddx( &r[0], &r[0] ); 2513 STORE( &r[0], 0, chan_index ); 2514 } 2515 break; 2516 2517 case TGSI_OPCODE_DDY: 2518 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2519 FETCH( &r[0], 0, chan_index ); 2520 micro_ddy( &r[0], &r[0] ); 2521 STORE( &r[0], 0, chan_index ); 2522 } 2523 break; 2524 2525 case TGSI_OPCODE_KILP: 2526 exec_kilp (mach, inst); 2527 break; 2528 2529 case TGSI_OPCODE_KIL: 2530 exec_kil (mach, inst); 2531 break; 2532 2533 case TGSI_OPCODE_PK2H: 2534 assert (0); 2535 break; 2536 2537 case TGSI_OPCODE_PK2US: 2538 assert (0); 2539 break; 2540 2541 case TGSI_OPCODE_PK4B: 2542 assert (0); 2543 break; 2544 2545 case TGSI_OPCODE_PK4UB: 2546 assert (0); 2547 break; 2548 2549 case TGSI_OPCODE_RFL: 2550 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2551 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2552 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2553 /* r0 = dp3(src0, src0) */ 2554 FETCH(&r[2], 0, CHAN_X); 2555 micro_mul(&r[0], &r[2], &r[2]); 2556 FETCH(&r[4], 0, CHAN_Y); 2557 micro_mul(&r[8], &r[4], &r[4]); 2558 micro_add(&r[0], &r[0], &r[8]); 2559 FETCH(&r[6], 0, CHAN_Z); 2560 micro_mul(&r[8], &r[6], &r[6]); 2561 micro_add(&r[0], &r[0], &r[8]); 2562 2563 /* r1 = dp3(src0, src1) */ 2564 FETCH(&r[3], 1, CHAN_X); 2565 micro_mul(&r[1], &r[2], &r[3]); 2566 FETCH(&r[5], 1, CHAN_Y); 2567 micro_mul(&r[8], &r[4], &r[5]); 2568 micro_add(&r[1], &r[1], &r[8]); 2569 FETCH(&r[7], 1, CHAN_Z); 2570 micro_mul(&r[8], &r[6], &r[7]); 2571 micro_add(&r[1], &r[1], &r[8]); 2572 2573 /* r1 = 2 * r1 / r0 */ 2574 micro_add(&r[1], &r[1], &r[1]); 2575 micro_div(&r[1], &r[1], &r[0]); 2576 2577 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2578 micro_mul(&r[2], &r[2], &r[1]); 2579 micro_sub(&r[2], &r[2], &r[3]); 2580 STORE(&r[2], 0, CHAN_X); 2581 } 2582 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2583 micro_mul(&r[4], &r[4], &r[1]); 2584 micro_sub(&r[4], &r[4], &r[5]); 2585 STORE(&r[4], 0, CHAN_Y); 2586 } 2587 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2588 micro_mul(&r[6], &r[6], &r[1]); 2589 micro_sub(&r[6], &r[6], &r[7]); 2590 STORE(&r[6], 0, CHAN_Z); 2591 } 2592 } 2593 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2594 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2595 } 2596 break; 2597 2598 case TGSI_OPCODE_SEQ: 2599 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2600 FETCH( &r[0], 0, chan_index ); 2601 FETCH( &r[1], 1, chan_index ); 2602 micro_eq( &r[0], &r[0], &r[1], 2603 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2604 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2605 STORE( &r[0], 0, chan_index ); 2606 } 2607 break; 2608 2609 case TGSI_OPCODE_SFL: 2610 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2611 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2612 } 2613 break; 2614 2615 case TGSI_OPCODE_SGT: 2616 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2617 FETCH( &r[0], 0, chan_index ); 2618 FETCH( &r[1], 1, chan_index ); 2619 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2620 STORE( &r[0], 0, chan_index ); 2621 } 2622 break; 2623 2624 case TGSI_OPCODE_SIN: 2625 FETCH( &r[0], 0, CHAN_X ); 2626 micro_sin( &r[0], &r[0] ); 2627 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2628 STORE( &r[0], 0, chan_index ); 2629 } 2630 break; 2631 2632 case TGSI_OPCODE_SLE: 2633 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2634 FETCH( &r[0], 0, chan_index ); 2635 FETCH( &r[1], 1, chan_index ); 2636 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2637 STORE( &r[0], 0, chan_index ); 2638 } 2639 break; 2640 2641 case TGSI_OPCODE_SNE: 2642 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2643 FETCH( &r[0], 0, chan_index ); 2644 FETCH( &r[1], 1, chan_index ); 2645 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2646 STORE( &r[0], 0, chan_index ); 2647 } 2648 break; 2649 2650 case TGSI_OPCODE_STR: 2651 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2652 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2653 } 2654 break; 2655 2656 case TGSI_OPCODE_TEX: 2657 /* simple texture lookup */ 2658 /* src[0] = texcoord */ 2659 /* src[1] = sampler unit */ 2660 exec_tex(mach, inst, FALSE, FALSE); 2661 break; 2662 2663 case TGSI_OPCODE_TXB: 2664 /* Texture lookup with lod bias */ 2665 /* src[0] = texcoord (src[0].w = LOD bias) */ 2666 /* src[1] = sampler unit */ 2667 exec_tex(mach, inst, TRUE, FALSE); 2668 break; 2669 2670 case TGSI_OPCODE_TXD: 2671 /* Texture lookup with explict partial derivatives */ 2672 /* src[0] = texcoord */ 2673 /* src[1] = d[strq]/dx */ 2674 /* src[2] = d[strq]/dy */ 2675 /* src[3] = sampler unit */ 2676 assert (0); 2677 break; 2678 2679 case TGSI_OPCODE_TXL: 2680 /* Texture lookup with explit LOD */ 2681 /* src[0] = texcoord (src[0].w = LOD) */ 2682 /* src[1] = sampler unit */ 2683 exec_tex(mach, inst, TRUE, FALSE); 2684 break; 2685 2686 case TGSI_OPCODE_TXP: 2687 /* Texture lookup with projection */ 2688 /* src[0] = texcoord (src[0].w = projection) */ 2689 /* src[1] = sampler unit */ 2690 exec_tex(mach, inst, FALSE, TRUE); 2691 break; 2692 2693 case TGSI_OPCODE_UP2H: 2694 assert (0); 2695 break; 2696 2697 case TGSI_OPCODE_UP2US: 2698 assert (0); 2699 break; 2700 2701 case TGSI_OPCODE_UP4B: 2702 assert (0); 2703 break; 2704 2705 case TGSI_OPCODE_UP4UB: 2706 assert (0); 2707 break; 2708 2709 case TGSI_OPCODE_X2D: 2710 FETCH(&r[0], 1, CHAN_X); 2711 FETCH(&r[1], 1, CHAN_Y); 2712 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2713 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2714 FETCH(&r[2], 2, CHAN_X); 2715 micro_mul(&r[2], &r[2], &r[0]); 2716 FETCH(&r[3], 2, CHAN_Y); 2717 micro_mul(&r[3], &r[3], &r[1]); 2718 micro_add(&r[2], &r[2], &r[3]); 2719 FETCH(&r[3], 0, CHAN_X); 2720 micro_add(&r[2], &r[2], &r[3]); 2721 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2722 STORE(&r[2], 0, CHAN_X); 2723 } 2724 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2725 STORE(&r[2], 0, CHAN_Z); 2726 } 2727 } 2728 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2729 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2730 FETCH(&r[2], 2, CHAN_Z); 2731 micro_mul(&r[2], &r[2], &r[0]); 2732 FETCH(&r[3], 2, CHAN_W); 2733 micro_mul(&r[3], &r[3], &r[1]); 2734 micro_add(&r[2], &r[2], &r[3]); 2735 FETCH(&r[3], 0, CHAN_Y); 2736 micro_add(&r[2], &r[2], &r[3]); 2737 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2738 STORE(&r[2], 0, CHAN_Y); 2739 } 2740 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2741 STORE(&r[2], 0, CHAN_W); 2742 } 2743 } 2744 break; 2745 2746 case TGSI_OPCODE_ARA: 2747 assert (0); 2748 break; 2749 2750 case TGSI_OPCODE_BRA: 2751 assert (0); 2752 break; 2753 2754 case TGSI_OPCODE_CAL: 2755 /* skip the call if no execution channels are enabled */ 2756 if (mach->ExecMask) { 2757 /* do the call */ 2758 2759 /* push the Cond, Loop, Cont stacks */ 2760 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2761 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2762 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2763 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2764 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2765 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2766 2767 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2768 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2769 2770 /* note that PC was already incremented above */ 2771 mach->CallStack[mach->CallStackTop++] = *pc; 2772 *pc = inst->InstructionExtLabel.Label; 2773 } 2774 break; 2775 2776 case TGSI_OPCODE_RET: 2777 mach->FuncMask &= ~mach->ExecMask; 2778 UPDATE_EXEC_MASK(mach); 2779 2780 if (mach->FuncMask == 0x0) { 2781 /* really return now (otherwise, keep executing */ 2782 2783 if (mach->CallStackTop == 0) { 2784 /* returning from main() */ 2785 *pc = -1; 2786 return; 2787 } 2788 *pc = mach->CallStack[--mach->CallStackTop]; 2789 2790 /* pop the Cond, Loop, Cont stacks */ 2791 assert(mach->CondStackTop > 0); 2792 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2793 assert(mach->LoopStackTop > 0); 2794 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2795 assert(mach->ContStackTop > 0); 2796 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2797 assert(mach->FuncStackTop > 0); 2798 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2799 2800 UPDATE_EXEC_MASK(mach); 2801 } 2802 break; 2803 2804 case TGSI_OPCODE_SSG: 2805 /* TGSI_OPCODE_SGN */ 2806 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2807 FETCH( &r[0], 0, chan_index ); 2808 micro_sgn( &r[0], &r[0] ); 2809 STORE( &r[0], 0, chan_index ); 2810 } 2811 break; 2812 2813 case TGSI_OPCODE_CMP: 2814 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2815 FETCH(&r[0], 0, chan_index); 2816 FETCH(&r[1], 1, chan_index); 2817 FETCH(&r[2], 2, chan_index); 2818 2819 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2820 2821 STORE(&r[0], 0, chan_index); 2822 } 2823 break; 2824 2825 case TGSI_OPCODE_SCS: 2826 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2827 FETCH( &r[0], 0, CHAN_X ); 2828 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2829 micro_cos(&r[1], &r[0]); 2830 STORE(&r[1], 0, CHAN_X); 2831 } 2832 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2833 micro_sin(&r[1], &r[0]); 2834 STORE(&r[1], 0, CHAN_Y); 2835 } 2836 } 2837 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2838 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2839 } 2840 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2841 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2842 } 2843 break; 2844 2845 case TGSI_OPCODE_NRM: 2846 /* 3-component vector normalize */ 2847 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2848 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2849 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2850 /* r3 = sqrt(dp3(src0, src0)) */ 2851 FETCH(&r[0], 0, CHAN_X); 2852 micro_mul(&r[3], &r[0], &r[0]); 2853 FETCH(&r[1], 0, CHAN_Y); 2854 micro_mul(&r[4], &r[1], &r[1]); 2855 micro_add(&r[3], &r[3], &r[4]); 2856 FETCH(&r[2], 0, CHAN_Z); 2857 micro_mul(&r[4], &r[2], &r[2]); 2858 micro_add(&r[3], &r[3], &r[4]); 2859 micro_sqrt(&r[3], &r[3]); 2860 2861 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2862 micro_div(&r[0], &r[0], &r[3]); 2863 STORE(&r[0], 0, CHAN_X); 2864 } 2865 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2866 micro_div(&r[1], &r[1], &r[3]); 2867 STORE(&r[1], 0, CHAN_Y); 2868 } 2869 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2870 micro_div(&r[2], &r[2], &r[3]); 2871 STORE(&r[2], 0, CHAN_Z); 2872 } 2873 } 2874 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2875 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2876 } 2877 break; 2878 2879 case TGSI_OPCODE_NRM4: 2880 /* 4-component vector normalize */ 2881 { 2882 union tgsi_exec_channel tmp, dot; 2883 2884 /* tmp = dp4(src0, src0): */ 2885 FETCH( &r[0], 0, CHAN_X ); 2886 micro_mul( &tmp, &r[0], &r[0] ); 2887 2888 FETCH( &r[1], 0, CHAN_Y ); 2889 micro_mul( &dot, &r[1], &r[1] ); 2890 micro_add( &tmp, &tmp, &dot ); 2891 2892 FETCH( &r[2], 0, CHAN_Z ); 2893 micro_mul( &dot, &r[2], &r[2] ); 2894 micro_add( &tmp, &tmp, &dot ); 2895 2896 FETCH( &r[3], 0, CHAN_W ); 2897 micro_mul( &dot, &r[3], &r[3] ); 2898 micro_add( &tmp, &tmp, &dot ); 2899 2900 /* tmp = 1 / sqrt(tmp) */ 2901 micro_sqrt( &tmp, &tmp ); 2902 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2903 2904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2905 /* chan = chan * tmp */ 2906 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2907 STORE( &r[chan_index], 0, chan_index ); 2908 } 2909 } 2910 break; 2911 2912 case TGSI_OPCODE_DIV: 2913 assert( 0 ); 2914 break; 2915 2916 case TGSI_OPCODE_DP2: 2917 FETCH( &r[0], 0, CHAN_X ); 2918 FETCH( &r[1], 1, CHAN_X ); 2919 micro_mul( &r[0], &r[0], &r[1] ); 2920 2921 FETCH( &r[1], 0, CHAN_Y ); 2922 FETCH( &r[2], 1, CHAN_Y ); 2923 micro_mul( &r[1], &r[1], &r[2] ); 2924 micro_add( &r[0], &r[0], &r[1] ); 2925 2926 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2927 STORE( &r[0], 0, chan_index ); 2928 } 2929 break; 2930 2931 case TGSI_OPCODE_IF: 2932 /* push CondMask */ 2933 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2934 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2935 FETCH( &r[0], 0, CHAN_X ); 2936 /* update CondMask */ 2937 if( ! r[0].u[0] ) { 2938 mach->CondMask &= ~0x1; 2939 } 2940 if( ! r[0].u[1] ) { 2941 mach->CondMask &= ~0x2; 2942 } 2943 if( ! r[0].u[2] ) { 2944 mach->CondMask &= ~0x4; 2945 } 2946 if( ! r[0].u[3] ) { 2947 mach->CondMask &= ~0x8; 2948 } 2949 UPDATE_EXEC_MASK(mach); 2950 /* Todo: If CondMask==0, jump to ELSE */ 2951 break; 2952 2953 case TGSI_OPCODE_ELSE: 2954 /* invert CondMask wrt previous mask */ 2955 { 2956 uint prevMask; 2957 assert(mach->CondStackTop > 0); 2958 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2959 mach->CondMask = ~mach->CondMask & prevMask; 2960 UPDATE_EXEC_MASK(mach); 2961 /* Todo: If CondMask==0, jump to ENDIF */ 2962 } 2963 break; 2964 2965 case TGSI_OPCODE_ENDIF: 2966 /* pop CondMask */ 2967 assert(mach->CondStackTop > 0); 2968 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2969 UPDATE_EXEC_MASK(mach); 2970 break; 2971 2972 case TGSI_OPCODE_END: 2973 /* halt execution */ 2974 *pc = -1; 2975 break; 2976 2977 case TGSI_OPCODE_REP: 2978 assert (0); 2979 break; 2980 2981 case TGSI_OPCODE_ENDREP: 2982 assert (0); 2983 break; 2984 2985 case TGSI_OPCODE_PUSHA: 2986 assert (0); 2987 break; 2988 2989 case TGSI_OPCODE_POPA: 2990 assert (0); 2991 break; 2992 2993 case TGSI_OPCODE_CEIL: 2994 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2995 FETCH( &r[0], 0, chan_index ); 2996 micro_ceil( &r[0], &r[0] ); 2997 STORE( &r[0], 0, chan_index ); 2998 } 2999 break; 3000 3001 case TGSI_OPCODE_I2F: 3002 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3003 FETCH( &r[0], 0, chan_index ); 3004 micro_i2f( &r[0], &r[0] ); 3005 STORE( &r[0], 0, chan_index ); 3006 } 3007 break; 3008 3009 case TGSI_OPCODE_NOT: 3010 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3011 FETCH( &r[0], 0, chan_index ); 3012 micro_not( &r[0], &r[0] ); 3013 STORE( &r[0], 0, chan_index ); 3014 } 3015 break; 3016 3017 case TGSI_OPCODE_TRUNC: 3018 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3019 FETCH( &r[0], 0, chan_index ); 3020 micro_trunc( &r[0], &r[0] ); 3021 STORE( &r[0], 0, chan_index ); 3022 } 3023 break; 3024 3025 case TGSI_OPCODE_SHL: 3026 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3027 FETCH( &r[0], 0, chan_index ); 3028 FETCH( &r[1], 1, chan_index ); 3029 micro_shl( &r[0], &r[0], &r[1] ); 3030 STORE( &r[0], 0, chan_index ); 3031 } 3032 break; 3033 3034 case TGSI_OPCODE_SHR: 3035 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3036 FETCH( &r[0], 0, chan_index ); 3037 FETCH( &r[1], 1, chan_index ); 3038 micro_ishr( &r[0], &r[0], &r[1] ); 3039 STORE( &r[0], 0, chan_index ); 3040 } 3041 break; 3042 3043 case TGSI_OPCODE_AND: 3044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3045 FETCH( &r[0], 0, chan_index ); 3046 FETCH( &r[1], 1, chan_index ); 3047 micro_and( &r[0], &r[0], &r[1] ); 3048 STORE( &r[0], 0, chan_index ); 3049 } 3050 break; 3051 3052 case TGSI_OPCODE_OR: 3053 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3054 FETCH( &r[0], 0, chan_index ); 3055 FETCH( &r[1], 1, chan_index ); 3056 micro_or( &r[0], &r[0], &r[1] ); 3057 STORE( &r[0], 0, chan_index ); 3058 } 3059 break; 3060 3061 case TGSI_OPCODE_MOD: 3062 assert (0); 3063 break; 3064 3065 case TGSI_OPCODE_XOR: 3066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3067 FETCH( &r[0], 0, chan_index ); 3068 FETCH( &r[1], 1, chan_index ); 3069 micro_xor( &r[0], &r[0], &r[1] ); 3070 STORE( &r[0], 0, chan_index ); 3071 } 3072 break; 3073 3074 case TGSI_OPCODE_SAD: 3075 assert (0); 3076 break; 3077 3078 case TGSI_OPCODE_TXF: 3079 assert (0); 3080 break; 3081 3082 case TGSI_OPCODE_TXQ: 3083 assert (0); 3084 break; 3085 3086 case TGSI_OPCODE_EMIT: 3087 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3088 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3089 break; 3090 3091 case TGSI_OPCODE_ENDPRIM: 3092 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3093 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3094 break; 3095 3096 case TGSI_OPCODE_BGNFOR: 3097 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3098 for (chan_index = 0; chan_index < 3; chan_index++) { 3099 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3100 } 3101 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3102 ++mach->LoopCounterStackTop; 3103 /* fall-through (for now) */ 3104 case TGSI_OPCODE_BGNLOOP: 3105 /* push LoopMask and ContMasks */ 3106 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3107 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3108 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3109 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3110 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3111 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3112 break; 3113 3114 case TGSI_OPCODE_ENDFOR: 3115 assert(mach->LoopCounterStackTop > 0); 3116 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3117 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3118 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 3119 /* update LoopMask */ 3120 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { 3121 mach->LoopMask &= ~0x1; 3122 } 3123 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { 3124 mach->LoopMask &= ~0x2; 3125 } 3126 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { 3127 mach->LoopMask &= ~0x4; 3128 } 3129 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { 3130 mach->LoopMask &= ~0x8; 3131 } 3132 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3133 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3134 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3135 assert(mach->LoopLabelStackTop > 0); 3136 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3137 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3138 /* Restore ContMask, but don't pop */ 3139 assert(mach->ContStackTop > 0); 3140 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3141 UPDATE_EXEC_MASK(mach); 3142 if (mach->ExecMask) { 3143 /* repeat loop: jump to instruction just past BGNLOOP */ 3144 assert(mach->LoopLabelStackTop > 0); 3145 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3146 } 3147 else { 3148 /* exit loop: pop LoopMask */ 3149 assert(mach->LoopStackTop > 0); 3150 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3151 /* pop ContMask */ 3152 assert(mach->ContStackTop > 0); 3153 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3154 assert(mach->LoopLabelStackTop > 0); 3155 --mach->LoopLabelStackTop; 3156 assert(mach->LoopCounterStackTop > 0); 3157 --mach->LoopCounterStackTop; 3158 } 3159 UPDATE_EXEC_MASK(mach); 3160 break; 3161 3162 case TGSI_OPCODE_ENDLOOP: 3163 /* Restore ContMask, but don't pop */ 3164 assert(mach->ContStackTop > 0); 3165 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3166 UPDATE_EXEC_MASK(mach); 3167 if (mach->ExecMask) { 3168 /* repeat loop: jump to instruction just past BGNLOOP */ 3169 assert(mach->LoopLabelStackTop > 0); 3170 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3171 } 3172 else { 3173 /* exit loop: pop LoopMask */ 3174 assert(mach->LoopStackTop > 0); 3175 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3176 /* pop ContMask */ 3177 assert(mach->ContStackTop > 0); 3178 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3179 assert(mach->LoopLabelStackTop > 0); 3180 --mach->LoopLabelStackTop; 3181 } 3182 UPDATE_EXEC_MASK(mach); 3183 break; 3184 3185 case TGSI_OPCODE_BRK: 3186 /* turn off loop channels for each enabled exec channel */ 3187 mach->LoopMask &= ~mach->ExecMask; 3188 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3189 UPDATE_EXEC_MASK(mach); 3190 break; 3191 3192 case TGSI_OPCODE_CONT: 3193 /* turn off cont channels for each enabled exec channel */ 3194 mach->ContMask &= ~mach->ExecMask; 3195 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3196 UPDATE_EXEC_MASK(mach); 3197 break; 3198 3199 case TGSI_OPCODE_BGNSUB: 3200 /* no-op */ 3201 break; 3202 3203 case TGSI_OPCODE_ENDSUB: 3204 /* no-op */ 3205 break; 3206 3207 case TGSI_OPCODE_NOISE1: 3208 assert( 0 ); 3209 break; 3210 3211 case TGSI_OPCODE_NOISE2: 3212 assert( 0 ); 3213 break; 3214 3215 case TGSI_OPCODE_NOISE3: 3216 assert( 0 ); 3217 break; 3218 3219 case TGSI_OPCODE_NOISE4: 3220 assert( 0 ); 3221 break; 3222 3223 case TGSI_OPCODE_NOP: 3224 break; 3225 3226 default: 3227 assert( 0 ); 3228 } 3229} 3230 3231 3232/** 3233 * Run TGSI interpreter. 3234 * \return bitmask of "alive" quad components 3235 */ 3236uint 3237tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3238{ 3239 uint i; 3240 int pc = 0; 3241 3242 mach->CondMask = 0xf; 3243 mach->LoopMask = 0xf; 3244 mach->ContMask = 0xf; 3245 mach->FuncMask = 0xf; 3246 mach->ExecMask = 0xf; 3247 3248 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 3249 assert(mach->CondStackTop == 0); 3250 assert(mach->LoopStackTop == 0); 3251 assert(mach->ContStackTop == 0); 3252 assert(mach->CallStackTop == 0); 3253 3254 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3255 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3256 3257 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3258 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3259 mach->Primitives[0] = 0; 3260 } 3261 3262 for (i = 0; i < QUAD_SIZE; i++) { 3263 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3264 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3265 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3266 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3267 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3268 } 3269 3270 /* execute declarations (interpolants) */ 3271 for (i = 0; i < mach->NumDeclarations; i++) { 3272 exec_declaration( mach, mach->Declarations+i ); 3273 } 3274 3275 /* execute instructions, until pc is set to -1 */ 3276 while (pc != -1) { 3277 assert(pc < (int) mach->NumInstructions); 3278 exec_instruction( mach, mach->Instructions + pc, &pc ); 3279 } 3280 3281#if 0 3282 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3283 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3284 /* 3285 * Scale back depth component. 3286 */ 3287 for (i = 0; i < 4; i++) 3288 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3289 } 3290#endif 3291 3292 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3293} 3294