1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "VertexProgram.hpp"
16
17#include "VertexShader.hpp"
18#include "SamplerCore.hpp"
19#include "Renderer/Renderer.hpp"
20#include "Renderer/Vertex.hpp"
21#include "Common/Half.hpp"
22#include "Common/Debug.hpp"
23
24namespace sw
25{
26	VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27		: VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
28	{
29		ifDepth = 0;
30		loopRepDepth = 0;
31		currentLabel = -1;
32		whileTest = false;
33
34		for(int i = 0; i < 2048; i++)
35		{
36			labelBlock[i] = 0;
37		}
38
39		loopDepth = -1;
40		enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
41
42		if(shader->containsBreakInstruction())
43		{
44			enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
45		}
46
47		if(shader->containsContinueInstruction())
48		{
49			enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
50		}
51
52		if(shader->isInstanceIdDeclared())
53		{
54			instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
55		}
56	}
57
58	VertexProgram::~VertexProgram()
59	{
60	}
61
62	void VertexProgram::pipeline(UInt& index)
63	{
64		if(!state.preTransformed)
65		{
66			program(index);
67		}
68		else
69		{
70			passThrough();
71		}
72	}
73
74	void VertexProgram::program(UInt& index)
75	{
76	//	shader->print("VertexShader-%0.8X.txt", state.shaderID);
77
78		unsigned short shaderModel = shader->getShaderModel();
79
80		enableIndex = 0;
81		stackIndex = 0;
82
83		if(shader->containsLeaveInstruction())
84		{
85			enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
86		}
87
88		if(shader->isVertexIdDeclared())
89		{
90			if(state.textureSampling)
91			{
92				vertexID = Int4(index);
93			}
94			else
95			{
96				vertexID = Insert(vertexID, As<Int>(index), 0);
97				vertexID = Insert(vertexID, As<Int>(index + 1), 1);
98				vertexID = Insert(vertexID, As<Int>(index + 2), 2);
99				vertexID = Insert(vertexID, As<Int>(index + 3), 3);
100			}
101		}
102
103		// Create all call site return blocks up front
104		for(size_t i = 0; i < shader->getLength(); i++)
105		{
106			const Shader::Instruction *instruction = shader->getInstruction(i);
107			Shader::Opcode opcode = instruction->opcode;
108
109			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
110			{
111				const Dst &dst = instruction->dst;
112
113				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
114				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
115			}
116		}
117
118		for(size_t i = 0; i < shader->getLength(); i++)
119		{
120			const Shader::Instruction *instruction = shader->getInstruction(i);
121			Shader::Opcode opcode = instruction->opcode;
122
123			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
124			{
125				continue;
126			}
127
128			Dst dst = instruction->dst;
129			Src src0 = instruction->src[0];
130			Src src1 = instruction->src[1];
131			Src src2 = instruction->src[2];
132			Src src3 = instruction->src[3];
133			Src src4 = instruction->src[4];
134
135			bool predicate = instruction->predicate;
136			Control control = instruction->control;
137			bool integer = dst.type == Shader::PARAMETER_ADDR;
138			bool pp = dst.partialPrecision;
139
140			Vector4f d;
141			Vector4f s0;
142			Vector4f s1;
143			Vector4f s2;
144			Vector4f s3;
145			Vector4f s4;
146
147			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
148			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
149			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
150			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
151			if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
152
153			switch(opcode)
154			{
155			case Shader::OPCODE_VS_1_0:                                     break;
156			case Shader::OPCODE_VS_1_1:                                     break;
157			case Shader::OPCODE_VS_2_0:                                     break;
158			case Shader::OPCODE_VS_2_x:                                     break;
159			case Shader::OPCODE_VS_2_sw:                                    break;
160			case Shader::OPCODE_VS_3_0:                                     break;
161			case Shader::OPCODE_VS_3_sw:                                    break;
162			case Shader::OPCODE_DCL:                                        break;
163			case Shader::OPCODE_DEF:                                        break;
164			case Shader::OPCODE_DEFI:                                       break;
165			case Shader::OPCODE_DEFB:                                       break;
166			case Shader::OPCODE_NOP:                                        break;
167			case Shader::OPCODE_ABS:        abs(d, s0);                     break;
168			case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
169			case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
170			case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
171			case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
172			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
173			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
174			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
175			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
176			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
177			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
178			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
179			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
180			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
181			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
182			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
183			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
184			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
185			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
186			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
187			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
188			case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
189			case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
190			case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
191			case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
192			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
193			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
194			case Shader::OPCODE_EXPP:       expp(d, s0, shaderModel);       break;
195			case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
196			case Shader::OPCODE_FRC:        frc(d, s0);                     break;
197			case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
198			case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
199			case Shader::OPCODE_ROUND:      round(d, s0);                   break;
200			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
201			case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
202			case Shader::OPCODE_LIT:        lit(d, s0);                     break;
203			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
204			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
205			case Shader::OPCODE_LOGP:       logp(d, s0, shaderModel);       break;
206			case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
207			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
208			case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
209			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
210			case Shader::OPCODE_ISINF:      isinf(d, s0);                   break;
211			case Shader::OPCODE_ISNAN:      isnan(d, s0);                   break;
212			case Shader::OPCODE_FLOATBITSTOINT:
213			case Shader::OPCODE_FLOATBITSTOUINT:
214			case Shader::OPCODE_INTBITSTOFLOAT:
215			case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
216			case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
217			case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
218			case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
219			case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
220			case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
221			case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
222			case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
223			case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
224			case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
225			case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
226			case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
227			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
228			case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
229			case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
230			case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
231			case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
232			case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
233			case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
234			case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
235			case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
236			case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
237			case Shader::OPCODE_NEG:        neg(d, s0);                     break;
238			case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
239			case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
240			case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
241			case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
242			case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
243			case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
244			case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
245			case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
246			case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
247			case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
248			case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
249			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
250			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
251			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
252			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
253			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
254			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
255			case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
256			case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
257			case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
258			case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
259			case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
260			case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
261			case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
262			case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
263			case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
264			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
265			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
266			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
267			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
268			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
269			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
270			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
271			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
272			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
273			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
274			case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
275			case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
276			case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
277			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
278			case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
279			case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
280			case Shader::OPCODE_TAN:        tan(d, s0);                     break;
281			case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
282			case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
283			case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
284			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
285			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
286			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
287			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
288			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
289			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
290			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
291			case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
292			case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
293			case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
294			case Shader::OPCODE_BREAK:      BREAK();                        break;
295			case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
296			case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
297			case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
298			case Shader::OPCODE_TEST:       TEST();                         break;
299			case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
300			case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
301			case Shader::OPCODE_ELSE:       ELSE();                         break;
302			case Shader::OPCODE_ENDIF:      ENDIF();                        break;
303			case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
304			case Shader::OPCODE_ENDREP:     ENDREP();                       break;
305			case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
306			case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
307			case Shader::OPCODE_IF:         IF(src0);                       break;
308			case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
309			case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
310			case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
311			case Shader::OPCODE_REP:        REP(src0);                      break;
312			case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
313			case Shader::OPCODE_SWITCH:     SWITCH();                       break;
314			case Shader::OPCODE_RET:        RET();                          break;
315			case Shader::OPCODE_LEAVE:      LEAVE();                        break;
316			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
317			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
318			case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
319			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
320			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
321			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
322			case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
323			case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
324			case Shader::OPCODE_NOT:        bitwise_not(d, s0);             break;
325			case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);          break;
326			case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);         break;
327			case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);         break;
328			case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
329			case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
330			case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);      break;
331			case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);      break;
332			case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
333			case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);     break;
334			case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break;
335			case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);  break;
336			case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
337			case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
338			case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break;
339			case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
340			case Shader::OPCODE_END:                                        break;
341			default:
342				ASSERT(false);
343			}
344
345			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
346			{
347				if(dst.saturate)
348				{
349					if(dst.x) d.x = Max(d.x, Float4(0.0f));
350					if(dst.y) d.y = Max(d.y, Float4(0.0f));
351					if(dst.z) d.z = Max(d.z, Float4(0.0f));
352					if(dst.w) d.w = Max(d.w, Float4(0.0f));
353
354					if(dst.x) d.x = Min(d.x, Float4(1.0f));
355					if(dst.y) d.y = Min(d.y, Float4(1.0f));
356					if(dst.z) d.z = Min(d.z, Float4(1.0f));
357					if(dst.w) d.w = Min(d.w, Float4(1.0f));
358				}
359
360				if(instruction->isPredicated())
361				{
362					Vector4f pDst;   // FIXME: Rename
363
364					switch(dst.type)
365					{
366					case Shader::PARAMETER_VOID: break;
367					case Shader::PARAMETER_TEMP:
368						if(dst.rel.type == Shader::PARAMETER_VOID)
369						{
370							if(dst.x) pDst.x = r[dst.index].x;
371							if(dst.y) pDst.y = r[dst.index].y;
372							if(dst.z) pDst.z = r[dst.index].z;
373							if(dst.w) pDst.w = r[dst.index].w;
374						}
375						else
376						{
377							Int a = relativeAddress(dst);
378
379							if(dst.x) pDst.x = r[dst.index + a].x;
380							if(dst.y) pDst.y = r[dst.index + a].y;
381							if(dst.z) pDst.z = r[dst.index + a].z;
382							if(dst.w) pDst.w = r[dst.index + a].w;
383						}
384						break;
385					case Shader::PARAMETER_ADDR: pDst = a0; break;
386					case Shader::PARAMETER_RASTOUT:
387						switch(dst.index)
388						{
389						case 0:
390							if(dst.x) pDst.x = o[Pos].x;
391							if(dst.y) pDst.y = o[Pos].y;
392							if(dst.z) pDst.z = o[Pos].z;
393							if(dst.w) pDst.w = o[Pos].w;
394							break;
395						case 1:
396							pDst.x = o[Fog].x;
397							break;
398						case 2:
399							pDst.x = o[Pts].y;
400							break;
401						default:
402							ASSERT(false);
403						}
404						break;
405					case Shader::PARAMETER_ATTROUT:
406						if(dst.x) pDst.x = o[C0 + dst.index].x;
407						if(dst.y) pDst.y = o[C0 + dst.index].y;
408						if(dst.z) pDst.z = o[C0 + dst.index].z;
409						if(dst.w) pDst.w = o[C0 + dst.index].w;
410						break;
411					case Shader::PARAMETER_TEXCRDOUT:
412				//	case Shader::PARAMETER_OUTPUT:
413						if(shaderModel < 0x0300)
414						{
415							if(dst.x) pDst.x = o[T0 + dst.index].x;
416							if(dst.y) pDst.y = o[T0 + dst.index].y;
417							if(dst.z) pDst.z = o[T0 + dst.index].z;
418							if(dst.w) pDst.w = o[T0 + dst.index].w;
419						}
420						else
421						{
422							if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
423							{
424								if(dst.x) pDst.x = o[dst.index].x;
425								if(dst.y) pDst.y = o[dst.index].y;
426								if(dst.z) pDst.z = o[dst.index].z;
427								if(dst.w) pDst.w = o[dst.index].w;
428							}
429							else
430							{
431								Int a = relativeAddress(dst);
432
433								if(dst.x) pDst.x = o[dst.index + a].x;
434								if(dst.y) pDst.y = o[dst.index + a].y;
435								if(dst.z) pDst.z = o[dst.index + a].z;
436								if(dst.w) pDst.w = o[dst.index + a].w;
437							}
438						}
439						break;
440					case Shader::PARAMETER_LABEL:                break;
441					case Shader::PARAMETER_PREDICATE: pDst = p0; break;
442					case Shader::PARAMETER_INPUT:                break;
443					default:
444						ASSERT(false);
445					}
446
447					Int4 enable = enableMask(instruction);
448
449					Int4 xEnable = enable;
450					Int4 yEnable = enable;
451					Int4 zEnable = enable;
452					Int4 wEnable = enable;
453
454					if(predicate)
455					{
456						unsigned char pSwizzle = instruction->predicateSwizzle;
457
458						Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
459						Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
460						Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
461						Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
462
463						if(!instruction->predicateNot)
464						{
465							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
466							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
467							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
468							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
469						}
470						else
471						{
472							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
473							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
474							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
475							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
476						}
477					}
478
479					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
480					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
481					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
482					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
483
484					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
485					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
486					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
487					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
488				}
489
490				switch(dst.type)
491				{
492				case Shader::PARAMETER_VOID:
493					break;
494				case Shader::PARAMETER_TEMP:
495					if(dst.rel.type == Shader::PARAMETER_VOID)
496					{
497						if(dst.x) r[dst.index].x = d.x;
498						if(dst.y) r[dst.index].y = d.y;
499						if(dst.z) r[dst.index].z = d.z;
500						if(dst.w) r[dst.index].w = d.w;
501					}
502					else
503					{
504						Int a = relativeAddress(dst);
505
506						if(dst.x) r[dst.index + a].x = d.x;
507						if(dst.y) r[dst.index + a].y = d.y;
508						if(dst.z) r[dst.index + a].z = d.z;
509						if(dst.w) r[dst.index + a].w = d.w;
510					}
511					break;
512				case Shader::PARAMETER_ADDR:
513					if(dst.x) a0.x = d.x;
514					if(dst.y) a0.y = d.y;
515					if(dst.z) a0.z = d.z;
516					if(dst.w) a0.w = d.w;
517					break;
518				case Shader::PARAMETER_RASTOUT:
519					switch(dst.index)
520					{
521					case 0:
522						if(dst.x) o[Pos].x = d.x;
523						if(dst.y) o[Pos].y = d.y;
524						if(dst.z) o[Pos].z = d.z;
525						if(dst.w) o[Pos].w = d.w;
526						break;
527					case 1:
528						o[Fog].x = d.x;
529						break;
530					case 2:
531						o[Pts].y = d.x;
532						break;
533					default:	ASSERT(false);
534					}
535					break;
536				case Shader::PARAMETER_ATTROUT:
537					if(dst.x) o[C0 + dst.index].x = d.x;
538					if(dst.y) o[C0 + dst.index].y = d.y;
539					if(dst.z) o[C0 + dst.index].z = d.z;
540					if(dst.w) o[C0 + dst.index].w = d.w;
541					break;
542				case Shader::PARAMETER_TEXCRDOUT:
543			//	case Shader::PARAMETER_OUTPUT:
544					if(shaderModel < 0x0300)
545					{
546						if(dst.x) o[T0 + dst.index].x = d.x;
547						if(dst.y) o[T0 + dst.index].y = d.y;
548						if(dst.z) o[T0 + dst.index].z = d.z;
549						if(dst.w) o[T0 + dst.index].w = d.w;
550					}
551					else
552					{
553						if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
554						{
555							if(dst.x) o[dst.index].x = d.x;
556							if(dst.y) o[dst.index].y = d.y;
557							if(dst.z) o[dst.index].z = d.z;
558							if(dst.w) o[dst.index].w = d.w;
559						}
560						else
561						{
562							Int a = relativeAddress(dst);
563
564							if(dst.x) o[dst.index + a].x = d.x;
565							if(dst.y) o[dst.index + a].y = d.y;
566							if(dst.z) o[dst.index + a].z = d.z;
567							if(dst.w) o[dst.index + a].w = d.w;
568						}
569					}
570					break;
571				case Shader::PARAMETER_LABEL:             break;
572				case Shader::PARAMETER_PREDICATE: p0 = d; break;
573				case Shader::PARAMETER_INPUT:             break;
574				default:
575					ASSERT(false);
576				}
577			}
578		}
579
580		if(currentLabel != -1)
581		{
582			Nucleus::setInsertBlock(returnBlock);
583		}
584	}
585
586	void VertexProgram::passThrough()
587	{
588		if(shader)
589		{
590			for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
591			{
592				unsigned char usage = shader->getOutput(i, 0).usage;
593
594				switch(usage)
595				{
596				case 0xFF:
597					continue;
598				case Shader::USAGE_PSIZE:
599					o[i].y = v[i].x;
600					break;
601				case Shader::USAGE_TEXCOORD:
602					o[i].x = v[i].x;
603					o[i].y = v[i].y;
604					o[i].z = v[i].z;
605					o[i].w = v[i].w;
606					break;
607				case Shader::USAGE_POSITION:
608					o[i].x = v[i].x;
609					o[i].y = v[i].y;
610					o[i].z = v[i].z;
611					o[i].w = v[i].w;
612					break;
613				case Shader::USAGE_COLOR:
614					o[i].x = v[i].x;
615					o[i].y = v[i].y;
616					o[i].z = v[i].z;
617					o[i].w = v[i].w;
618					break;
619				case Shader::USAGE_FOG:
620					o[i].x = v[i].x;
621					break;
622				default:
623					ASSERT(false);
624				}
625			}
626		}
627		else
628		{
629			o[Pos].x = v[PositionT].x;
630			o[Pos].y = v[PositionT].y;
631			o[Pos].z = v[PositionT].z;
632			o[Pos].w = v[PositionT].w;
633
634			for(int i = 0; i < 2; i++)
635			{
636				o[C0 + i].x = v[Color0 + i].x;
637				o[C0 + i].y = v[Color0 + i].y;
638				o[C0 + i].z = v[Color0 + i].z;
639				o[C0 + i].w = v[Color0 + i].w;
640			}
641
642			for(int i = 0; i < 8; i++)
643			{
644				o[T0 + i].x = v[TexCoord0 + i].x;
645				o[T0 + i].y = v[TexCoord0 + i].y;
646				o[T0 + i].z = v[TexCoord0 + i].z;
647				o[T0 + i].w = v[TexCoord0 + i].w;
648			}
649
650			o[Pts].y = v[PointSize].x;
651		}
652	}
653
654	Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
655	{
656		Vector4f reg;
657		unsigned int i = src.index + offset;
658
659		switch(src.type)
660		{
661		case Shader::PARAMETER_TEMP:
662			if(src.rel.type == Shader::PARAMETER_VOID)
663			{
664				reg = r[i];
665			}
666			else
667			{
668				reg = r[i + relativeAddress(src, src.bufferIndex)];
669			}
670			break;
671		case Shader::PARAMETER_CONST:
672			reg = readConstant(src, offset);
673			break;
674		case Shader::PARAMETER_INPUT:
675			if(src.rel.type == Shader::PARAMETER_VOID)
676			{
677				reg = v[i];
678			}
679			else
680			{
681				reg = v[i + relativeAddress(src, src.bufferIndex)];
682			}
683			break;
684		case Shader::PARAMETER_VOID: return r[0];   // Dummy
685		case Shader::PARAMETER_FLOAT4LITERAL:
686			reg.x = Float4(src.value[0]);
687			reg.y = Float4(src.value[1]);
688			reg.z = Float4(src.value[2]);
689			reg.w = Float4(src.value[3]);
690			break;
691		case Shader::PARAMETER_ADDR:      reg = a0; break;
692		case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
693		case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
694		case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
695		case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
696		case Shader::PARAMETER_SAMPLER:
697			if(src.rel.type == Shader::PARAMETER_VOID)
698			{
699				reg.x = As<Float4>(Int4(i));
700			}
701			else if(src.rel.type == Shader::PARAMETER_TEMP)
702			{
703				reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
704			}
705			return reg;
706		case Shader::PARAMETER_OUTPUT:
707			if(src.rel.type == Shader::PARAMETER_VOID)
708			{
709				reg = o[i];
710			}
711			else
712			{
713				reg = o[i + relativeAddress(src, src.bufferIndex)];
714			}
715			break;
716		case Shader::PARAMETER_MISCTYPE:
717			if(src.index == Shader::InstanceIDIndex)
718			{
719				reg.x = As<Float>(instanceID);
720			}
721			else if(src.index == Shader::VertexIDIndex)
722			{
723				reg.x = As<Float4>(vertexID);
724			}
725			else ASSERT(false);
726			return reg;
727		default:
728			ASSERT(false);
729		}
730
731		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
732		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
733		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
734		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
735
736		Vector4f mod;
737
738		switch(src.modifier)
739		{
740		case Shader::MODIFIER_NONE:
741			mod.x = x;
742			mod.y = y;
743			mod.z = z;
744			mod.w = w;
745			break;
746		case Shader::MODIFIER_NEGATE:
747			mod.x = -x;
748			mod.y = -y;
749			mod.z = -z;
750			mod.w = -w;
751			break;
752		case Shader::MODIFIER_ABS:
753			mod.x = Abs(x);
754			mod.y = Abs(y);
755			mod.z = Abs(z);
756			mod.w = Abs(w);
757			break;
758		case Shader::MODIFIER_ABS_NEGATE:
759			mod.x = -Abs(x);
760			mod.y = -Abs(y);
761			mod.z = -Abs(z);
762			mod.w = -Abs(w);
763			break;
764		case Shader::MODIFIER_NOT:
765			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
766			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
767			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
768			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
769			break;
770		default:
771			ASSERT(false);
772		}
773
774		return mod;
775	}
776
777	RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
778	{
779		if(bufferIndex == -1)
780		{
781			return data + OFFSET(DrawData, vs.c[index]);
782		}
783		else
784		{
785			return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
786		}
787	}
788
789	RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
790	{
791		return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
792	}
793
	// Reads the constant register addressed by 'src' (plus 'offset' registers)
	// and returns it as a Vector4f whose x/y/z/w members hold the corresponding
	// component of the constant.
	//
	// The addressing form is selected by src.rel.type:
	//   - PARAMETER_VOID: no relative addressing, the register index is static;
	//   - PARAMETER_LOOP: the loop counter aL[loopDepth] is added to the index;
	//   - anything else:  a register value is added to the index, either as one
	//     offset for all lanes (src.rel.deterministic) or as a separate offset
	//     per lane.
	Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
	{
		Vector4f c;
		unsigned int i = src.index + offset;

		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
		{
			// Load the whole float4 into every member, then broadcast one
			// component per member using swizzles.
			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;

			if(shader->containsDefineInstruction())   // Constant may be known at compile time
			{
				// Look for a DEF instruction targeting this register; the first
				// match replaces the loaded value with the immediate values.
				for(size_t j = 0; j < shader->getLength(); j++)
				{
					const Shader::Instruction &instruction = *shader->getInstruction(j);

					if(instruction.opcode == Shader::OPCODE_DEF)
					{
						if(instruction.dst.index == i)
						{
							c.x = Float4(instruction.src[0].value[0]);
							c.y = Float4(instruction.src[0].value[1]);
							c.z = Float4(instruction.src[0].value[2]);
							c.w = Float4(instruction.src[0].value[3]);

							break;
						}
					}
				}
			}
		}
		else if(src.rel.type == Shader::PARAMETER_LOOP)
		{
			// Relative to the loop counter of the current loop nesting level.
			Int loopCounter = aL[loopDepth];

			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));

			c.x = c.x.xxxx;
			c.y = c.y.yyyy;
			c.z = c.z.zzzz;
			c.w = c.w.wwww;
		}
		else
		{
			if(src.rel.deterministic)
			{
				// A single offset, computed by relativeAddress(), is used for
				// all four lanes.
				Int a = relativeAddress(src, src.bufferIndex);

				c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));

				c.x = c.x.xxxx;
				c.y = c.y.yyyy;
				c.z = c.z.zzzz;
				c.w = c.w.wwww;
			}
			else
			{
				// Per-lane offsets: fetch the selected component of the
				// relative-addressing register; its bits are reinterpreted as
				// integers below.
				int component = src.rel.swizzle & 0x03;
				Float4 a;

				switch(src.rel.type)
				{
				case Shader::PARAMETER_ADDR:     a = a0[component]; break;
				case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
				case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
				case Shader::PARAMETER_OUTPUT:   a = o[src.rel.index][component]; break;
				case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
				case Shader::PARAMETER_MISCTYPE:
					if(src.rel.index == Shader::InstanceIDIndex)
					{
						a = As<Float4>(Int4(instanceID)); break;
					}
					else if(src.rel.index == Shader::VertexIDIndex)
					{
						a = As<Float4>(vertexID); break;
					}
					else ASSERT(false);
					break;
				default: ASSERT(false);
				}

				// Per-lane register index: static index plus scaled offset.
				Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);

				// The comparison is unsigned, so negative indices are clamped
				// to the upper bound as well.
				index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}

				Int index0 = Extract(index, 0);
				Int index1 = Extract(index, 1);
				Int index2 = Extract(index, 2);
				Int index3 = Extract(index, 3);

				// One float4 load per lane; the static index was already folded
				// into 'index', hence the 0 passed here.
				c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
				c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
				c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
				c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);

				// After the loads each member holds one lane's full constant;
				// the transpose regroups them by component.
				transpose4x4(c.x, c.y, c.z, c.w);
			}
		}

		return c;
	}
899
900	Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
901	{
902		ASSERT(var.rel.deterministic);
903
904		if(var.rel.type == Shader::PARAMETER_TEMP)
905		{
906			return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
907		}
908		else if(var.rel.type == Shader::PARAMETER_INPUT)
909		{
910			return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
911		}
912		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
913		{
914			return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
915		}
916		else if(var.rel.type == Shader::PARAMETER_CONST)
917		{
918			return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
919		}
920		else if(var.rel.type == Shader::PARAMETER_LOOP)
921		{
922			return aL[loopDepth];
923		}
924		else ASSERT(false);
925
926		return 0;
927	}
928
929	Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
930	{
931		Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
932
933		if(!whileTest)
934		{
935			if(shader->containsBreakInstruction() && instruction->analysisBreak)
936			{
937				enable &= enableBreak;
938			}
939
940			if(shader->containsContinueInstruction() && instruction->analysisContinue)
941			{
942				enable &= enableContinue;
943			}
944
945			if(shader->containsLeaveInstruction() && instruction->analysisLeave)
946			{
947				enable &= enableLeave;
948			}
949		}
950
951		return enable;
952	}
953
954	void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
955	{
956		Vector4f row0 = fetchRegister(src1, 0);
957		Vector4f row1 = fetchRegister(src1, 1);
958
959		dst.x = dot3(src0, row0);
960		dst.y = dot3(src0, row1);
961	}
962
963	void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
964	{
965		Vector4f row0 = fetchRegister(src1, 0);
966		Vector4f row1 = fetchRegister(src1, 1);
967		Vector4f row2 = fetchRegister(src1, 2);
968
969		dst.x = dot3(src0, row0);
970		dst.y = dot3(src0, row1);
971		dst.z = dot3(src0, row2);
972	}
973
974	void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
975	{
976		Vector4f row0 = fetchRegister(src1, 0);
977		Vector4f row1 = fetchRegister(src1, 1);
978		Vector4f row2 = fetchRegister(src1, 2);
979		Vector4f row3 = fetchRegister(src1, 3);
980
981		dst.x = dot3(src0, row0);
982		dst.y = dot3(src0, row1);
983		dst.z = dot3(src0, row2);
984		dst.w = dot3(src0, row3);
985	}
986
987	void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
988	{
989		Vector4f row0 = fetchRegister(src1, 0);
990		Vector4f row1 = fetchRegister(src1, 1);
991		Vector4f row2 = fetchRegister(src1, 2);
992
993		dst.x = dot4(src0, row0);
994		dst.y = dot4(src0, row1);
995		dst.z = dot4(src0, row2);
996	}
997
998	void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
999	{
1000		Vector4f row0 = fetchRegister(src1, 0);
1001		Vector4f row1 = fetchRegister(src1, 1);
1002		Vector4f row2 = fetchRegister(src1, 2);
1003		Vector4f row3 = fetchRegister(src1, 3);
1004
1005		dst.x = dot4(src0, row0);
1006		dst.y = dot4(src0, row1);
1007		dst.z = dot4(src0, row2);
1008		dst.w = dot4(src0, row3);
1009	}
1010
1011	void VertexProgram::BREAK()
1012	{
1013		enableBreak = enableBreak & ~enableStack[enableIndex];
1014	}
1015
1016	void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1017	{
1018		Int4 condition;
1019
1020		switch(control)
1021		{
1022		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1023		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1024		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1025		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1026		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1027		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1028		default:
1029			ASSERT(false);
1030		}
1031
1032		BREAK(condition);
1033	}
1034
1035	void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1036	{
1037		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1038
1039		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1040		{
1041			condition = ~condition;
1042		}
1043
1044		BREAK(condition);
1045	}
1046
1047	void VertexProgram::BREAK(Int4 &condition)
1048	{
1049		condition &= enableStack[enableIndex];
1050
1051		enableBreak = enableBreak & ~condition;
1052	}
1053
1054	void VertexProgram::CONTINUE()
1055	{
1056		enableContinue = enableContinue & ~enableStack[enableIndex];
1057	}
1058
	// Sets the whileTest flag. While it is set, enableMask() does not apply
	// the break/continue/leave masks; ENDWHILE() clears it again.
	void VertexProgram::TEST()
	{
		whileTest = true;
	}
1063
	// Emits an unconditional call to the subroutine at 'labelIndex'.
	// 'callSiteIndex' selects the basic block, within callRetBlock[labelIndex],
	// at which emission continues after the call.
	void VertexProgram::CALL(int labelIndex, int callSiteIndex)
	{
		// The label's block is created on first reference, whether that is a
		// call (here) or the LABEL instruction itself.
		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several call sites, record which one this is so RET can select
		// the matching return block.
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		// Captured before the branch; written back in the return block below.
		Int4 restoreLeave = enableLeave;

		Nucleus::createBr(labelBlock[labelIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		// Presumably undoes any LEAVE executed inside the callee - confirm.
		enableLeave = restoreLeave;
	}
1083
1084	void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1085	{
1086		if(src.type == Shader::PARAMETER_CONSTBOOL)
1087		{
1088			CALLNZb(labelIndex, callSiteIndex, src);
1089		}
1090		else if(src.type == Shader::PARAMETER_PREDICATE)
1091		{
1092			CALLNZp(labelIndex, callSiteIndex, src);
1093		}
1094		else ASSERT(false);
1095	}
1096
	// Emits a call to 'labelIndex' that is taken when the constant boolean
	// register is non-zero (or zero, with MODIFIER_NOT). The condition is a
	// single Bool, so all lanes take the same path.
	void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
	{
		// The boolean is read as one byte from DrawData::vs.b.
		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME

		if(boolRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = !condition;
		}

		// The label's block is created on first reference.
		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several call sites, record which one this is so RET can select
		// the matching return block. Note the push is emitted before the
		// branch, i.e. regardless of whether the call is taken.
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		// Captured before the branch; written back in the return block below.
		Int4 restoreLeave = enableLeave;

		// Call when the condition holds, otherwise go straight to the return
		// block; emission then continues in the return block.
		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		enableLeave = restoreLeave;
	}
1123
	// Emits a call to 'labelIndex' predicated per lane on a predicate register
	// component (inverted with MODIFIER_NOT). The call is taken when at least
	// one lane's condition has its sign bit set; the condition becomes the
	// enable mask for the duration of the call.
	void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
	{
		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);

		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = ~condition;
		}

		// Only lanes set in the current enable mask can take the call.
		condition &= enableStack[enableIndex];

		// The label's block is created on first reference.
		if(!labelBlock[labelIndex])
		{
			labelBlock[labelIndex] = Nucleus::createBasicBlock();
		}

		// With several call sites, record which one this is so RET can select
		// the matching return block.
		if(callRetBlock[labelIndex].size() > 1)
		{
			callStack[stackIndex++] = UInt(callSiteIndex);
		}

		// Push the condition as a new enable-stack level for the callee.
		enableIndex++;
		enableStack[enableIndex] = condition;
		Int4 restoreLeave = enableLeave;

		Bool notAllFalse = SignMask(condition) != 0;
		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);

		// Emitted in the return block: pop the enable-stack level pushed above
		// and write back the saved leave mask.
		enableIndex--;
		enableLeave = restoreLeave;
	}
1156
	// Emits the transition from the 'true' part to the 'false' part of the
	// innermost open if-construct.
	void VertexProgram::ELSE()
	{
		// Step back to the bookkeeping slot of the innermost open IF.
		ifDepth--;

		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		if(isConditionalIf[ifDepth])
		{
			// Lanes for the else part: enabled one level up the enable stack
			// but not in the if-part.
			Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
			Bool notAllFalse = SignMask(condition) != 0;

			// Skip the else part entirely when no lane needs it.
			// NOTE(review): no setInsertBlock follows, so branch() presumably
			// leaves the insert point in falseBlock - confirm.
			branch(notAllFalse, falseBlock, endBlock);

			// Replace the top of the enable stack with the else-part mask.
			enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
		}
		else
		{
			// Uniform (boolean) if: end the true part with a jump to the end
			// block and continue emitting in the false block.
			Nucleus::createBr(endBlock);
			Nucleus::setInsertBlock(falseBlock);
		}

		// ENDIF reads this slot to find the block that closes the construct.
		ifFalseBlock[ifDepth] = endBlock;

		ifDepth++;
	}
1183
	// Closes the innermost open if-construct.
	void VertexProgram::ENDIF()
	{
		ifDepth--;

		// This is the false block set by IF, or the end block set by ELSE.
		BasicBlock *endBlock = ifFalseBlock[ifDepth];

		Nucleus::createBr(endBlock);
		Nucleus::setInsertBlock(endBlock);

		// Per-lane ifs pushed an enable-stack level in IF(Int4&); pop it.
		if(isConditionalIf[ifDepth])
		{
			enableIndex--;
		}
	}
1198
	// Closes a LOOP construct: advances the loop counter, jumps back to the
	// loop test and continues emission in the loop's end block.
	void VertexProgram::ENDLOOP()
	{
		loopRepDepth--;

		// Advance the loop counter by the increment loaded in LOOP().
		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		// Emitted in the end block: leave this nesting level and set every
		// lane of the break mask again.
		loopDepth--;
		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	}
1214
	// Closes a REP construct: jumps back to the repeat test and continues
	// emission in the construct's end block.
	void VertexProgram::ENDREP()
	{
		loopRepDepth--;

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		// Emitted in the end block: leave this nesting level and set every
		// lane of the break mask again.
		loopDepth--;
		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	}
1228
	// Closes a WHILE construct: jumps back to the loop test and continues
	// emission in the loop's end block.
	void VertexProgram::ENDWHILE()
	{
		loopRepDepth--;

		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(endBlock);

		// Pop the enable-stack level pushed by WHILE() and clear the flag set
		// by TEST().
		enableIndex--;
		whileTest = false;
	}
1242
	// Closes a SWITCH construct: falls through into the end block created by
	// SWITCH() and continues emission there.
	void VertexProgram::ENDSWITCH()
	{
		loopRepDepth--;

		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];

		Nucleus::createBr(endBlock);
		Nucleus::setInsertBlock(endBlock);
	}
1252
1253	void VertexProgram::IF(const Src &src)
1254	{
1255		if(src.type == Shader::PARAMETER_CONSTBOOL)
1256		{
1257			IFb(src);
1258		}
1259		else if(src.type == Shader::PARAMETER_PREDICATE)
1260		{
1261			IFp(src);
1262		}
1263		else
1264		{
1265			Int4 condition = As<Int4>(fetchRegister(src).x);
1266			IF(condition);
1267		}
1268	}
1269
	// Opens an if-construct controlled by a constant boolean register. The
	// condition is a single Bool, so no enable-stack level is pushed.
	void VertexProgram::IFb(const Src &boolRegister)
	{
		ASSERT(ifDepth < 24 + 4);

		// The boolean is read as one byte from DrawData::vs.b.
		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME

		if(boolRegister.modifier == Shader::MODIFIER_NOT)
		{
			condition = !condition;
		}

		BasicBlock *trueBlock = Nucleus::createBasicBlock();
		BasicBlock *falseBlock = Nucleus::createBasicBlock();

		branch(condition, trueBlock, falseBlock);

		// Recorded for ELSE/ENDIF: this level did not push an enable mask.
		isConditionalIf[ifDepth] = false;
		ifFalseBlock[ifDepth] = falseBlock;

		ifDepth++;
	}
1291
1292	void VertexProgram::IFp(const Src &predicateRegister)
1293	{
1294		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1295
1296		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1297		{
1298			condition = ~condition;
1299		}
1300
1301		IF(condition);
1302	}
1303
1304	void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1305	{
1306		Int4 condition;
1307
1308		switch(control)
1309		{
1310		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1311		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1312		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1313		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1314		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1315		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1316		default:
1317			ASSERT(false);
1318		}
1319
1320		IF(condition);
1321	}
1322
	// Opens an if-construct controlled by a per-lane mask. 'condition' is
	// modified in place: it is restricted to the lanes set in the current
	// enable mask and then pushed as a new enable-stack level.
	void VertexProgram::IF(Int4 &condition)
	{
		condition &= enableStack[enableIndex];

		enableIndex++;
		enableStack[enableIndex] = condition;

		BasicBlock *trueBlock = Nucleus::createBasicBlock();
		BasicBlock *falseBlock = Nucleus::createBasicBlock();

		// The true part is entered when at least one lane's sign bit is set.
		Bool notAllFalse = SignMask(condition) != 0;

		branch(notAllFalse, trueBlock, falseBlock);

		// Recorded for ELSE/ENDIF: this level pushed an enable mask.
		isConditionalIf[ifDepth] = true;
		ifFalseBlock[ifDepth] = falseBlock;

		ifDepth++;
	}
1342
1343	void VertexProgram::LABEL(int labelIndex)
1344	{
1345		if(!labelBlock[labelIndex])
1346		{
1347			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1348		}
1349
1350		Nucleus::setInsertBlock(labelBlock[labelIndex]);
1351		currentLabel = labelIndex;
1352	}
1353
	// Opens a LOOP construct controlled by a constant integer register.
	// Elements [0], [1] and [2] of the register provide the iteration count,
	// the initial loop counter (aL) and the loop counter increment.
	void VertexProgram::LOOP(const Src &integerRegister)
	{
		loopDepth++;

		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));

		// An increment of zero is replaced by one.
		// FIXME: Compiles to two instructions?
		If(increment[loopDepth] == 0)
		{
			increment[loopDepth] = 1;
		}

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		// ENDLOOP uses these to jump back to the test and to continue after
		// the loop.
		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// FIXME: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);

		// Loop test: run the body while iterations remain.
		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
		Nucleus::setInsertBlock(loopBlock);

		// First statement of the body: consume one iteration.
		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --

		loopRepDepth++;
	}
1386
	// Opens a REP construct controlled by a constant integer register, whose
	// element [0] provides the iteration count. Unlike LOOP, no loop counter
	// start/increment is loaded; the enclosing level's aL value is copied.
	void VertexProgram::REP(const Src &integerRegister)
	{
		loopDepth++;

		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
		// NOTE(review): the constructor initializes loopDepth to -1, so for an
		// outermost REP this reads aL[-1] - confirm that this is benign.
		aL[loopDepth] = aL[loopDepth - 1];

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		// ENDREP uses these to jump back to the test and to continue after
		// the loop.
		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// FIXME: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);

		// Loop test: run the body while iterations remain.
		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
		Nucleus::setInsertBlock(loopBlock);

		// First statement of the body: consume one iteration.
		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --

		loopRepDepth++;
	}
1412
	// Opens a WHILE construct whose per-lane condition is the x component of
	// 'temporaryRegister'. A new enable-stack level holds the lanes that are
	// still iterating; the body runs while at least one such lane remains.
	void VertexProgram::WHILE(const Src &temporaryRegister)
	{
		// Reserve the enable-stack level for this loop; ENDWHILE pops it.
		enableIndex++;

		BasicBlock *loopBlock = Nucleus::createBasicBlock();
		BasicBlock *testBlock = Nucleus::createBasicBlock();
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		// ENDWHILE uses these to jump back to the test and to continue after
		// the loop.
		loopRepTestBlock[loopRepDepth] = testBlock;
		loopRepEndBlock[loopRepDepth] = endBlock;

		// Captured before entering the loop.
		Int4 restoreBreak = enableBreak;
		Int4 restoreContinue = enableContinue;

		// TODO: jump(testBlock)
		Nucleus::createBr(testBlock);
		Nucleus::setInsertBlock(testBlock);
		// Emitted at the top of the test block, so the continue mask is
		// reset to its pre-loop value every time the test runs.
		enableContinue = restoreContinue;

		// Lanes keep iterating only if the condition holds, they were enabled
		// outside the loop, and they have not executed a leave or break.
		const Vector4f &src = fetchRegister(temporaryRegister);
		Int4 condition = As<Int4>(src.x);
		condition &= enableStack[enableIndex - 1];
		if(shader->containsLeaveInstruction()) condition &= enableLeave;
		if(shader->containsBreakInstruction()) condition &= enableBreak;
		enableStack[enableIndex] = condition;

		Bool notAllFalse = SignMask(condition) != 0;
		branch(notAllFalse, loopBlock, endBlock);

		// Emitted in the end block: the break mask gets its pre-loop value
		// back once the loop is exited.
		Nucleus::setInsertBlock(endBlock);
		enableBreak = restoreBreak;

		// Subsequent instructions form the loop body.
		Nucleus::setInsertBlock(loopBlock);

		loopRepDepth++;
	}
1449
	// Opens a SWITCH construct. Only an end block is created; it is entered
	// by ENDSWITCH and starts by writing back the break mask captured here.
	void VertexProgram::SWITCH()
	{
		BasicBlock *endBlock = Nucleus::createBasicBlock();

		// A switch has no test block.
		loopRepTestBlock[loopRepDepth] = nullptr;
		loopRepEndBlock[loopRepDepth] = endBlock;

		Int4 restoreBreak = enableBreak;

		BasicBlock *currentBlock = Nucleus::getInsertBlock();

		// Temporarily move the insert point to the end block to emit the
		// write-back of the break mask there, then return to where we were.
		Nucleus::setInsertBlock(endBlock);
		enableBreak = restoreBreak;

		Nucleus::setInsertBlock(currentBlock);

		loopRepDepth++;
	}
1468
	// Emits a return. Outside of any label (currentLabel == -1) this creates
	// 'returnBlock' and branches to it. Inside a subroutine it jumps back to
	// the call site's return block.
	void VertexProgram::RET()
	{
		if(currentLabel == -1)
		{
			returnBlock = Nucleus::createBasicBlock();
			Nucleus::createBr(returnBlock);
		}
		else
		{
			// Default switch target, and the insert point for anything
			// emitted after this return.
			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();

			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
			{
				// FIXME: Encapsulate
				UInt index = callStack[--stackIndex];

				// Switch on the call-site index pushed by CALL/CALLNZ, with
				// one case per return block of this subroutine.
				Value *value = index.loadValue();
				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());

				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
				{
					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
				}
			}
			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
			{
				Nucleus::createBr(callRetBlock[currentLabel][0]);
			}
			else   // Function isn't called
			{
				Nucleus::createBr(unreachableBlock);
			}

			Nucleus::setInsertBlock(unreachableBlock);
			Nucleus::createUnreachable();
		}
	}
1506
1507	void VertexProgram::LEAVE()
1508	{
1509		enableLeave = enableLeave & ~enableStack[enableIndex];
1510
1511		// FIXME: Return from function if all instances left
1512		// FIXME: Use enableLeave in other control-flow constructs
1513	}
1514
1515	void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
1516	{
1517		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), (src0), Base);
1518	}
1519
1520	void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
1521	{
1522		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Base, Offset});
1523	}
1524
1525	void VertexProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1526	{
1527		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1528	}
1529
1530	void VertexProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1531	{
1532		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1533	}
1534
1535	void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1536	{
1537		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1538	}
1539
1540	void VertexProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1541	{
1542		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1543	}
1544
1545	void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1546	{
1547		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, src0, Grad);
1548	}
1549
1550	void VertexProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1551	{
1552		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1553	}
1554
1555	void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1556	{
1557		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + src1.index * sizeof(Texture);
1558		dst = SamplerCore::textureSize(texture, lod);
1559	}
1560
1561	Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1562	{
1563		Vector4f tmp;
1564
1565		if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1566		{
1567			tmp = sampleTexture(s.index, uvwq, lod, dsx, dsy, offset, function);
1568		}
1569		else
1570		{
1571			Int index = As<Int>(Float(fetchRegister(s).x.x));
1572
1573			for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
1574			{
1575				if(shader->usesSampler(i))
1576				{
1577					If(index == i)
1578					{
1579						tmp = sampleTexture(i, uvwq, lod, dsx, dsy, offset, function);
1580						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1581					}
1582				}
1583			}
1584		}
1585
1586		Vector4f c;
1587		c.x = tmp[(s.swizzle >> 0) & 0x3];
1588		c.y = tmp[(s.swizzle >> 2) & 0x3];
1589		c.z = tmp[(s.swizzle >> 4) & 0x3];
1590		c.w = tmp[(s.swizzle >> 6) & 0x3];
1591
1592		return c;
1593	}
1594
1595	Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1596	{
1597		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture);
1598		return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, lod, dsx, dsy, offset, function);
1599	}
1600}
1601