1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "PixelProgram.hpp"
16
17#include "SamplerCore.hpp"
18#include "Renderer/Primitive.hpp"
19#include "Renderer/Renderer.hpp"
20
21namespace sw
22{
23	extern bool postBlendSRGB;
24	extern bool booleanFaceRegister;
25	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
26	extern bool fullPixelPositionRegister;
27
28	void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
29	{
30		if(shader->getShaderModel() >= 0x0300)
31		{
32			if(shader->isVPosDeclared())
33			{
34				if(!halfIntegerCoordinates)
35				{
36					vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
37					vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
38				}
39				else
40				{
41					vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
42					vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
43				}
44
45				if(fullPixelPositionRegister)
46				{
47					vPos.z = z[0]; // FIXME: Centroid?
48					vPos.w = w;    // FIXME: Centroid?
49				}
50			}
51
52			if(shader->isVFaceDeclared())
53			{
54				Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area));
55				Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
56
57				vFace.x = face;
58				vFace.y = face;
59				vFace.z = face;
60				vFace.w = face;
61			}
62		}
63	}
64
	// Processes the pixel shader one instruction at a time, in program order.
	// For each instruction the non-void sources are fetched, the opcode is dispatched
	// to its handler, and the result is saturated, merged with the previous destination
	// value under the enable/predicate masks (for predicated instructions), and written
	// to the destination register. Afterwards the shader color outputs (oC) are copied
	// to the color registers (c) and clamped, and oDepth is clamped to [0, 1] when
	// state.depthOverride is set.
	//
	// cMask: per-sample coverage masks; only passed on to TEXKILL and DISCARD here.
	void PixelProgram::applyShader(Int cMask[4])
	{
		// Start with empty control flow bookkeeping
		enableIndex = 0;
		stackIndex = 0;

		if(shader->containsLeaveInstruction())
		{
			// All lanes start out enabled (all bits set)
			enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
		}

		// Zero the color output of every render target that has a format
		for(int i = 0; i < RENDERTARGETS; i++)
		{
			if(state.targetFormat[i] != FORMAT_NULL)
			{
				oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
			}
		}

		// Create all call site return blocks up front
		for(size_t i = 0; i < shader->getLength(); i++)
		{
			const Shader::Instruction *instruction = shader->getInstruction(i);
			Shader::Opcode opcode = instruction->opcode;

			if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
			{
				const Dst &dst = instruction->dst;

				// Call sites of a label must be encountered in increasing callSite order,
				// so that the block's position in the vector equals its callSite number
				ASSERT(callRetBlock[dst.label].size() == dst.callSite);
				callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
			}
		}

		// Remains true only while every color write is a non-relative write to output 0
		bool broadcastColor0 = true;

		for(size_t i = 0; i < shader->getLength(); i++)
		{
			const Shader::Instruction *instruction = shader->getInstruction(i);
			Shader::Opcode opcode = instruction->opcode;

			// Declarations and constant definitions need no processing here
			if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
			{
				continue;
			}

			const Dst &dst = instruction->dst;
			const Src &src0 = instruction->src[0];
			const Src &src1 = instruction->src[1];
			const Src &src2 = instruction->src[2];
			const Src &src3 = instruction->src[3];
			const Src &src4 = instruction->src[4];

			bool predicate = instruction->predicate;
			Control control = instruction->control;
			bool pp = dst.partialPrecision;
			bool project = instruction->project;
			bool bias = instruction->bias;

			// d receives the result; s0..s4 hold the fetched source operands
			Vector4f d;
			Vector4f s0;
			Vector4f s1;
			Vector4f s2;
			Vector4f s3;
			Vector4f s4;

			if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
			{
				if(dst.type == Shader::PARAMETER_TEXTURE)
				{
					// NOTE(review): presumably the texture coordinate inputs follow
					// two color inputs in v[], hence the offset of 2 - confirm
					d.x = v[2 + dst.index].x;
					d.y = v[2 + dst.index].y;
					d.z = v[2 + dst.index].z;
					d.w = v[2 + dst.index].w;
				}
				else
				{
					d = r[dst.index];
				}
			}

			// Only operands that are actually present are fetched
			if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
			if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
			if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
			if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
			if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);

			// Dispatch on the opcode. Most handlers take the fetched values (s0..s4);
			// the matrix, texture and several control flow handlers instead receive
			// the Src descriptor itself (src0 / src1).
			switch(opcode)
			{
			case Shader::OPCODE_PS_2_0:                                                    break;
			case Shader::OPCODE_PS_2_x:                                                    break;
			case Shader::OPCODE_PS_3_0:                                                    break;
			case Shader::OPCODE_DEF:                                                       break;
			case Shader::OPCODE_DCL:                                                       break;
			case Shader::OPCODE_NOP:                                                       break;
			case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
			case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
			case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
			case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
			case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
			case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
			case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
			case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
			case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
			case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
			case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
			case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
			case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
			case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
			case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
			case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
			case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
			case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
			case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
			case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
			case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
			case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
			case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
			case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
			case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
			case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
			case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
			case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
			case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
			case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
			case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
			case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
			case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
			case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
			case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
			case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
			case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
			case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
			case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
			case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
			case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
			case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
			case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
			case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
			case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
			case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
			case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
			case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
			case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
			case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
			case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
			case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
			case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
			case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
			case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
			case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
			case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
			case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
			case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
			case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
			case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
			case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
			case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
			case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
			case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
			case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
			case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
			case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
			case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
			case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
			case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
			case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
			case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
			case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
			case Shader::OPCODE_ISINF:      isinf(d, s0);                                  break;
			case Shader::OPCODE_ISNAN:      isnan(d, s0);                                  break;
			case Shader::OPCODE_FLOATBITSTOINT:
			case Shader::OPCODE_FLOATBITSTOUINT:
			case Shader::OPCODE_INTBITSTOFLOAT:
			case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
			case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
			case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
			case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);                      break;
			case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
			case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
			case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);                    break;
			case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
			case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
			case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
			case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
			case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
			case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
			case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
			case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
			case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
			case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
			case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
			case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
			case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
			case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
			case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
			case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
			case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
			case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
			case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
			case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
			case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
			case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
			case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
			case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
			case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
			case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
			case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
			case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
			case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
			case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
			case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
			case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
			case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
			case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
			case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
			case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
			case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
			case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
			case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
			case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
			case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
			case Shader::OPCODE_TEX:        TEX(d, s0, src1, project, bias);               break;
			case Shader::OPCODE_TEXLDD:     TEXGRAD(d, s0, src1, s2, s3);                  break;
			case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);                     break;
			case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);                     break;
			case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
			case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
			case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);                    break;
			case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x);         break;
			case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);                 break;
			case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
			case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
			case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4);     break;
			case Shader::OPCODE_TEXBIAS:    TEXBIAS(d, s0, src1, s2.x);                    break;
			case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x);       break;
			case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
			case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
			case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
			case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
			case Shader::OPCODE_BREAK:      BREAK();                                       break;
			case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
			case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
			case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
			case Shader::OPCODE_TEST:       TEST();                                        break;
			case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
			case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
			case Shader::OPCODE_ELSE:       ELSE();                                        break;
			case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
			case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
			case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
			case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
			case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                                   break;
			case Shader::OPCODE_IF:         IF(src0);                                      break;
			case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
			case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
			case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
			case Shader::OPCODE_REP:        REP(src0);                                     break;
			case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
			case Shader::OPCODE_SWITCH:     SWITCH();                                      break;
			case Shader::OPCODE_RET:        RET();                                         break;
			case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
			case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
			case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
			case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
			case Shader::OPCODE_NOT:        bitwise_not(d, s0);                            break;
			case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);                         break;
			case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);                        break;
			case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);                        break;
			case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
			case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
			case Shader::OPCODE_END:                                                       break;
			default:
				ASSERT(false);
			}

			// Write-back stage. Skipped for instructions without a register destination
			// (void or label), and for TEXKILL (which only reads its destination) and NOP.
			if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
			{
				if(dst.saturate)
				{
					// Clamp the written components to [0, 1]: lower bound first, then upper
					if(dst.x) d.x = Max(d.x, Float4(0.0f));
					if(dst.y) d.y = Max(d.y, Float4(0.0f));
					if(dst.z) d.z = Max(d.z, Float4(0.0f));
					if(dst.w) d.w = Max(d.w, Float4(0.0f));

					if(dst.x) d.x = Min(d.x, Float4(1.0f));
					if(dst.y) d.y = Min(d.y, Float4(1.0f));
					if(dst.z) d.z = Min(d.z, Float4(1.0f));
					if(dst.w) d.w = Min(d.w, Float4(1.0f));
				}

				if(instruction->isPredicated())
				{
					// Current contents of the destination; kept in the lanes that are not enabled
					Vector4f pDst;   // FIXME: Rename

					switch(dst.type)
					{
					case Shader::PARAMETER_TEMP:
						if(dst.rel.type == Shader::PARAMETER_VOID)
						{
							if(dst.x) pDst.x = r[dst.index].x;
							if(dst.y) pDst.y = r[dst.index].y;
							if(dst.z) pDst.z = r[dst.index].z;
							if(dst.w) pDst.w = r[dst.index].w;
						}
						else
						{
							// Relatively addressed: offset the register index dynamically
							Int a = relativeAddress(dst);

							if(dst.x) pDst.x = r[dst.index + a].x;
							if(dst.y) pDst.y = r[dst.index + a].y;
							if(dst.z) pDst.z = r[dst.index + a].z;
							if(dst.w) pDst.w = r[dst.index + a].w;
						}
						break;
					case Shader::PARAMETER_COLOROUT:
						if(dst.rel.type == Shader::PARAMETER_VOID)
						{
							if(dst.x) pDst.x = oC[dst.index].x;
							if(dst.y) pDst.y = oC[dst.index].y;
							if(dst.z) pDst.z = oC[dst.index].z;
							if(dst.w) pDst.w = oC[dst.index].w;
						}
						else
						{
							Int a = relativeAddress(dst) + dst.index;

							if(dst.x) pDst.x = oC[a].x;
							if(dst.y) pDst.y = oC[a].y;
							if(dst.z) pDst.z = oC[a].z;
							if(dst.w) pDst.w = oC[a].w;
						}
						break;
					case Shader::PARAMETER_PREDICATE:
						if(dst.x) pDst.x = p0.x;
						if(dst.y) pDst.y = p0.y;
						if(dst.z) pDst.z = p0.z;
						if(dst.w) pDst.w = p0.w;
						break;
					case Shader::PARAMETER_DEPTHOUT:
						// The depth output is a single value, carried in the x component
						pDst.x = oDepth;
						break;
					default:
						ASSERT(false);
					}

					// Per-lane enable mask for this instruction, refined per component below
					Int4 enable = enableMask(instruction);

					Int4 xEnable = enable;
					Int4 yEnable = enable;
					Int4 zEnable = enable;
					Int4 wEnable = enable;

					if(predicate)
					{
						// Two bits of the swizzle per destination component select
						// which component of p0 gates it
						unsigned char pSwizzle = instruction->predicateSwizzle;

						Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
						Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
						Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
						Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];

						if(!instruction->predicateNot)
						{
							if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
							if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
							if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
							if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
						}
						else
						{
							// Negated predicate: enable the lanes where the predicate bits are clear
							if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
							if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
							if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
							if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
						}
					}

					// Bitwise select: d = (d & enable) | (pDst & ~enable)
					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);

					if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
					if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
					if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
					if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
				}

				// Store the components selected by the write mask into the destination register
				switch(dst.type)
				{
				case Shader::PARAMETER_TEMP:
					if(dst.rel.type == Shader::PARAMETER_VOID)
					{
						if(dst.x) r[dst.index].x = d.x;
						if(dst.y) r[dst.index].y = d.y;
						if(dst.z) r[dst.index].z = d.z;
						if(dst.w) r[dst.index].w = d.w;
					}
					else
					{
						Int a = relativeAddress(dst);

						if(dst.x) r[dst.index + a].x = d.x;
						if(dst.y) r[dst.index + a].y = d.y;
						if(dst.z) r[dst.index + a].z = d.z;
						if(dst.w) r[dst.index + a].w = d.w;
					}
					break;
				case Shader::PARAMETER_COLOROUT:
					if(dst.rel.type == Shader::PARAMETER_VOID)
					{
						// Any write to an output other than 0 disables broadcasting
						broadcastColor0 = (dst.index == 0) && broadcastColor0;

						if(dst.x) { oC[dst.index].x = d.x; }
						if(dst.y) { oC[dst.index].y = d.y; }
						if(dst.z) { oC[dst.index].z = d.z; }
						if(dst.w) { oC[dst.index].w = d.w; }
					}
					else
					{
						// The target of a relatively addressed write is not known here,
						// so broadcasting is disabled
						broadcastColor0 = false;
						Int a = relativeAddress(dst) + dst.index;

						if(dst.x) { oC[a].x = d.x; }
						if(dst.y) { oC[a].y = d.y; }
						if(dst.z) { oC[a].z = d.z; }
						if(dst.w) { oC[a].w = d.w; }
					}
					break;
				case Shader::PARAMETER_PREDICATE:
					if(dst.x) p0.x = d.x;
					if(dst.y) p0.y = d.y;
					if(dst.z) p0.z = d.z;
					if(dst.w) p0.w = d.w;
					break;
				case Shader::PARAMETER_DEPTHOUT:
					// Only the x component is used for depth
					oDepth = d.x;
					break;
				default:
					ASSERT(false);
				}
			}
		}

		// NOTE(review): presumably currentLabel != -1 means the program ended inside a
		// labeled subroutine, so insertion continues at the return block - confirm
		if(currentLabel != -1)
		{
			Nucleus::setInsertBlock(returnBlock);
		}

		// Transfer the shader outputs to the color registers: either output 0 replicated
		// to every render target, or each output to its own target
		if(broadcastColor0)
		{
			for(int i = 0; i < RENDERTARGETS; i++)
			{
				c[i] = oC[0];
			}
		}
		else
		{
			for(int i = 0; i < RENDERTARGETS; i++)
			{
				c[i] = oC[i];
			}
		}

		clampColor(c);

		if(state.depthOverride)
		{
			// Shader-written depth is clamped to [0, 1]
			oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f));
		}
	}
536
537	Bool PixelProgram::alphaTest(Int cMask[4])
538	{
539		if(!state.alphaTestActive())
540		{
541			return true;
542		}
543
544		Int aMask;
545
546		if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
547		{
548			Short4 alpha = RoundShort4(c[0].w * Float4(0x1000));
549
550			PixelRoutine::alphaTest(aMask, alpha);
551
552			for(unsigned int q = 0; q < state.multiSample; q++)
553			{
554				cMask[q] &= aMask;
555			}
556		}
557		else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
558		{
559			alphaToCoverage(cMask, c[0].w);
560		}
561		else ASSERT(false);
562
563		Int pass = cMask[0];
564
565		for(unsigned int q = 1; q < state.multiSample; q++)
566		{
567			pass = pass | cMask[q];
568		}
569
570		return pass != 0x0;
571	}
572
573	void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
574	{
575		for(int index = 0; index < RENDERTARGETS; index++)
576		{
577			if(!state.colorWriteActive(index))
578			{
579				continue;
580			}
581
582			if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
583			{
584				c[index].x = linearToSRGB(c[index].x);
585				c[index].y = linearToSRGB(c[index].y);
586				c[index].z = linearToSRGB(c[index].z);
587			}
588
589			if(index == 0)
590			{
591				fogBlend(c[index], fog);
592			}
593
594			switch(state.targetFormat[index])
595			{
596			case FORMAT_R5G6B5:
597			case FORMAT_X8R8G8B8:
598			case FORMAT_X8B8G8R8:
599			case FORMAT_A8R8G8B8:
600			case FORMAT_A8B8G8R8:
601			case FORMAT_SRGB8_X8:
602			case FORMAT_SRGB8_A8:
603			case FORMAT_G8R8:
604			case FORMAT_R8:
605			case FORMAT_A8:
606			case FORMAT_G16R16:
607			case FORMAT_A16B16G16R16:
608				for(unsigned int q = 0; q < state.multiSample; q++)
609				{
610					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
611					Vector4s color;
612
613					if(state.targetFormat[index] == FORMAT_R5G6B5)
614					{
615						color.x = UShort4(c[index].x * Float4(0xFBFF), false);
616						color.y = UShort4(c[index].y * Float4(0xFDFF), false);
617						color.z = UShort4(c[index].z * Float4(0xFBFF), false);
618						color.w = UShort4(c[index].w * Float4(0xFFFF), false);
619					}
620					else
621					{
622						color.x = convertFixed16(c[index].x, false);
623						color.y = convertFixed16(c[index].y, false);
624						color.z = convertFixed16(c[index].z, false);
625						color.w = convertFixed16(c[index].w, false);
626					}
627
628					if(state.multiSampleMask & (1 << q))
629					{
630						alphaBlend(index, buffer, color, x);
631						logicOperation(index, buffer, color, x);
632						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
633					}
634				}
635				break;
636			case FORMAT_R32F:
637			case FORMAT_G32R32F:
638			case FORMAT_X32B32G32R32F:
639			case FORMAT_A32B32G32R32F:
640			case FORMAT_X32B32G32R32F_UNSIGNED:
641			case FORMAT_R32I:
642			case FORMAT_G32R32I:
643			case FORMAT_A32B32G32R32I:
644			case FORMAT_R32UI:
645			case FORMAT_G32R32UI:
646			case FORMAT_A32B32G32R32UI:
647			case FORMAT_R16I:
648			case FORMAT_G16R16I:
649			case FORMAT_A16B16G16R16I:
650			case FORMAT_R16UI:
651			case FORMAT_G16R16UI:
652			case FORMAT_A16B16G16R16UI:
653			case FORMAT_R8I:
654			case FORMAT_G8R8I:
655			case FORMAT_A8B8G8R8I:
656			case FORMAT_R8UI:
657			case FORMAT_G8R8UI:
658			case FORMAT_A8B8G8R8UI:
659				for(unsigned int q = 0; q < state.multiSample; q++)
660				{
661					Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
662					Vector4f color = c[index];
663
664					if(state.multiSampleMask & (1 << q))
665					{
666						alphaBlend(index, buffer, color, x);
667						writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
668					}
669				}
670				break;
671			default:
672				ASSERT(false);
673			}
674		}
675	}
676
677	Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
678	{
679		Vector4f tmp;
680
681		if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
682		{
683			tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function);
684		}
685		else
686		{
687			Int index = As<Int>(Float(fetchRegister(sampler).x.x));
688
689			for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
690			{
691				if(shader->usesSampler(i))
692				{
693					If(index == i)
694					{
695						tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function);
696						// FIXME: When the sampler states are the same, we could use one sampler and just index the texture
697					}
698				}
699			}
700		}
701
702		Vector4f c;
703		c.x = tmp[(sampler.swizzle >> 0) & 0x3];
704		c.y = tmp[(sampler.swizzle >> 2) & 0x3];
705		c.z = tmp[(sampler.swizzle >> 4) & 0x3];
706		c.w = tmp[(sampler.swizzle >> 6) & 0x3];
707
708		return c;
709	}
710
711	Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
712	{
713		#if PERF_PROFILE
714			Long texTime = Ticks();
715		#endif
716
717		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
718		Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function);
719
720		#if PERF_PROFILE
721			cycles[PERF_TEX] += Ticks() - texTime;
722		#endif
723
724		return c;
725	}
726
727	void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
728	{
729		for(int index = 0; index < RENDERTARGETS; index++)
730		{
731			if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
732			{
733				continue;
734			}
735
736			switch(state.targetFormat[index])
737			{
738			case FORMAT_NULL:
739				break;
740			case FORMAT_R5G6B5:
741			case FORMAT_A8R8G8B8:
742			case FORMAT_A8B8G8R8:
743			case FORMAT_X8R8G8B8:
744			case FORMAT_X8B8G8R8:
745			case FORMAT_SRGB8_X8:
746			case FORMAT_SRGB8_A8:
747			case FORMAT_G8R8:
748			case FORMAT_R8:
749			case FORMAT_A8:
750			case FORMAT_G16R16:
751			case FORMAT_A16B16G16R16:
752				oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
753				oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
754				oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
755				oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
756				break;
757			case FORMAT_R32F:
758			case FORMAT_G32R32F:
759			case FORMAT_X32B32G32R32F:
760			case FORMAT_A32B32G32R32F:
761			case FORMAT_R32I:
762			case FORMAT_G32R32I:
763			case FORMAT_A32B32G32R32I:
764			case FORMAT_R32UI:
765			case FORMAT_G32R32UI:
766			case FORMAT_A32B32G32R32UI:
767			case FORMAT_R16I:
768			case FORMAT_G16R16I:
769			case FORMAT_A16B16G16R16I:
770			case FORMAT_R16UI:
771			case FORMAT_G16R16UI:
772			case FORMAT_A16B16G16R16UI:
773			case FORMAT_R8I:
774			case FORMAT_G8R8I:
775			case FORMAT_A8B8G8R8I:
776			case FORMAT_R8UI:
777			case FORMAT_G8R8UI:
778			case FORMAT_A8B8G8R8UI:
779				break;
780			case FORMAT_X32B32G32R32F_UNSIGNED:
781				oC[index].x = Max(oC[index].x, Float4(0.0f));
782				oC[index].y = Max(oC[index].y, Float4(0.0f));
783				oC[index].z = Max(oC[index].z, Float4(0.0f));
784				oC[index].w = Max(oC[index].w, Float4(0.0f));
785				break;
786			default:
787				ASSERT(false);
788			}
789		}
790	}
791
792	Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
793	{
794		Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
795
796		if(!whileTest)
797		{
798			if(shader->containsBreakInstruction() && instruction->analysisBreak)
799			{
800				enable &= enableBreak;
801			}
802
803			if(shader->containsContinueInstruction() && instruction->analysisContinue)
804			{
805				enable &= enableContinue;
806			}
807
808			if(shader->containsLeaveInstruction() && instruction->analysisLeave)
809			{
810				enable &= enableLeave;
811			}
812		}
813
814		return enable;
815	}
816
817	Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
818	{
819		Vector4f reg;
820		unsigned int i = src.index + offset;
821
822		switch(src.type)
823		{
824		case Shader::PARAMETER_TEMP:
825			if(src.rel.type == Shader::PARAMETER_VOID)
826			{
827				reg = r[i];
828			}
829			else
830			{
831				Int a = relativeAddress(src, src.bufferIndex);
832
833				reg = r[i + a];
834			}
835			break;
836		case Shader::PARAMETER_INPUT:
837			{
838				if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
839				{
840					reg = v[i];
841				}
842				else
843				{
844					Int a = relativeAddress(src, src.bufferIndex);
845
846					reg = v[i + a];
847				}
848			}
849			break;
850		case Shader::PARAMETER_CONST:
851			reg = readConstant(src, offset);
852			break;
853		case Shader::PARAMETER_TEXTURE:
854			reg = v[2 + i];
855			break;
856		case Shader::PARAMETER_MISCTYPE:
857			if(src.index == Shader::VPosIndex) reg = vPos;
858			if(src.index == Shader::VFaceIndex) reg = vFace;
859			break;
860		case Shader::PARAMETER_SAMPLER:
861			if(src.rel.type == Shader::PARAMETER_VOID)
862			{
863				reg.x = As<Float4>(Int4(i));
864			}
865			else if(src.rel.type == Shader::PARAMETER_TEMP)
866			{
867				reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
868			}
869			return reg;
870		case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
871		case Shader::PARAMETER_VOID:        return reg; // Dummy
872		case Shader::PARAMETER_FLOAT4LITERAL:
873			reg.x = Float4(src.value[0]);
874			reg.y = Float4(src.value[1]);
875			reg.z = Float4(src.value[2]);
876			reg.w = Float4(src.value[3]);
877			break;
878		case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
879		case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
880		case Shader::PARAMETER_LOOP:        return reg; // Dummy
881		case Shader::PARAMETER_COLOROUT:
882			if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
883			{
884				reg = oC[i];
885			}
886			else
887			{
888				Int a = relativeAddress(src, src.bufferIndex);
889
890				reg = oC[i + a];
891			}
892			break;
893		case Shader::PARAMETER_DEPTHOUT:
894			reg.x = oDepth;
895			break;
896		default:
897			ASSERT(false);
898		}
899
900		const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
901		const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
902		const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
903		const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
904
905		Vector4f mod;
906
907		switch(src.modifier)
908		{
909		case Shader::MODIFIER_NONE:
910			mod.x = x;
911			mod.y = y;
912			mod.z = z;
913			mod.w = w;
914			break;
915		case Shader::MODIFIER_NEGATE:
916			mod.x = -x;
917			mod.y = -y;
918			mod.z = -z;
919			mod.w = -w;
920			break;
921		case Shader::MODIFIER_ABS:
922			mod.x = Abs(x);
923			mod.y = Abs(y);
924			mod.z = Abs(z);
925			mod.w = Abs(w);
926			break;
927		case Shader::MODIFIER_ABS_NEGATE:
928			mod.x = -Abs(x);
929			mod.y = -Abs(y);
930			mod.z = -Abs(z);
931			mod.w = -Abs(w);
932			break;
933		case Shader::MODIFIER_NOT:
934			mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
935			mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
936			mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
937			mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
938			break;
939		default:
940			ASSERT(false);
941		}
942
943		return mod;
944	}
945
946	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
947	{
948		if(bufferIndex == -1)
949		{
950			return data + OFFSET(DrawData, ps.c[index]);
951		}
952		else
953		{
954			return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
955		}
956	}
957
958	RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
959	{
960		return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
961	}
962
963	Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
964	{
965		Vector4f c;
966		unsigned int i = src.index + offset;
967
968		if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
969		{
970			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
971
972			c.x = c.x.xxxx;
973			c.y = c.y.yyyy;
974			c.z = c.z.zzzz;
975			c.w = c.w.wwww;
976
977			if(shader->containsDefineInstruction())   // Constant may be known at compile time
978			{
979				for(size_t j = 0; j < shader->getLength(); j++)
980				{
981					const Shader::Instruction &instruction = *shader->getInstruction(j);
982
983					if(instruction.opcode == Shader::OPCODE_DEF)
984					{
985						if(instruction.dst.index == i)
986						{
987							c.x = Float4(instruction.src[0].value[0]);
988							c.y = Float4(instruction.src[0].value[1]);
989							c.z = Float4(instruction.src[0].value[2]);
990							c.w = Float4(instruction.src[0].value[3]);
991
992							break;
993						}
994					}
995				}
996			}
997		}
998		else if(src.rel.type == Shader::PARAMETER_LOOP)
999		{
1000			Int loopCounter = aL[loopDepth];
1001
1002			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
1003
1004			c.x = c.x.xxxx;
1005			c.y = c.y.yyyy;
1006			c.z = c.z.zzzz;
1007			c.w = c.w.wwww;
1008		}
1009		else
1010		{
1011			Int a = relativeAddress(src, src.bufferIndex);
1012
1013			c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
1014
1015			c.x = c.x.xxxx;
1016			c.y = c.y.yyyy;
1017			c.z = c.z.zzzz;
1018			c.w = c.w.wwww;
1019		}
1020
1021		return c;
1022	}
1023
1024	Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
1025	{
1026		ASSERT(var.rel.deterministic);
1027
1028		if(var.rel.type == Shader::PARAMETER_TEMP)
1029		{
1030			return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
1031		}
1032		else if(var.rel.type == Shader::PARAMETER_INPUT)
1033		{
1034			return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
1035		}
1036		else if(var.rel.type == Shader::PARAMETER_OUTPUT)
1037		{
1038			return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
1039		}
1040		else if(var.rel.type == Shader::PARAMETER_CONST)
1041		{
1042			return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
1043		}
1044		else if(var.rel.type == Shader::PARAMETER_LOOP)
1045		{
1046			return aL[loopDepth];
1047		}
1048		else ASSERT(false);
1049
1050		return 0;
1051	}
1052
1053	Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
1054	{
1055		Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
1056		Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
1057
1058		return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
1059	}
1060
1061	void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
1062	{
1063		Vector4f row0 = fetchRegister(src1, 0);
1064		Vector4f row1 = fetchRegister(src1, 1);
1065
1066		dst.x = dot3(src0, row0);
1067		dst.y = dot3(src0, row1);
1068	}
1069
1070	void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1071	{
1072		Vector4f row0 = fetchRegister(src1, 0);
1073		Vector4f row1 = fetchRegister(src1, 1);
1074		Vector4f row2 = fetchRegister(src1, 2);
1075
1076		dst.x = dot3(src0, row0);
1077		dst.y = dot3(src0, row1);
1078		dst.z = dot3(src0, row2);
1079	}
1080
1081	void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1082	{
1083		Vector4f row0 = fetchRegister(src1, 0);
1084		Vector4f row1 = fetchRegister(src1, 1);
1085		Vector4f row2 = fetchRegister(src1, 2);
1086		Vector4f row3 = fetchRegister(src1, 3);
1087
1088		dst.x = dot3(src0, row0);
1089		dst.y = dot3(src0, row1);
1090		dst.z = dot3(src0, row2);
1091		dst.w = dot3(src0, row3);
1092	}
1093
1094	void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1095	{
1096		Vector4f row0 = fetchRegister(src1, 0);
1097		Vector4f row1 = fetchRegister(src1, 1);
1098		Vector4f row2 = fetchRegister(src1, 2);
1099
1100		dst.x = dot4(src0, row0);
1101		dst.y = dot4(src0, row1);
1102		dst.z = dot4(src0, row2);
1103	}
1104
1105	void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1106	{
1107		Vector4f row0 = fetchRegister(src1, 0);
1108		Vector4f row1 = fetchRegister(src1, 1);
1109		Vector4f row2 = fetchRegister(src1, 2);
1110		Vector4f row3 = fetchRegister(src1, 3);
1111
1112		dst.x = dot4(src0, row0);
1113		dst.y = dot4(src0, row1);
1114		dst.z = dot4(src0, row2);
1115		dst.w = dot4(src0, row3);
1116	}
1117
1118	void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1119	{
1120		if(project)
1121		{
1122			Vector4f proj;
1123			Float4 rw = reciprocal(src0.w);
1124			proj.x = src0.x * rw;
1125			proj.y = src0.y * rw;
1126			proj.z = src0.z * rw;
1127
1128			dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit);
1129		}
1130		else
1131		{
1132			dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit);
1133		}
1134	}
1135
1136	void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset)
1137	{
1138		dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset});
1139	}
1140
1141	void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod)
1142	{
1143		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1144	}
1145
1146	void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias)
1147	{
1148		dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
1149	}
1150
1151	void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
1152	{
1153		dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
1154	}
1155
1156	void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1157	{
1158		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1159	}
1160
1161	void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1162	{
1163		dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1164	}
1165
1166	void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1167	{
1168		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad);
1169	}
1170
1171	void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1172	{
1173		dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1174	}
1175
1176	void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod)
1177	{
1178		dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1179	}
1180
1181	void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1182	{
1183		Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
1184		dst = SamplerCore::textureSize(texture, lod);
1185	}
1186
1187	void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1188	{
1189		Int kill = -1;
1190
1191		if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1192		if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1193		if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1194		if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1195
1196		// FIXME: Dynamic branching affects TEXKILL?
1197		//	if(shader->containsDynamicBranching())
1198		//	{
1199		//		kill = ~SignMask(enableMask());
1200		//	}
1201
1202		for(unsigned int q = 0; q < state.multiSample; q++)
1203		{
1204			cMask[q] &= kill;
1205		}
1206
1207		// FIXME: Branch to end of shader if all killed?
1208	}
1209
1210	void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
1211	{
1212		Int kill = 0;
1213
1214		if(shader->containsDynamicBranching())
1215		{
1216			kill = ~SignMask(enableMask(instruction));
1217		}
1218
1219		for(unsigned int q = 0; q < state.multiSample; q++)
1220		{
1221			cMask[q] &= kill;
1222		}
1223
1224		// FIXME: Branch to end of shader if all killed?
1225	}
1226
	// Computes the horizontal derivative of each component of 'src' by
	// subtracting lanes 0 and 2 from lanes 1 and 3 (yyww - xxzz). Both lanes of
	// each pair receive the same difference.
	// NOTE(review): setBuiltins assigns x offsets (0, 1, 0, 1) to the lanes, so
	// this presumably is right minus left within each row of a 2x2 quad.
	void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
	{
		dst.x = src.x.yyww - src.x.xxzz;
		dst.y = src.y.yyww - src.y.xxzz;
		dst.z = src.z.yyww - src.z.xxzz;
		dst.w = src.w.yyww - src.w.xxzz;
	}
1234
	// Computes the vertical derivative of each component of 'src' by
	// subtracting lanes 0 and 1 from lanes 2 and 3 (zwzw - xyxy). Both lanes of
	// each pair receive the same difference.
	// NOTE(review): setBuiltins assigns y offsets (0, 0, 1, 1) to the lanes, so
	// this presumably is the second row minus the first row of a 2x2 quad.
	void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
	{
		dst.x = src.x.zwzw - src.x.xyxy;
		dst.y = src.y.zwzw - src.y.xyxy;
		dst.z = src.z.zwzw - src.z.xyxy;
		dst.w = src.w.zwzw - src.w.xyxy;
	}
1242
1243	void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1244	{
1245		// abs(dFdx(src)) + abs(dFdy(src));
1246		dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1247		dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1248		dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1249		dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1250	}
1251
1252	void PixelProgram::BREAK()
1253	{
1254		enableBreak = enableBreak & ~enableStack[enableIndex];
1255	}
1256
1257	void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1258	{
1259		Int4 condition;
1260
1261		switch(control)
1262		{
1263		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1264		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1265		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1266		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1267		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1268		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1269		default:
1270			ASSERT(false);
1271		}
1272
1273		BREAK(condition);
1274	}
1275
1276	void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1277	{
1278		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1279
1280		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1281		{
1282			condition = ~condition;
1283		}
1284
1285		BREAK(condition);
1286	}
1287
1288	void PixelProgram::BREAK(Int4 &condition)
1289	{
1290		condition &= enableStack[enableIndex];
1291
1292		enableBreak = enableBreak & ~condition;
1293	}
1294
1295	void PixelProgram::CONTINUE()
1296	{
1297		enableContinue = enableContinue & ~enableStack[enableIndex];
1298	}
1299
	// Marks the start of a while-loop test. While the flag is set, enableMask()
	// does not apply the break, continue and leave masks; ENDWHILE() clears it.
	void PixelProgram::TEST()
	{
		whileTest = true;
	}
1304
1305	void PixelProgram::CALL(int labelIndex, int callSiteIndex)
1306	{
1307		if(!labelBlock[labelIndex])
1308		{
1309			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1310		}
1311
1312		if(callRetBlock[labelIndex].size() > 1)
1313		{
1314			callStack[stackIndex++] = UInt(callSiteIndex);
1315		}
1316
1317		Int4 restoreLeave = enableLeave;
1318
1319		Nucleus::createBr(labelBlock[labelIndex]);
1320		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1321
1322		enableLeave = restoreLeave;
1323	}
1324
1325	void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1326	{
1327		if(src.type == Shader::PARAMETER_CONSTBOOL)
1328		{
1329			CALLNZb(labelIndex, callSiteIndex, src);
1330		}
1331		else if(src.type == Shader::PARAMETER_PREDICATE)
1332		{
1333			CALLNZp(labelIndex, callSiteIndex, src);
1334		}
1335		else ASSERT(false);
1336	}
1337
1338	void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1339	{
1340		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1341
1342		if(boolRegister.modifier == Shader::MODIFIER_NOT)
1343		{
1344			condition = !condition;
1345		}
1346
1347		if(!labelBlock[labelIndex])
1348		{
1349			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1350		}
1351
1352		if(callRetBlock[labelIndex].size() > 1)
1353		{
1354			callStack[stackIndex++] = UInt(callSiteIndex);
1355		}
1356
1357		Int4 restoreLeave = enableLeave;
1358
1359		branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1360		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1361
1362		enableLeave = restoreLeave;
1363	}
1364
1365	void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1366	{
1367		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1368
1369		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1370		{
1371			condition = ~condition;
1372		}
1373
1374		condition &= enableStack[enableIndex];
1375
1376		if(!labelBlock[labelIndex])
1377		{
1378			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1379		}
1380
1381		if(callRetBlock[labelIndex].size() > 1)
1382		{
1383			callStack[stackIndex++] = UInt(callSiteIndex);
1384		}
1385
1386		enableIndex++;
1387		enableStack[enableIndex] = condition;
1388		Int4 restoreLeave = enableLeave;
1389
1390		Bool notAllFalse = SignMask(condition) != 0;
1391		branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1392		Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1393
1394		enableIndex--;
1395		enableLeave = restoreLeave;
1396	}
1397
1398	void PixelProgram::ELSE()
1399	{
1400		ifDepth--;
1401
1402		BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1403		BasicBlock *endBlock = Nucleus::createBasicBlock();
1404
1405		if(isConditionalIf[ifDepth])
1406		{
1407			Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1408			Bool notAllFalse = SignMask(condition) != 0;
1409
1410			branch(notAllFalse, falseBlock, endBlock);
1411
1412			enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1413		}
1414		else
1415		{
1416			Nucleus::createBr(endBlock);
1417			Nucleus::setInsertBlock(falseBlock);
1418		}
1419
1420		ifFalseBlock[ifDepth] = endBlock;
1421
1422		ifDepth++;
1423	}
1424
1425	void PixelProgram::ENDIF()
1426	{
1427		ifDepth--;
1428
1429		BasicBlock *endBlock = ifFalseBlock[ifDepth];
1430
1431		Nucleus::createBr(endBlock);
1432		Nucleus::setInsertBlock(endBlock);
1433
1434		if(isConditionalIf[ifDepth])
1435		{
1436			enableIndex--;
1437		}
1438	}
1439
1440	void PixelProgram::ENDLOOP()
1441	{
1442		loopRepDepth--;
1443
1444		aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1445
1446		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1447		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1448
1449		Nucleus::createBr(testBlock);
1450		Nucleus::setInsertBlock(endBlock);
1451
1452		loopDepth--;
1453		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1454	}
1455
1456	void PixelProgram::ENDREP()
1457	{
1458		loopRepDepth--;
1459
1460		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1461		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1462
1463		Nucleus::createBr(testBlock);
1464		Nucleus::setInsertBlock(endBlock);
1465
1466		loopDepth--;
1467		enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1468	}
1469
1470	void PixelProgram::ENDWHILE()
1471	{
1472		loopRepDepth--;
1473
1474		BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1475		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1476
1477		Nucleus::createBr(testBlock);
1478		Nucleus::setInsertBlock(endBlock);
1479
1480		enableIndex--;
1481		whileTest = false;
1482	}
1483
1484	void PixelProgram::ENDSWITCH()
1485	{
1486		loopRepDepth--;
1487
1488		BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1489
1490		Nucleus::createBr(endBlock);
1491		Nucleus::setInsertBlock(endBlock);
1492	}
1493
1494	void PixelProgram::IF(const Src &src)
1495	{
1496		if(src.type == Shader::PARAMETER_CONSTBOOL)
1497		{
1498			IFb(src);
1499		}
1500		else if(src.type == Shader::PARAMETER_PREDICATE)
1501		{
1502			IFp(src);
1503		}
1504		else
1505		{
1506			Int4 condition = As<Int4>(fetchRegister(src).x);
1507			IF(condition);
1508		}
1509	}
1510
1511	void PixelProgram::IFb(const Src &boolRegister)
1512	{
1513		ASSERT(ifDepth < 24 + 4);
1514
1515		Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1516
1517		if(boolRegister.modifier == Shader::MODIFIER_NOT)
1518		{
1519			condition = !condition;
1520		}
1521
1522		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1523		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1524
1525		branch(condition, trueBlock, falseBlock);
1526
1527		isConditionalIf[ifDepth] = false;
1528		ifFalseBlock[ifDepth] = falseBlock;
1529
1530		ifDepth++;
1531	}
1532
1533	void PixelProgram::IFp(const Src &predicateRegister)
1534	{
1535		Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1536
1537		if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1538		{
1539			condition = ~condition;
1540		}
1541
1542		IF(condition);
1543	}
1544
1545	void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1546	{
1547		Int4 condition;
1548
1549		switch(control)
1550		{
1551		case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1552		case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1553		case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1554		case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1555		case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1556		case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1557		default:
1558			ASSERT(false);
1559		}
1560
1561		IF(condition);
1562	}
1563
1564	void PixelProgram::IF(Int4 &condition)
1565	{
1566		condition &= enableStack[enableIndex];
1567
1568		enableIndex++;
1569		enableStack[enableIndex] = condition;
1570
1571		BasicBlock *trueBlock = Nucleus::createBasicBlock();
1572		BasicBlock *falseBlock = Nucleus::createBasicBlock();
1573
1574		Bool notAllFalse = SignMask(condition) != 0;
1575
1576		branch(notAllFalse, trueBlock, falseBlock);
1577
1578		isConditionalIf[ifDepth] = true;
1579		ifFalseBlock[ifDepth] = falseBlock;
1580
1581		ifDepth++;
1582	}
1583
1584	void PixelProgram::LABEL(int labelIndex)
1585	{
1586		if(!labelBlock[labelIndex])
1587		{
1588			labelBlock[labelIndex] = Nucleus::createBasicBlock();
1589		}
1590
1591		Nucleus::setInsertBlock(labelBlock[labelIndex]);
1592		currentLabel = labelIndex;
1593	}
1594
1595	void PixelProgram::LOOP(const Src &integerRegister)
1596	{
1597		loopDepth++;
1598
1599		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1600		aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1601		increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1602
1603		//	If(increment[loopDepth] == 0)
1604		//	{
1605		//		increment[loopDepth] = 1;
1606		//	}
1607
1608		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1609		BasicBlock *testBlock = Nucleus::createBasicBlock();
1610		BasicBlock *endBlock = Nucleus::createBasicBlock();
1611
1612		loopRepTestBlock[loopRepDepth] = testBlock;
1613		loopRepEndBlock[loopRepDepth] = endBlock;
1614
1615		// FIXME: jump(testBlock)
1616		Nucleus::createBr(testBlock);
1617		Nucleus::setInsertBlock(testBlock);
1618
1619		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1620		Nucleus::setInsertBlock(loopBlock);
1621
1622		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1623
1624		loopRepDepth++;
1625	}
1626
1627	void PixelProgram::REP(const Src &integerRegister)
1628	{
1629		loopDepth++;
1630
1631		iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1632		aL[loopDepth] = aL[loopDepth - 1];
1633
1634		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1635		BasicBlock *testBlock = Nucleus::createBasicBlock();
1636		BasicBlock *endBlock = Nucleus::createBasicBlock();
1637
1638		loopRepTestBlock[loopRepDepth] = testBlock;
1639		loopRepEndBlock[loopRepDepth] = endBlock;
1640
1641		// FIXME: jump(testBlock)
1642		Nucleus::createBr(testBlock);
1643		Nucleus::setInsertBlock(testBlock);
1644
1645		branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1646		Nucleus::setInsertBlock(loopBlock);
1647
1648		iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1649
1650		loopRepDepth++;
1651	}
1652
1653	void PixelProgram::WHILE(const Src &temporaryRegister)
1654	{
1655		enableIndex++;
1656
1657		BasicBlock *loopBlock = Nucleus::createBasicBlock();
1658		BasicBlock *testBlock = Nucleus::createBasicBlock();
1659		BasicBlock *endBlock = Nucleus::createBasicBlock();
1660
1661		loopRepTestBlock[loopRepDepth] = testBlock;
1662		loopRepEndBlock[loopRepDepth] = endBlock;
1663
1664		Int4 restoreBreak = enableBreak;
1665		Int4 restoreContinue = enableContinue;
1666
1667		// TODO: jump(testBlock)
1668		Nucleus::createBr(testBlock);
1669		Nucleus::setInsertBlock(testBlock);
1670		enableContinue = restoreContinue;
1671
1672		const Vector4f &src = fetchRegister(temporaryRegister);
1673		Int4 condition = As<Int4>(src.x);
1674		condition &= enableStack[enableIndex - 1];
1675		if(shader->containsLeaveInstruction()) condition &= enableLeave;
1676		if(shader->containsBreakInstruction()) condition &= enableBreak;
1677		enableStack[enableIndex] = condition;
1678
1679		Bool notAllFalse = SignMask(condition) != 0;
1680		branch(notAllFalse, loopBlock, endBlock);
1681
1682		Nucleus::setInsertBlock(endBlock);
1683		enableBreak = restoreBreak;
1684
1685		Nucleus::setInsertBlock(loopBlock);
1686
1687		loopRepDepth++;
1688	}
1689
1690	void PixelProgram::SWITCH()
1691	{
1692		BasicBlock *endBlock = Nucleus::createBasicBlock();
1693
1694		loopRepTestBlock[loopRepDepth] = nullptr;
1695		loopRepEndBlock[loopRepDepth] = endBlock;
1696
1697		Int4 restoreBreak = enableBreak;
1698
1699		BasicBlock *currentBlock = Nucleus::getInsertBlock();
1700
1701		Nucleus::setInsertBlock(endBlock);
1702		enableBreak = restoreBreak;
1703
1704		Nucleus::setInsertBlock(currentBlock);
1705
1706		loopRepDepth++;
1707	}
1708
1709	void PixelProgram::RET()
1710	{
1711		if(currentLabel == -1)
1712		{
1713			returnBlock = Nucleus::createBasicBlock();
1714			Nucleus::createBr(returnBlock);
1715		}
1716		else
1717		{
1718			BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1719
1720			if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1721			{
1722				// FIXME: Encapsulate
1723				UInt index = callStack[--stackIndex];
1724
1725				Value *value = index.loadValue();
1726				SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1727
1728				for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1729				{
1730					Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1731				}
1732			}
1733			else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1734			{
1735				Nucleus::createBr(callRetBlock[currentLabel][0]);
1736			}
1737			else   // Function isn't called
1738			{
1739				Nucleus::createBr(unreachableBlock);
1740			}
1741
1742			Nucleus::setInsertBlock(unreachableBlock);
1743			Nucleus::createUnreachable();
1744		}
1745	}
1746
1747	void PixelProgram::LEAVE()
1748	{
1749		enableLeave = enableLeave & ~enableStack[enableIndex];
1750
1751		// FIXME: Return from function if all instances left
1752		// FIXME: Use enableLeave in other control-flow constructs
1753	}
1754}
1755