1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
#include "Shader.hpp"

#include "VertexShader.hpp"
#include "PixelShader.hpp"
#include "Math.hpp"
#include "Debug.hpp"

#include <set>
#include <fstream>
#include <sstream>
#include <stdarg.h>
#include <string.h>
26
27namespace sw
28{
	// Source of the serial numbers handed out by the Shader constructor:
	// starts at 1 and is post-incremented once per constructed Shader.
	volatile int Shader::serialCounter = 1;
30
31	Shader::Opcode Shader::OPCODE_DP(int i)
32	{
33		switch(i)
34		{
35		default: ASSERT(false);
36		case 1: return OPCODE_DP1;
37		case 2: return OPCODE_DP2;
38		case 3: return OPCODE_DP3;
39		case 4: return OPCODE_DP4;
40		}
41	}
42
43	Shader::Opcode Shader::OPCODE_LEN(int i)
44	{
45		switch(i)
46		{
47		default: ASSERT(false);
48		case 1: return OPCODE_ABS;
49		case 2: return OPCODE_LEN2;
50		case 3: return OPCODE_LEN3;
51		case 4: return OPCODE_LEN4;
52		}
53	}
54
55	Shader::Opcode Shader::OPCODE_DIST(int i)
56	{
57		switch(i)
58		{
59		default: ASSERT(false);
60		case 1: return OPCODE_DIST1;
61		case 2: return OPCODE_DIST2;
62		case 3: return OPCODE_DIST3;
63		case 4: return OPCODE_DIST4;
64		}
65	}
66
67	Shader::Opcode Shader::OPCODE_NRM(int i)
68	{
69		switch(i)
70		{
71		default: ASSERT(false);
72		case 1: return OPCODE_SGN;
73		case 2: return OPCODE_NRM2;
74		case 3: return OPCODE_NRM3;
75		case 4: return OPCODE_NRM4;
76		}
77	}
78
79	Shader::Opcode Shader::OPCODE_FORWARD(int i)
80	{
81		switch(i)
82		{
83		default: ASSERT(false);
84		case 1: return OPCODE_FORWARD1;
85		case 2: return OPCODE_FORWARD2;
86		case 3: return OPCODE_FORWARD3;
87		case 4: return OPCODE_FORWARD4;
88		}
89	}
90
91	Shader::Opcode Shader::OPCODE_REFLECT(int i)
92	{
93		switch(i)
94		{
95		default: ASSERT(false);
96		case 1: return OPCODE_REFLECT1;
97		case 2: return OPCODE_REFLECT2;
98		case 3: return OPCODE_REFLECT3;
99		case 4: return OPCODE_REFLECT4;
100		}
101	}
102
103	Shader::Opcode Shader::OPCODE_REFRACT(int i)
104	{
105		switch(i)
106		{
107		default: ASSERT(false);
108		case 1: return OPCODE_REFRACT1;
109		case 2: return OPCODE_REFRACT2;
110		case 3: return OPCODE_REFRACT3;
111		case 4: return OPCODE_REFRACT4;
112		}
113	}
114
115	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116	{
117		control = CONTROL_RESERVED0;
118
119		predicate = false;
120		predicateNot = false;
121		predicateSwizzle = 0xE4;
122
123		coissue = false;
124		samplerType = SAMPLER_UNKNOWN;
125		usage = USAGE_POSITION;
126		usageIndex = 0;
127	}
128
129	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130	{
131		parseOperationToken(*token++, majorVersion);
132
133		samplerType = SAMPLER_UNKNOWN;
134		usage = USAGE_POSITION;
135		usageIndex = 0;
136
137		if(opcode == OPCODE_IF ||
138		   opcode == OPCODE_IFC ||
139		   opcode == OPCODE_LOOP ||
140		   opcode == OPCODE_REP ||
141		   opcode == OPCODE_BREAKC ||
142		   opcode == OPCODE_BREAKP)   // No destination operand
143		{
144			if(size > 0) parseSourceToken(0, token++, majorVersion);
145			if(size > 1) parseSourceToken(1, token++, majorVersion);
146			if(size > 2) parseSourceToken(2, token++, majorVersion);
147			if(size > 3) ASSERT(false);
148		}
149		else if(opcode == OPCODE_DCL)
150		{
151			parseDeclarationToken(*token++);
152			parseDestinationToken(token++, majorVersion);
153		}
154		else
155		{
156			if(size > 0)
157			{
158				parseDestinationToken(token, majorVersion);
159
160				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161				{
162					token++;
163					size--;
164				}
165
166				token++;
167				size--;
168			}
169
170			if(predicate)
171			{
172				ASSERT(size != 0);
173
174				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176
177				token++;
178				size--;
179			}
180
181			for(int i = 0; size > 0; i++)
182			{
183				parseSourceToken(i, token, majorVersion);
184
185				token++;
186				size--;
187
188				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189				{
190					token++;
191					size--;
192				}
193			}
194		}
195	}
196
	// Nothing to release explicitly; the body is intentionally empty.
	Shader::Instruction::~Instruction()
	{
	}
200
201	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202	{
203		std::string instructionString;
204
205		if(opcode != OPCODE_DCL)
206		{
207			instructionString += coissue ? "+ " : "";
208
209			if(predicate)
210			{
211				instructionString += predicateNot ? "(!p0" : "(p0";
212				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213				instructionString += ") ";
214			}
215
216			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217
218			if(dst.type != PARAMETER_VOID)
219			{
220				instructionString += " " + dst.string(shaderType, version) +
221				                           dst.relativeString() +
222				                           dst.maskString();
223			}
224
225			for(int i = 0; i < 4; i++)
226			{
227				if(src[i].type != PARAMETER_VOID)
228				{
229					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230					instructionString += src[i].preModifierString() +
231										 src[i].string(shaderType, version) +
232										 src[i].relativeString() +
233										 src[i].postModifierString() +
234										 src[i].swizzleString();
235				}
236			}
237		}
238		else   // DCL
239		{
240			instructionString += "dcl";
241
242			if(dst.type == PARAMETER_SAMPLER)
243			{
244				switch(samplerType)
245				{
246				case SAMPLER_UNKNOWN: instructionString += " ";        break;
247				case SAMPLER_1D:      instructionString += "_1d ";     break;
248				case SAMPLER_2D:      instructionString += "_2d ";     break;
249				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251				default:
252					ASSERT(false);
253				}
254
255				instructionString += dst.string(shaderType, version);
256			}
257			else if(dst.type == PARAMETER_INPUT ||
258				    dst.type == PARAMETER_OUTPUT ||
259				    dst.type == PARAMETER_TEXTURE)
260			{
261				if(version >= 0x0300)
262				{
263					switch(usage)
264					{
265					case USAGE_POSITION:     instructionString += "_position";     break;
266					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268					case USAGE_NORMAL:       instructionString += "_normal";       break;
269					case USAGE_PSIZE:        instructionString += "_psize";        break;
270					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271					case USAGE_TANGENT:      instructionString += "_tangent";      break;
272					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275					case USAGE_COLOR:        instructionString += "_color";        break;
276					case USAGE_FOG:          instructionString += "_fog";          break;
277					case USAGE_DEPTH:        instructionString += "_depth";        break;
278					case USAGE_SAMPLE:       instructionString += "_sample";       break;
279					default:
280						ASSERT(false);
281					}
282
283					if(usageIndex > 0)
284					{
285						std::ostringstream buffer;
286
287						buffer << (int)usageIndex;
288
289						instructionString += buffer.str();
290					}
291				}
292				else ASSERT(dst.type != PARAMETER_OUTPUT);
293
294				instructionString += " ";
295
296				instructionString += dst.string(shaderType, version);
297				instructionString += dst.maskString();
298			}
299			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300			{
301				instructionString += " ";
302
303				instructionString += dst.string(shaderType, version);
304			}
305			else ASSERT(false);
306		}
307
308		return instructionString;
309	}
310
311	std::string Shader::DestinationParameter::modifierString() const
312	{
313		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314		{
315			return "";
316		}
317
318		std::string modifierString;
319
320		if(integer)
321		{
322			modifierString += "_int";
323		}
324
325		if(saturate)
326		{
327			modifierString += "_sat";
328		}
329
330		if(partialPrecision)
331		{
332			modifierString += "_pp";
333		}
334
335		if(centroid)
336		{
337			modifierString += "_centroid";
338		}
339
340		return modifierString;
341	}
342
343	std::string Shader::DestinationParameter::shiftString() const
344	{
345		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
346		{
347			return "";
348		}
349
350		switch(shift)
351		{
352		case 0:		return "";
353		case 1:		return "_x2";
354		case 2:		return "_x4";
355		case 3:		return "_x8";
356		case -1:	return "_d2";
357		case -2:	return "_d4";
358		case -3:	return "_d8";
359		default:
360			return "";
361		//	ASSERT(false);   // FIXME
362		}
363	}
364
365	std::string Shader::DestinationParameter::maskString() const
366	{
367		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
368		{
369			return "";
370		}
371
372		switch(mask)
373		{
374		case 0x0:	return "";
375		case 0x1:	return ".x";
376		case 0x2:	return ".y";
377		case 0x3:	return ".xy";
378		case 0x4:	return ".z";
379		case 0x5:	return ".xz";
380		case 0x6:	return ".yz";
381		case 0x7:	return ".xyz";
382		case 0x8:	return ".w";
383		case 0x9:	return ".xw";
384		case 0xA:	return ".yw";
385		case 0xB:	return ".xyw";
386		case 0xC:	return ".zw";
387		case 0xD:	return ".xzw";
388		case 0xE:	return ".yzw";
389		case 0xF:	return "";
390		default:
391			ASSERT(false);
392		}
393
394		return "";
395	}
396
397	std::string Shader::SourceParameter::preModifierString() const
398	{
399		if(type == PARAMETER_VOID)
400		{
401			return "";
402		}
403
404		switch(modifier)
405		{
406		case MODIFIER_NONE:			return "";
407		case MODIFIER_NEGATE:		return "-";
408		case MODIFIER_BIAS:			return "";
409		case MODIFIER_BIAS_NEGATE:	return "-";
410		case MODIFIER_SIGN:			return "";
411		case MODIFIER_SIGN_NEGATE:	return "-";
412		case MODIFIER_COMPLEMENT:	return "1-";
413		case MODIFIER_X2:			return "";
414		case MODIFIER_X2_NEGATE:	return "-";
415		case MODIFIER_DZ:			return "";
416		case MODIFIER_DW:			return "";
417		case MODIFIER_ABS:			return "";
418		case MODIFIER_ABS_NEGATE:	return "-";
419		case MODIFIER_NOT:			return "!";
420		default:
421			ASSERT(false);
422		}
423
424		return "";
425	}
426
427	std::string Shader::Parameter::relativeString() const
428	{
429		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
430		{
431			if(rel.type == PARAMETER_VOID)
432			{
433				return "";
434			}
435			else if(rel.type == PARAMETER_ADDR)
436			{
437				switch(rel.swizzle & 0x03)
438				{
439				case 0: return "[a0.x]";
440				case 1: return "[a0.y]";
441				case 2: return "[a0.z]";
442				case 3: return "[a0.w]";
443				}
444			}
445			else if(rel.type == PARAMETER_TEMP)
446			{
447				std::ostringstream buffer;
448				buffer << rel.index;
449
450				switch(rel.swizzle & 0x03)
451				{
452				case 0: return "[r" + buffer.str() + ".x]";
453				case 1: return "[r" + buffer.str() + ".y]";
454				case 2: return "[r" + buffer.str() + ".z]";
455				case 3: return "[r" + buffer.str() + ".w]";
456				}
457			}
458			else if(rel.type == PARAMETER_LOOP)
459			{
460				return "[aL]";
461			}
462			else if(rel.type == PARAMETER_CONST)
463			{
464				std::ostringstream buffer;
465				buffer << rel.index;
466
467				switch(rel.swizzle & 0x03)
468				{
469				case 0: return "[c" + buffer.str() + ".x]";
470				case 1: return "[c" + buffer.str() + ".y]";
471				case 2: return "[c" + buffer.str() + ".z]";
472				case 3: return "[c" + buffer.str() + ".w]";
473				}
474			}
475			else ASSERT(false);
476		}
477
478		return "";
479	}
480
481	std::string Shader::SourceParameter::postModifierString() const
482	{
483		if(type == PARAMETER_VOID)
484		{
485			return "";
486		}
487
488		switch(modifier)
489		{
490		case MODIFIER_NONE:			return "";
491		case MODIFIER_NEGATE:		return "";
492		case MODIFIER_BIAS:			return "_bias";
493		case MODIFIER_BIAS_NEGATE:	return "_bias";
494		case MODIFIER_SIGN:			return "_bx2";
495		case MODIFIER_SIGN_NEGATE:	return "_bx2";
496		case MODIFIER_COMPLEMENT:	return "";
497		case MODIFIER_X2:			return "_x2";
498		case MODIFIER_X2_NEGATE:	return "_x2";
499		case MODIFIER_DZ:			return "_dz";
500		case MODIFIER_DW:			return "_dw";
501		case MODIFIER_ABS:			return "_abs";
502		case MODIFIER_ABS_NEGATE:	return "_abs";
503		case MODIFIER_NOT:			return "";
504		default:
505			ASSERT(false);
506		}
507
508		return "";
509	}
510
511	std::string Shader::SourceParameter::swizzleString() const
512	{
513		return Instruction::swizzleString(type, swizzle);
514	}
515
516	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
517	{
518		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
519		{
520			opcode = (Opcode)token;
521
522			control = CONTROL_RESERVED0;
523			predicate = false;
524			coissue = false;
525		}
526		else
527		{
528			opcode = (Opcode)(token & 0x0000FFFF);
529			control = (Control)((token & 0x00FF0000) >> 16);
530
531			int size = (token & 0x0F000000) >> 24;
532
533			predicate = (token & 0x10000000) != 0x00000000;
534			coissue = (token & 0x40000000) != 0x00000000;
535
536			if(majorVersion < 2)
537			{
538				if(size != 0)
539				{
540					ASSERT(false);   // Reserved
541				}
542			}
543
544			if(majorVersion < 2)
545			{
546				if(predicate)
547				{
548					ASSERT(false);
549				}
550			}
551
552			if((token & 0x20000000) != 0x00000000)
553			{
554				ASSERT(false);   // Reserved
555			}
556
557			if(majorVersion >= 2)
558			{
559				if(coissue)
560				{
561					ASSERT(false);   // Reserved
562				}
563			}
564
565			if((token & 0x80000000) != 0x00000000)
566			{
567				ASSERT(false);
568			}
569		}
570	}
571
572	void Shader::Instruction::parseDeclarationToken(unsigned long token)
573	{
574		samplerType = (SamplerType)((token & 0x78000000) >> 27);
575		usage = (Usage)(token & 0x0000001F);
576		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
577	}
578
579	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
580	{
581		dst.index = (unsigned short)(token[0] & 0x000007FF);
582		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
583
584		// TODO: Check type and index range
585
586		bool relative = (token[0] & 0x00002000) != 0x00000000;
587		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
588		dst.rel.swizzle = 0x00;
589		dst.rel.scale = 1;
590
591		if(relative && majorVersion >= 3)
592		{
593			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
594			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
595		}
596		else if(relative) ASSERT(false);   // Reserved
597
598		if((token[0] & 0x0000C000) != 0x00000000)
599		{
600			ASSERT(false);   // Reserved
601		}
602
603		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
604		dst.saturate = (token[0] & 0x00100000) != 0;
605		dst.partialPrecision = (token[0] & 0x00200000) != 0;
606		dst.centroid = (token[0] & 0x00400000) != 0;
607		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
608
609		if(majorVersion >= 2)
610		{
611			if(dst.shift)
612			{
613				ASSERT(false);   // Reserved
614			}
615		}
616
617		if((token[0] & 0x80000000) != 0x80000000)
618		{
619			ASSERT(false);
620		}
621	}
622
623	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
624	{
625		// Defaults
626		src[i].index = 0;
627		src[i].type = PARAMETER_VOID;
628		src[i].modifier = MODIFIER_NONE;
629		src[i].swizzle = 0xE4;
630		src[i].rel.type = PARAMETER_VOID;
631		src[i].rel.swizzle = 0x00;
632		src[i].rel.scale = 1;
633
634		switch(opcode)
635		{
636		case OPCODE_DEF:
637			src[0].type = PARAMETER_FLOAT4LITERAL;
638			src[0].value[i] = *(float*)token;
639			break;
640		case OPCODE_DEFB:
641			src[0].type = PARAMETER_BOOL1LITERAL;
642			src[0].boolean[0] = *(int*)token;
643			break;
644		case OPCODE_DEFI:
645			src[0].type = PARAMETER_INT4LITERAL;
646			src[0].integer[i] = *(int*)token;
647			break;
648		default:
649			src[i].index = (unsigned short)(token[0] & 0x000007FF);
650			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
651
652			// FIXME: Check type and index range
653
654			bool relative = (token[0] & 0x00002000) != 0x00000000;
655			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
656
657			if((token[0] & 0x0000C000) != 0x00000000)
658			{
659				if(opcode != OPCODE_DEF &&
660				   opcode != OPCODE_DEFI &&
661				   opcode != OPCODE_DEFB)
662				{
663					ASSERT(false);
664				}
665			}
666
667			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
668			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
669
670			if((token[0] & 0x80000000) != 0x80000000)
671			{
672				if(opcode != OPCODE_DEF &&
673				   opcode != OPCODE_DEFI &&
674				   opcode != OPCODE_DEFB)
675				{
676					ASSERT(false);
677				}
678			}
679
680			if(relative && majorVersion >= 2)
681			{
682				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
683				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
684			}
685		}
686	}
687
688	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
689	{
690		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
691		{
692			return "";
693		}
694
695		int x = (swizzle & 0x03) >> 0;
696		int y = (swizzle & 0x0C) >> 2;
697		int z = (swizzle & 0x30) >> 4;
698		int w = (swizzle & 0xC0) >> 6;
699
700		std::string swizzleString = ".";
701
702		switch(x)
703		{
704		case 0: swizzleString += "x"; break;
705		case 1: swizzleString += "y"; break;
706		case 2: swizzleString += "z"; break;
707		case 3: swizzleString += "w"; break;
708		}
709
710		if(!(x == y && y == z && z == w))
711		{
712			switch(y)
713			{
714			case 0: swizzleString += "x"; break;
715			case 1: swizzleString += "y"; break;
716			case 2: swizzleString += "z"; break;
717			case 3: swizzleString += "w"; break;
718			}
719
720			if(!(y == z && z == w))
721			{
722				switch(z)
723				{
724				case 0: swizzleString += "x"; break;
725				case 1: swizzleString += "y"; break;
726				case 2: swizzleString += "z"; break;
727				case 3: swizzleString += "w"; break;
728				}
729
730				if(!(z == w))
731				{
732					switch(w)
733					{
734					case 0: swizzleString += "x"; break;
735					case 1: swizzleString += "y"; break;
736					case 2: swizzleString += "z"; break;
737					case 3: swizzleString += "w"; break;
738					}
739				}
740			}
741		}
742
743		return swizzleString;
744	}
745
746	std::string Shader::Instruction::operationString(unsigned short version) const
747	{
748		switch(opcode)
749		{
750		case OPCODE_NULL:			return "null";
751		case OPCODE_NOP:			return "nop";
752		case OPCODE_MOV:			return "mov";
753		case OPCODE_ADD:			return "add";
754		case OPCODE_IADD:			return "iadd";
755		case OPCODE_SUB:			return "sub";
756		case OPCODE_ISUB:			return "isub";
757		case OPCODE_MAD:			return "mad";
758		case OPCODE_IMAD:			return "imad";
759		case OPCODE_MUL:			return "mul";
760		case OPCODE_IMUL:			return "imul";
761		case OPCODE_RCPX:			return "rcpx";
762		case OPCODE_DIV:			return "div";
763		case OPCODE_IDIV:			return "idiv";
764		case OPCODE_UDIV:			return "udiv";
765		case OPCODE_MOD:			return "mod";
766		case OPCODE_IMOD:			return "imod";
767		case OPCODE_UMOD:			return "umod";
768		case OPCODE_SHL:			return "shl";
769		case OPCODE_ISHR:			return "ishr";
770		case OPCODE_USHR:			return "ushr";
771		case OPCODE_RSQX:			return "rsqx";
772		case OPCODE_SQRT:			return "sqrt";
773		case OPCODE_RSQ:			return "rsq";
774		case OPCODE_LEN2:			return "len2";
775		case OPCODE_LEN3:			return "len3";
776		case OPCODE_LEN4:			return "len4";
777		case OPCODE_DIST1:			return "dist1";
778		case OPCODE_DIST2:			return "dist2";
779		case OPCODE_DIST3:			return "dist3";
780		case OPCODE_DIST4:			return "dist4";
781		case OPCODE_DP3:			return "dp3";
782		case OPCODE_DP4:			return "dp4";
783		case OPCODE_DET2:			return "det2";
784		case OPCODE_DET3:			return "det3";
785		case OPCODE_DET4:			return "det4";
786		case OPCODE_MIN:			return "min";
787		case OPCODE_IMIN:			return "imin";
788		case OPCODE_UMIN:			return "umin";
789		case OPCODE_MAX:			return "max";
790		case OPCODE_IMAX:			return "imax";
791		case OPCODE_UMAX:			return "umax";
792		case OPCODE_SLT:			return "slt";
793		case OPCODE_SGE:			return "sge";
794		case OPCODE_EXP2X:			return "exp2x";
795		case OPCODE_LOG2X:			return "log2x";
796		case OPCODE_LIT:			return "lit";
797		case OPCODE_ATT:			return "att";
798		case OPCODE_LRP:			return "lrp";
799		case OPCODE_STEP:			return "step";
800		case OPCODE_SMOOTH:			return "smooth";
801		case OPCODE_FLOATBITSTOINT:	 return "floatBitsToInt";
802		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
803		case OPCODE_INTBITSTOFLOAT:	 return "intBitsToFloat";
804		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
805		case OPCODE_PACKSNORM2x16:	 return "packSnorm2x16";
806		case OPCODE_PACKUNORM2x16:	 return "packUnorm2x16";
807		case OPCODE_PACKHALF2x16:	 return "packHalf2x16";
808		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
809		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
810		case OPCODE_UNPACKHALF2x16:	 return "unpackHalf2x16";
811		case OPCODE_FRC:			return "frc";
812		case OPCODE_M4X4:			return "m4x4";
813		case OPCODE_M4X3:			return "m4x3";
814		case OPCODE_M3X4:			return "m3x4";
815		case OPCODE_M3X3:			return "m3x3";
816		case OPCODE_M3X2:			return "m3x2";
817		case OPCODE_CALL:			return "call";
818		case OPCODE_CALLNZ:			return "callnz";
819		case OPCODE_LOOP:			return "loop";
820		case OPCODE_RET:			return "ret";
821		case OPCODE_ENDLOOP:		return "endloop";
822		case OPCODE_LABEL:			return "label";
823		case OPCODE_DCL:			return "dcl";
824		case OPCODE_POWX:			return "powx";
825		case OPCODE_CRS:			return "crs";
826		case OPCODE_SGN:			return "sgn";
827		case OPCODE_ISGN:			return "isgn";
828		case OPCODE_ABS:			return "abs";
829		case OPCODE_IABS:			return "iabs";
830		case OPCODE_NRM2:			return "nrm2";
831		case OPCODE_NRM3:			return "nrm3";
832		case OPCODE_NRM4:			return "nrm4";
833		case OPCODE_SINCOS:			return "sincos";
834		case OPCODE_REP:			return "rep";
835		case OPCODE_ENDREP:			return "endrep";
836		case OPCODE_IF:				return "if";
837		case OPCODE_IFC:			return "ifc";
838		case OPCODE_ELSE:			return "else";
839		case OPCODE_ENDIF:			return "endif";
840		case OPCODE_BREAK:			return "break";
841		case OPCODE_BREAKC:			return "breakc";
842		case OPCODE_MOVA:			return "mova";
843		case OPCODE_DEFB:			return "defb";
844		case OPCODE_DEFI:			return "defi";
845		case OPCODE_TEXCOORD:		return "texcoord";
846		case OPCODE_TEXKILL:		return "texkill";
847		case OPCODE_DISCARD:		return "discard";
848		case OPCODE_TEX:
849			if(version < 0x0104)	return "tex";
850			else					return "texld";
851		case OPCODE_TEXBEM:			return "texbem";
852		case OPCODE_TEXBEML:		return "texbeml";
853		case OPCODE_TEXREG2AR:		return "texreg2ar";
854		case OPCODE_TEXREG2GB:		return "texreg2gb";
855		case OPCODE_TEXM3X2PAD:		return "texm3x2pad";
856		case OPCODE_TEXM3X2TEX:		return "texm3x2tex";
857		case OPCODE_TEXM3X3PAD:		return "texm3x3pad";
858		case OPCODE_TEXM3X3TEX:		return "texm3x3tex";
859		case OPCODE_RESERVED0:		return "reserved0";
860		case OPCODE_TEXM3X3SPEC:	return "texm3x3spec";
861		case OPCODE_TEXM3X3VSPEC:	return "texm3x3vspec";
862		case OPCODE_EXPP:			return "expp";
863		case OPCODE_LOGP:			return "logp";
864		case OPCODE_CND:			return "cnd";
865		case OPCODE_DEF:			return "def";
866		case OPCODE_TEXREG2RGB:		return "texreg2rgb";
867		case OPCODE_TEXDP3TEX:		return "texdp3tex";
868		case OPCODE_TEXM3X2DEPTH:	return "texm3x2depth";
869		case OPCODE_TEXDP3:			return "texdp3";
870		case OPCODE_TEXM3X3:		return "texm3x3";
871		case OPCODE_TEXDEPTH:		return "texdepth";
872		case OPCODE_CMP0:			return "cmp0";
873		case OPCODE_ICMP:			return "icmp";
874		case OPCODE_UCMP:			return "ucmp";
875		case OPCODE_SELECT:			return "select";
876		case OPCODE_EXTRACT:		return "extract";
877		case OPCODE_INSERT:			return "insert";
878		case OPCODE_BEM:			return "bem";
879		case OPCODE_DP2ADD:			return "dp2add";
880		case OPCODE_DFDX:			return "dFdx";
881		case OPCODE_DFDY:			return "dFdy";
882		case OPCODE_FWIDTH:			return "fwidth";
883		case OPCODE_TEXLDD:			return "texldd";
884		case OPCODE_CMP:			return "cmp";
885		case OPCODE_TEXLDL:			return "texldl";
886		case OPCODE_TEXOFFSET:		return "texoffset";
887		case OPCODE_TEXLDLOFFSET:	return "texldloffset";
888		case OPCODE_TEXELFETCH:		return "texelfetch";
889		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
890		case OPCODE_TEXGRAD:		return "texgrad";
891		case OPCODE_TEXGRADOFFSET:	return "texgradoffset";
892		case OPCODE_BREAKP:			return "breakp";
893		case OPCODE_TEXSIZE:        return "texsize";
894		case OPCODE_PHASE:			return "phase";
895		case OPCODE_COMMENT:		return "comment";
896		case OPCODE_END:			return "end";
897		case OPCODE_PS_1_0:			return "ps_1_0";
898		case OPCODE_PS_1_1:			return "ps_1_1";
899		case OPCODE_PS_1_2:			return "ps_1_2";
900		case OPCODE_PS_1_3:			return "ps_1_3";
901		case OPCODE_PS_1_4:			return "ps_1_4";
902		case OPCODE_PS_2_0:			return "ps_2_0";
903		case OPCODE_PS_2_x:			return "ps_2_x";
904		case OPCODE_PS_3_0:			return "ps_3_0";
905		case OPCODE_VS_1_0:			return "vs_1_0";
906		case OPCODE_VS_1_1:			return "vs_1_1";
907		case OPCODE_VS_2_0:			return "vs_2_0";
908		case OPCODE_VS_2_x:			return "vs_2_x";
909		case OPCODE_VS_2_sw:		return "vs_2_sw";
910		case OPCODE_VS_3_0:			return "vs_3_0";
911		case OPCODE_VS_3_sw:		return "vs_3_sw";
912		case OPCODE_WHILE:          return "while";
913		case OPCODE_ENDWHILE:       return "endwhile";
914		case OPCODE_COS:            return "cos";
915		case OPCODE_SIN:            return "sin";
916		case OPCODE_TAN:            return "tan";
917		case OPCODE_ACOS:           return "acos";
918		case OPCODE_ASIN:           return "asin";
919		case OPCODE_ATAN:           return "atan";
920		case OPCODE_ATAN2:          return "atan2";
921		case OPCODE_COSH:           return "cosh";
922		case OPCODE_SINH:           return "sinh";
923		case OPCODE_TANH:           return "tanh";
924		case OPCODE_ACOSH:          return "acosh";
925		case OPCODE_ASINH:          return "asinh";
926		case OPCODE_ATANH:          return "atanh";
927		case OPCODE_DP1:            return "dp1";
928		case OPCODE_DP2:            return "dp2";
929		case OPCODE_TRUNC:          return "trunc";
930		case OPCODE_FLOOR:          return "floor";
931		case OPCODE_ROUND:          return "round";
932		case OPCODE_ROUNDEVEN:      return "roundEven";
933		case OPCODE_CEIL:           return "ceil";
934		case OPCODE_EXP2:           return "exp2";
935		case OPCODE_LOG2:           return "log2";
936		case OPCODE_EXP:            return "exp";
937		case OPCODE_LOG:            return "log";
938		case OPCODE_POW:            return "pow";
939		case OPCODE_F2B:            return "f2b";
940		case OPCODE_B2F:            return "b2f";
941		case OPCODE_F2I:            return "f2i";
942		case OPCODE_I2F:            return "i2f";
943		case OPCODE_F2U:            return "f2u";
944		case OPCODE_U2F:            return "u2f";
945		case OPCODE_B2I:            return "b2i";
946		case OPCODE_I2B:            return "i2b";
947		case OPCODE_ALL:            return "all";
948		case OPCODE_ANY:            return "any";
949		case OPCODE_NEG:            return "neg";
950		case OPCODE_INEG:           return "ineg";
951		case OPCODE_ISNAN:          return "isnan";
952		case OPCODE_ISINF:          return "isinf";
953		case OPCODE_NOT:            return "not";
954		case OPCODE_OR:             return "or";
955		case OPCODE_XOR:            return "xor";
956		case OPCODE_AND:            return "and";
957		case OPCODE_EQ:             return "eq";
958		case OPCODE_NE:             return "neq";
959		case OPCODE_FORWARD1:       return "forward1";
960		case OPCODE_FORWARD2:       return "forward2";
961		case OPCODE_FORWARD3:       return "forward3";
962		case OPCODE_FORWARD4:       return "forward4";
963		case OPCODE_REFLECT1:       return "reflect1";
964		case OPCODE_REFLECT2:       return "reflect2";
965		case OPCODE_REFLECT3:       return "reflect3";
966		case OPCODE_REFLECT4:       return "reflect4";
967		case OPCODE_REFRACT1:       return "refract1";
968		case OPCODE_REFRACT2:       return "refract2";
969		case OPCODE_REFRACT3:       return "refract3";
970		case OPCODE_REFRACT4:       return "refract4";
971		case OPCODE_LEAVE:          return "leave";
972		case OPCODE_CONTINUE:       return "continue";
973		case OPCODE_TEST:           return "test";
974		case OPCODE_SWITCH:         return "switch";
975		case OPCODE_ENDSWITCH:      return "endswitch";
976		default:
977			ASSERT(false);
978		}
979
980		return "<unknown>";
981	}
982
983	std::string Shader::Instruction::controlString() const
984	{
985		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
986		{
987			if(project) return "p";
988
989			if(bias) return "b";
990
991			// FIXME: LOD
992		}
993
994		switch(control)
995		{
996		case 1: return "_gt";
997		case 2: return "_eq";
998		case 3: return "_ge";
999		case 4: return "_lt";
1000		case 5: return "_ne";
1001		case 6: return "_le";
1002		default:
1003			return "";
1004		//	ASSERT(false);   // FIXME
1005		}
1006	}
1007
1008	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1009	{
1010		std::ostringstream buffer;
1011
1012		if(type == PARAMETER_FLOAT4LITERAL)
1013		{
1014			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1015
1016			return buffer.str();
1017		}
1018		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1019		{
1020			buffer << index;
1021
1022			return typeString(shaderType, version) + buffer.str();
1023		}
1024		else
1025		{
1026			return typeString(shaderType, version);
1027		}
1028	}
1029
1030	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1031	{
1032		switch(type)
1033		{
1034		case PARAMETER_TEMP:			return "r";
1035		case PARAMETER_INPUT:			return "v";
1036		case PARAMETER_CONST:			return "c";
1037		case PARAMETER_TEXTURE:
1038	//	case PARAMETER_ADDR:
1039			if(shaderType == SHADER_PIXEL)	return "t";
1040			else							return "a0";
1041		case PARAMETER_RASTOUT:
1042			if(index == 0)              return "oPos";
1043			else if(index == 1)         return "oFog";
1044			else if(index == 2)         return "oPts";
1045			else                        ASSERT(false);
1046		case PARAMETER_ATTROUT:			return "oD";
1047		case PARAMETER_TEXCRDOUT:
1048	//	case PARAMETER_OUTPUT:			return "";
1049			if(version < 0x0300)		return "oT";
1050			else						return "o";
1051		case PARAMETER_CONSTINT:		return "i";
1052		case PARAMETER_COLOROUT:		return "oC";
1053		case PARAMETER_DEPTHOUT:		return "oDepth";
1054		case PARAMETER_SAMPLER:			return "s";
1055	//	case PARAMETER_CONST2:			return "";
1056	//	case PARAMETER_CONST3:			return "";
1057	//	case PARAMETER_CONST4:			return "";
1058		case PARAMETER_CONSTBOOL:		return "b";
1059		case PARAMETER_LOOP:			return "aL";
1060	//	case PARAMETER_TEMPFLOAT16:		return "";
1061		case PARAMETER_MISCTYPE:
1062			if(index == 0)				return "vPos";
1063			else if(index == 1)			return "vFace";
1064			else						ASSERT(false);
1065		case PARAMETER_LABEL:			return "l";
1066		case PARAMETER_PREDICATE:		return "p0";
1067		case PARAMETER_FLOAT4LITERAL:	return "";
1068		case PARAMETER_BOOL1LITERAL:	return "";
1069		case PARAMETER_INT4LITERAL:		return "";
1070	//	case PARAMETER_VOID:			return "";
1071		default:
1072			ASSERT(false);
1073		}
1074
1075		return "";
1076	}
1077
1078	bool Shader::Instruction::isBranch() const
1079	{
1080		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1081	}
1082
1083	bool Shader::Instruction::isCall() const
1084	{
1085		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1086	}
1087
1088	bool Shader::Instruction::isBreak() const
1089	{
1090		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1091	}
1092
1093	bool Shader::Instruction::isLoopOrSwitch() const
1094	{
1095		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE || opcode == OPCODE_SWITCH;
1096	}
1097
1098	bool Shader::Instruction::isEndLoopOrSwitch() const
1099	{
1100		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE || opcode == OPCODE_ENDSWITCH;;
1101	}
1102
1103	bool Shader::Instruction::isPredicated() const
1104	{
1105		return predicate ||
1106		       analysisBranch ||
1107		       analysisBreak ||
1108		       analysisContinue ||
1109		       analysisLeave;
1110	}
1111
	// Creates an empty shader. The serial ID is taken from the class-wide
	// serialCounter, which is post-incremented for the next shader.
	// Only usedSamplers is reset in this constructor body.
	Shader::Shader() : serialID(serialCounter++)
	{
		usedSamplers = 0;
	}
1116
1117	Shader::~Shader()
1118	{
1119		for(unsigned int i = 0; i < instruction.size(); i++)
1120		{
1121			delete instruction[i];
1122			instruction[i] = 0;
1123		}
1124	}
1125
1126	void Shader::parse(const unsigned long *token)
1127	{
1128		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1129		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1130		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1131
1132		int length = 0;
1133
1134		if(shaderType == SHADER_VERTEX)
1135		{
1136			length = VertexShader::validate(token);
1137		}
1138		else if(shaderType == SHADER_PIXEL)
1139		{
1140			length = PixelShader::validate(token);
1141		}
1142		else ASSERT(false);
1143
1144		ASSERT(length != 0);
1145		instruction.resize(length);
1146
1147		for(int i = 0; i < length; i++)
1148		{
1149			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1150			{
1151				int length = (*token & 0x7FFF0000) >> 16;
1152
1153				token += length + 1;
1154			}
1155
1156			int tokenCount = size(*token);
1157
1158			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1159
1160			token += 1 + tokenCount;
1161		}
1162	}
1163
	// Convenience overload: forwards to size(opcode, version) using this
	// shader's own version member.
	int Shader::size(unsigned long opcode) const
	{
		return size(opcode, version);
	}
1168
1169	int Shader::size(unsigned long opcode, unsigned short version)
1170	{
1171		if(version > 0x0300)
1172		{
1173			ASSERT(false);
1174		}
1175
1176		static const char size[] =
1177		{
1178			0,   // NOP = 0
1179			2,   // MOV
1180			3,   // ADD
1181			3,   // SUB
1182			4,   // MAD
1183			3,   // MUL
1184			2,   // RCP
1185			2,   // RSQ
1186			3,   // DP3
1187			3,   // DP4
1188			3,   // MIN
1189			3,   // MAX
1190			3,   // SLT
1191			3,   // SGE
1192			2,   // EXP
1193			2,   // LOG
1194			2,   // LIT
1195			3,   // DST
1196			4,   // LRP
1197			2,   // FRC
1198			3,   // M4x4
1199			3,   // M4x3
1200			3,   // M3x4
1201			3,   // M3x3
1202			3,   // M3x2
1203			1,   // CALL
1204			2,   // CALLNZ
1205			2,   // LOOP
1206			0,   // RET
1207			0,   // ENDLOOP
1208			1,   // LABEL
1209			2,   // DCL
1210			3,   // POW
1211			3,   // CRS
1212			4,   // SGN
1213			2,   // ABS
1214			2,   // NRM
1215			4,   // SINCOS
1216			1,   // REP
1217			0,   // ENDREP
1218			1,   // IF
1219			2,   // IFC
1220			0,   // ELSE
1221			0,   // ENDIF
1222			0,   // BREAK
1223			2,   // BREAKC
1224			2,   // MOVA
1225			2,   // DEFB
1226			5,   // DEFI
1227			-1,  // 49
1228			-1,  // 50
1229			-1,  // 51
1230			-1,  // 52
1231			-1,  // 53
1232			-1,  // 54
1233			-1,  // 55
1234			-1,  // 56
1235			-1,  // 57
1236			-1,  // 58
1237			-1,  // 59
1238			-1,  // 60
1239			-1,  // 61
1240			-1,  // 62
1241			-1,  // 63
1242			1,   // TEXCOORD = 64
1243			1,   // TEXKILL
1244			1,   // TEX
1245			2,   // TEXBEM
1246			2,   // TEXBEML
1247			2,   // TEXREG2AR
1248			2,   // TEXREG2GB
1249			2,   // TEXM3x2PAD
1250			2,   // TEXM3x2TEX
1251			2,   // TEXM3x3PAD
1252			2,   // TEXM3x3TEX
1253			-1,  // RESERVED0
1254			3,   // TEXM3x3SPEC
1255			2,   // TEXM3x3VSPEC
1256			2,   // EXPP
1257			2,   // LOGP
1258			4,   // CND
1259			5,   // DEF
1260			2,   // TEXREG2RGB
1261			2,   // TEXDP3TEX
1262			2,   // TEXM3x2DEPTH
1263			2,   // TEXDP3
1264			2,   // TEXM3x3
1265			1,   // TEXDEPTH
1266			4,   // CMP
1267			3,   // BEM
1268			4,   // DP2ADD
1269			2,   // DSX
1270			2,   // DSY
1271			5,   // TEXLDD
1272			3,   // SETP
1273			3,   // TEXLDL
1274			2,   // BREAKP
1275			-1,  // 97
1276			-1,  // 98
1277			-1,  // 99
1278			-1,  // 100
1279			-1,  // 101
1280			-1,  // 102
1281			-1,  // 103
1282			-1,  // 104
1283			-1,  // 105
1284			-1,  // 106
1285			-1,  // 107
1286			-1,  // 108
1287			-1,  // 109
1288			-1,  // 110
1289			-1,  // 111
1290			-1,  // 112
1291		};
1292
1293		int length = 0;
1294
1295		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1296		{
1297			return (opcode & 0x7FFF0000) >> 16;
1298		}
1299
1300		if(opcode != OPCODE_PS_1_0 &&
1301		   opcode != OPCODE_PS_1_1 &&
1302		   opcode != OPCODE_PS_1_2 &&
1303		   opcode != OPCODE_PS_1_3 &&
1304		   opcode != OPCODE_PS_1_4 &&
1305		   opcode != OPCODE_PS_2_0 &&
1306		   opcode != OPCODE_PS_2_x &&
1307		   opcode != OPCODE_PS_3_0 &&
1308		   opcode != OPCODE_VS_1_0 &&
1309		   opcode != OPCODE_VS_1_1 &&
1310		   opcode != OPCODE_VS_2_0 &&
1311		   opcode != OPCODE_VS_2_x &&
1312		   opcode != OPCODE_VS_2_sw &&
1313		   opcode != OPCODE_VS_3_0 &&
1314		   opcode != OPCODE_VS_3_sw &&
1315		   opcode != OPCODE_PHASE &&
1316		   opcode != OPCODE_END)
1317		{
1318			if(version >= 0x0200)
1319			{
1320				length = (opcode & 0x0F000000) >> 24;
1321			}
1322			else
1323			{
1324				length = size[opcode & 0x0000FFFF];
1325			}
1326		}
1327
1328		if(length < 0)
1329		{
1330			ASSERT(false);
1331		}
1332
1333		if(version == 0x0104)
1334		{
1335			switch(opcode & 0x0000FFFF)
1336			{
1337			case OPCODE_TEX:
1338				length += 1;
1339				break;
1340			case OPCODE_TEXCOORD:
1341				length += 1;
1342				break;
1343			default:
1344				break;
1345			}
1346		}
1347
1348		return length;
1349	}
1350
1351	bool Shader::maskContainsComponent(int mask, int component)
1352	{
1353		return (mask & (1 << component)) != 0;
1354	}
1355
1356	bool Shader::swizzleContainsComponent(int swizzle, int component)
1357	{
1358		if((swizzle & 0x03) >> 0 == component) return true;
1359		if((swizzle & 0x0C) >> 2 == component) return true;
1360		if((swizzle & 0x30) >> 4 == component) return true;
1361		if((swizzle & 0xC0) >> 6 == component) return true;
1362
1363		return false;
1364	}
1365
1366	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1367	{
1368		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1369		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1370		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1371		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1372
1373		return false;
1374	}
1375
	// Returns the dynamicBranching flag. analyzeDynamicBranching() sets it when a
	// branching instruction's first source is not a constant boolean.
	bool Shader::containsDynamicBranching() const
	{
		return dynamicBranching;
	}
1380
	// Returns the containsBreak flag. analyzeDynamicBranching() sets it when the
	// shader holds an instruction for which isBreak() is true.
	bool Shader::containsBreakInstruction() const
	{
		return containsBreak;
	}
1385
	// Returns the containsContinue flag. analyzeDynamicBranching() sets it when
	// the shader holds an OPCODE_CONTINUE instruction.
	bool Shader::containsContinueInstruction() const
	{
		return containsContinue;
	}
1390
	// Returns the containsLeave flag. analyzeDynamicBranching() sets it when the
	// shader holds an OPCODE_LEAVE instruction.
	bool Shader::containsLeaveInstruction() const
	{
		return containsLeave;
	}
1395
	// Returns the containsDefine flag computed by analyzeDynamicBranching().
	bool Shader::containsDefineInstruction() const
	{
		return containsDefine;
	}
1400
1401	bool Shader::usesSampler(int index) const
1402	{
1403		return (usedSamplers & (1 << index)) != 0;
1404	}
1405
	// Returns the serial ID that the constructor took from serialCounter.
	int Shader::getSerialID() const
	{
		return serialID;
	}
1410
	// Returns the number of instructions currently stored in the shader.
	size_t Shader::getLength() const
	{
		return instruction.size();
	}
1415
	// Returns the shader type; parse() reads it from the upper 16 bits of the
	// first token.
	Shader::ShaderType Shader::getShaderType() const
	{
		return shaderType;
	}
1420
	// Returns the shader's version member.
	// NOTE(review): the comparisons against 0x0104, 0x0200 and 0x0300 in this
	// file suggest the major version sits in the high byte and the minor version
	// in the low byte - confirm against the class declaration.
	unsigned short Shader::getVersion() const
	{
		return version;
	}
1425
1426	void Shader::print(const char *fileName, ...) const
1427	{
1428		char fullName[1024 + 1];
1429
1430		va_list vararg;
1431		va_start(vararg, fileName);
1432		vsnprintf(fullName, 1024, fileName, vararg);
1433		va_end(vararg);
1434
1435		std::ofstream file(fullName, std::ofstream::out);
1436
1437		for(unsigned int i = 0; i < instruction.size(); i++)
1438		{
1439			file << instruction[i]->string(shaderType, version) << std::endl;
1440		}
1441	}
1442
1443	void Shader::printInstruction(int index, const char *fileName) const
1444	{
1445		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1446
1447		file << instruction[index]->string(shaderType, version) << std::endl;
1448	}
1449
	// Adds an instruction at the end of the shader. The shader's destructor
	// deletes every stored instruction, so the shader takes ownership of it.
	void Shader::append(Instruction *instruction)
	{
		// 'this->' is required because the parameter hides the member of the same name
		this->instruction.push_back(instruction);
	}
1454
	// Marks sampler i as used by setting bit i of usedSamplers.
	// The index is not range checked.
	void Shader::declareSampler(int i)
	{
		usedSamplers |= 1 << i;
	}
1459
	// Returns read-only access to the instruction at position i.
	// The index is only validated by an ASSERT.
	const Shader::Instruction *Shader::getInstruction(unsigned int i) const
	{
		ASSERT(i < instruction.size());

		return instruction[i];
	}
1466
	// Runs the optimization passes. optimizeLeave() and optimizeCall() mark
	// removable instructions by setting their opcode to OPCODE_NULL, after which
	// removeNull() deletes the marked instructions.
	void Shader::optimize()
	{
		optimizeLeave();
		optimizeCall();
		removeNull();
	}
1473
1474	void Shader::optimizeLeave()
1475	{
1476		// A return (leave) right before the end of a function or the shader can be removed
1477		for(unsigned int i = 0; i < instruction.size(); i++)
1478		{
1479			if(instruction[i]->opcode == OPCODE_LEAVE)
1480			{
1481				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1482				{
1483					instruction[i]->opcode = OPCODE_NULL;
1484				}
1485			}
1486		}
1487	}
1488
1489	void Shader::optimizeCall()
1490	{
1491		// Eliminate uncalled functions
1492		std::set<int> calledFunctions;
1493		bool rescan = true;
1494
1495		while(rescan)
1496		{
1497			calledFunctions.clear();
1498			rescan = false;
1499
1500			for(unsigned int i = 0; i < instruction.size(); i++)
1501			{
1502				if(instruction[i]->isCall())
1503				{
1504					calledFunctions.insert(instruction[i]->dst.label);
1505				}
1506			}
1507
1508			if(!calledFunctions.empty())
1509			{
1510				for(unsigned int i = 0; i < instruction.size(); i++)
1511				{
1512					if(instruction[i]->opcode == OPCODE_LABEL)
1513					{
1514						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1515						{
1516							for( ; i < instruction.size(); i++)
1517							{
1518								Opcode oldOpcode = instruction[i]->opcode;
1519								instruction[i]->opcode = OPCODE_NULL;
1520
1521								if(oldOpcode == OPCODE_RET)
1522								{
1523									rescan = true;
1524									break;
1525								}
1526							}
1527						}
1528					}
1529				}
1530			}
1531		}
1532
1533		// Optimize the entry call
1534		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1535		{
1536			if(calledFunctions.size() == 1)
1537			{
1538				instruction[0]->opcode = OPCODE_NULL;
1539				instruction[1]->opcode = OPCODE_NULL;
1540
1541				for(size_t i = 2; i < instruction.size(); i++)
1542				{
1543					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1544					{
1545						instruction[i]->opcode = OPCODE_NULL;
1546					}
1547				}
1548			}
1549		}
1550	}
1551
1552	void Shader::removeNull()
1553	{
1554		size_t size = 0;
1555		for(size_t i = 0; i < instruction.size(); i++)
1556		{
1557			if(instruction[i]->opcode != OPCODE_NULL)
1558			{
1559				instruction[size] = instruction[i];
1560				size++;
1561			}
1562			else
1563			{
1564				delete instruction[i];
1565			}
1566		}
1567
1568		instruction.resize(size);
1569	}
1570
1571	void Shader::analyzeDirtyConstants()
1572	{
1573		dirtyConstantsF = 0;
1574		dirtyConstantsI = 0;
1575		dirtyConstantsB = 0;
1576
1577		for(unsigned int i = 0; i < instruction.size(); i++)
1578		{
1579			switch(instruction[i]->opcode)
1580			{
1581			case OPCODE_DEF:
1582				if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1583				{
1584					dirtyConstantsF = instruction[i]->dst.index + 1;
1585				}
1586				break;
1587			case OPCODE_DEFI:
1588				if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1589				{
1590					dirtyConstantsI = instruction[i]->dst.index + 1;
1591				}
1592				break;
1593			case OPCODE_DEFB:
1594				if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1595				{
1596					dirtyConstantsB = instruction[i]->dst.index + 1;
1597				}
1598				break;
1599			default:
1600				break;
1601			}
1602		}
1603	}
1604
1605	void Shader::analyzeDynamicBranching()
1606	{
1607		dynamicBranching = false;
1608		containsLeave = false;
1609		containsBreak = false;
1610		containsContinue = false;
1611		containsDefine = false;
1612
1613		// Determine global presence of branching instructions
1614		for(unsigned int i = 0; i < instruction.size(); i++)
1615		{
1616			switch(instruction[i]->opcode)
1617			{
1618			case OPCODE_CALLNZ:
1619			case OPCODE_IF:
1620			case OPCODE_IFC:
1621			case OPCODE_BREAK:
1622			case OPCODE_BREAKC:
1623			case OPCODE_CMP:
1624			case OPCODE_BREAKP:
1625			case OPCODE_LEAVE:
1626			case OPCODE_CONTINUE:
1627				if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1628				{
1629					dynamicBranching = true;
1630				}
1631
1632				if(instruction[i]->opcode == OPCODE_LEAVE)
1633				{
1634					containsLeave = true;
1635				}
1636
1637				if(instruction[i]->isBreak())
1638				{
1639					containsBreak = true;
1640				}
1641
1642				if(instruction[i]->opcode == OPCODE_CONTINUE)
1643				{
1644					containsContinue = true;
1645				}
1646			case OPCODE_DEF:
1647			case OPCODE_DEFB:
1648			case OPCODE_DEFI:
1649				containsDefine = true;
1650			default:
1651				break;
1652			}
1653		}
1654
1655		// Conservatively determine which instructions are affected by dynamic branching
1656		int branchDepth = 0;
1657		int breakDepth = 0;
1658		int continueDepth = 0;
1659		bool leaveReturn = false;
1660
1661		for(unsigned int i = 0; i < instruction.size(); i++)
1662		{
1663			// If statements
1664			if(instruction[i]->isBranch())
1665			{
1666				branchDepth++;
1667			}
1668			else if(instruction[i]->opcode == OPCODE_ENDIF)
1669			{
1670				branchDepth--;
1671			}
1672
1673			if(branchDepth > 0)
1674			{
1675				instruction[i]->analysisBranch = true;
1676
1677				if(instruction[i]->isCall())
1678				{
1679					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1680				}
1681			}
1682
1683			// Break statemement
1684			if(instruction[i]->isBreak())
1685			{
1686				breakDepth++;
1687			}
1688
1689			if(breakDepth > 0)
1690			{
1691				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
1692				{
1693					breakDepth++;
1694				}
1695				else if(instruction[i]->isEndLoopOrSwitch())
1696				{
1697					breakDepth--;
1698				}
1699
1700				instruction[i]->analysisBreak = true;
1701
1702				if(instruction[i]->isCall())
1703				{
1704					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1705				}
1706			}
1707
1708			// Continue statement
1709			if(instruction[i]->opcode == OPCODE_CONTINUE)
1710			{
1711				continueDepth++;
1712			}
1713
1714			if(continueDepth > 0)
1715			{
1716				if(instruction[i]->isLoopOrSwitch())   // Nested loop or switch, don't make the end of it disable the break execution mask
1717				{
1718					continueDepth++;
1719				}
1720				else if(instruction[i]->isEndLoopOrSwitch())
1721				{
1722					continueDepth--;
1723				}
1724
1725				instruction[i]->analysisContinue = true;
1726
1727				if(instruction[i]->isCall())
1728				{
1729					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1730				}
1731			}
1732
1733			// Return (leave) statement
1734			if(instruction[i]->opcode == OPCODE_LEAVE)
1735			{
1736				leaveReturn = true;
1737			}
1738			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1739			{
1740				leaveReturn = false;
1741			}
1742
1743			if(leaveReturn)
1744			{
1745				instruction[i]->analysisLeave = true;
1746
1747				if(instruction[i]->isCall())
1748				{
1749					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1750				}
1751			}
1752		}
1753	}
1754
1755	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1756	{
1757		bool marker = false;
1758		for(unsigned int i = 0; i < instruction.size(); i++)
1759		{
1760			if(!marker)
1761			{
1762				if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1763				{
1764					marker = true;
1765				}
1766			}
1767			else
1768			{
1769				if(instruction[i]->opcode == OPCODE_RET)
1770				{
1771					break;
1772				}
1773				else if(instruction[i]->isCall())
1774				{
1775					markFunctionAnalysis(instruction[i]->dst.label, flag);
1776				}
1777
1778				instruction[i]->analysis |= flag;
1779			}
1780		}
1781	}
1782
1783	void Shader::analyzeSamplers()
1784	{
1785		for(unsigned int i = 0; i < instruction.size(); i++)
1786		{
1787			switch(instruction[i]->opcode)
1788			{
1789			case OPCODE_TEX:
1790			case OPCODE_TEXBEM:
1791			case OPCODE_TEXBEML:
1792			case OPCODE_TEXREG2AR:
1793			case OPCODE_TEXREG2GB:
1794			case OPCODE_TEXM3X2TEX:
1795			case OPCODE_TEXM3X3TEX:
1796			case OPCODE_TEXM3X3SPEC:
1797			case OPCODE_TEXM3X3VSPEC:
1798			case OPCODE_TEXREG2RGB:
1799			case OPCODE_TEXDP3TEX:
1800			case OPCODE_TEXM3X2DEPTH:
1801			case OPCODE_TEXLDD:
1802			case OPCODE_TEXLDL:
1803			case OPCODE_TEXOFFSET:
1804			case OPCODE_TEXLDLOFFSET:
1805			case OPCODE_TEXELFETCH:
1806			case OPCODE_TEXELFETCHOFFSET:
1807			case OPCODE_TEXGRAD:
1808			case OPCODE_TEXGRADOFFSET:
1809				{
1810					Parameter &dst = instruction[i]->dst;
1811					Parameter &src1 = instruction[i]->src[1];
1812
1813					if(majorVersion >= 2)
1814					{
1815						usedSamplers |= 1 << src1.index;
1816					}
1817					else
1818					{
1819						usedSamplers |= 1 << dst.index;
1820					}
1821				}
1822				break;
1823			default:
1824				break;
1825			}
1826		}
1827	}
1828
1829	// Assigns a unique index to each call instruction, on a per label basis.
1830	// This is used to know what basic block to return to.
1831	void Shader::analyzeCallSites()
1832	{
1833		int callSiteIndex[2048] = {0};
1834
1835		for(unsigned int i = 0; i < instruction.size(); i++)
1836		{
1837			if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1838			{
1839				int label = instruction[i]->dst.label;
1840
1841				instruction[i]->dst.callSite = callSiteIndex[label]++;
1842			}
1843		}
1844	}
1845
1846	void Shader::analyzeDynamicIndexing()
1847	{
1848		dynamicallyIndexedTemporaries = false;
1849		dynamicallyIndexedInput = false;
1850		dynamicallyIndexedOutput = false;
1851
1852		for(unsigned int i = 0; i < instruction.size(); i++)
1853		{
1854			if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1855			   instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1856			   instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1857			   instruction[i]->dst.rel.type == PARAMETER_CONST)
1858			{
1859				switch(instruction[i]->dst.type)
1860				{
1861				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1862				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1863				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1864				default: break;
1865				}
1866			}
1867
1868			for(int j = 0; j < 3; j++)
1869			{
1870				if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1871				   instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1872				   instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1873				   instruction[i]->src[j].rel.type == PARAMETER_CONST)
1874				{
1875					switch(instruction[i]->src[j].type)
1876					{
1877					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1878					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1879					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1880					default: break;
1881					}
1882				}
1883			}
1884		}
1885	}
1886}
1887