1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Shader.hpp"
16
17#include "VertexShader.hpp"
18#include "PixelShader.hpp"
19#include "Common/Math.hpp"
20#include "Common/Debug.hpp"
21
22#include <set>
23#include <fstream>
24#include <sstream>
25#include <stdarg.h>
26
27namespace sw
28{
29	volatile int Shader::serialCounter = 1;
30
31	Shader::Opcode Shader::OPCODE_DP(int i)
32	{
33		switch(i)
34		{
35		default: ASSERT(false);
36		case 1: return OPCODE_DP1;
37		case 2: return OPCODE_DP2;
38		case 3: return OPCODE_DP3;
39		case 4: return OPCODE_DP4;
40		}
41	}
42
43	Shader::Opcode Shader::OPCODE_LEN(int i)
44	{
45		switch(i)
46		{
47		default: ASSERT(false);
48		case 1: return OPCODE_ABS;
49		case 2: return OPCODE_LEN2;
50		case 3: return OPCODE_LEN3;
51		case 4: return OPCODE_LEN4;
52		}
53	}
54
55	Shader::Opcode Shader::OPCODE_DIST(int i)
56	{
57		switch(i)
58		{
59		default: ASSERT(false);
60		case 1: return OPCODE_DIST1;
61		case 2: return OPCODE_DIST2;
62		case 3: return OPCODE_DIST3;
63		case 4: return OPCODE_DIST4;
64		}
65	}
66
67	Shader::Opcode Shader::OPCODE_NRM(int i)
68	{
69		switch(i)
70		{
71		default: ASSERT(false);
72		case 1: return OPCODE_SGN;
73		case 2: return OPCODE_NRM2;
74		case 3: return OPCODE_NRM3;
75		case 4: return OPCODE_NRM4;
76		}
77	}
78
79	Shader::Opcode Shader::OPCODE_FORWARD(int i)
80	{
81		switch(i)
82		{
83		default: ASSERT(false);
84		case 1: return OPCODE_FORWARD1;
85		case 2: return OPCODE_FORWARD2;
86		case 3: return OPCODE_FORWARD3;
87		case 4: return OPCODE_FORWARD4;
88		}
89	}
90
91	Shader::Opcode Shader::OPCODE_REFLECT(int i)
92	{
93		switch(i)
94		{
95		default: ASSERT(false);
96		case 1: return OPCODE_REFLECT1;
97		case 2: return OPCODE_REFLECT2;
98		case 3: return OPCODE_REFLECT3;
99		case 4: return OPCODE_REFLECT4;
100		}
101	}
102
103	Shader::Opcode Shader::OPCODE_REFRACT(int i)
104	{
105		switch(i)
106		{
107		default: ASSERT(false);
108		case 1: return OPCODE_REFRACT1;
109		case 2: return OPCODE_REFRACT2;
110		case 3: return OPCODE_REFRACT3;
111		case 4: return OPCODE_REFRACT4;
112		}
113	}
114
115	Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116	{
117		control = CONTROL_RESERVED0;
118
119		predicate = false;
120		predicateNot = false;
121		predicateSwizzle = 0xE4;
122
123		coissue = false;
124		samplerType = SAMPLER_UNKNOWN;
125		usage = USAGE_POSITION;
126		usageIndex = 0;
127	}
128
129	Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130	{
131		parseOperationToken(*token++, majorVersion);
132
133		samplerType = SAMPLER_UNKNOWN;
134		usage = USAGE_POSITION;
135		usageIndex = 0;
136
137		if(opcode == OPCODE_IF ||
138		   opcode == OPCODE_IFC ||
139		   opcode == OPCODE_LOOP ||
140		   opcode == OPCODE_REP ||
141		   opcode == OPCODE_BREAKC ||
142		   opcode == OPCODE_BREAKP)   // No destination operand
143		{
144			if(size > 0) parseSourceToken(0, token++, majorVersion);
145			if(size > 1) parseSourceToken(1, token++, majorVersion);
146			if(size > 2) parseSourceToken(2, token++, majorVersion);
147			if(size > 3) ASSERT(false);
148		}
149		else if(opcode == OPCODE_DCL)
150		{
151			parseDeclarationToken(*token++);
152			parseDestinationToken(token++, majorVersion);
153		}
154		else
155		{
156			if(size > 0)
157			{
158				parseDestinationToken(token, majorVersion);
159
160				if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161				{
162					token++;
163					size--;
164				}
165
166				token++;
167				size--;
168			}
169
170			if(predicate)
171			{
172				ASSERT(size != 0);
173
174				predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175				predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176
177				token++;
178				size--;
179			}
180
181			for(int i = 0; size > 0; i++)
182			{
183				parseSourceToken(i, token, majorVersion);
184
185				token++;
186				size--;
187
188				if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189				{
190					token++;
191					size--;
192				}
193			}
194		}
195	}
196
197	Shader::Instruction::~Instruction()
198	{
199	}
200
201	std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202	{
203		std::string instructionString;
204
205		if(opcode != OPCODE_DCL)
206		{
207			instructionString += coissue ? "+ " : "";
208
209			if(predicate)
210			{
211				instructionString += predicateNot ? "(!p0" : "(p0";
212				instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213				instructionString += ") ";
214			}
215
216			instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217
218			if(dst.type != PARAMETER_VOID)
219			{
220				instructionString += " " + dst.string(shaderType, version) +
221				                           dst.relativeString() +
222				                           dst.maskString();
223			}
224
225			for(int i = 0; i < 4; i++)
226			{
227				if(src[i].type != PARAMETER_VOID)
228				{
229					instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230					instructionString += src[i].preModifierString() +
231										 src[i].string(shaderType, version) +
232										 src[i].relativeString() +
233										 src[i].postModifierString() +
234										 src[i].swizzleString();
235				}
236			}
237		}
238		else   // DCL
239		{
240			instructionString += "dcl";
241
242			if(dst.type == PARAMETER_SAMPLER)
243			{
244				switch(samplerType)
245				{
246				case SAMPLER_UNKNOWN: instructionString += " ";        break;
247				case SAMPLER_1D:      instructionString += "_1d ";     break;
248				case SAMPLER_2D:      instructionString += "_2d ";     break;
249				case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250				case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251				default:
252					ASSERT(false);
253				}
254
255				instructionString += dst.string(shaderType, version);
256			}
257			else if(dst.type == PARAMETER_INPUT ||
258				    dst.type == PARAMETER_OUTPUT ||
259				    dst.type == PARAMETER_TEXTURE)
260			{
261				if(version >= 0x0300)
262				{
263					switch(usage)
264					{
265					case USAGE_POSITION:     instructionString += "_position";     break;
266					case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267					case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268					case USAGE_NORMAL:       instructionString += "_normal";       break;
269					case USAGE_PSIZE:        instructionString += "_psize";        break;
270					case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271					case USAGE_TANGENT:      instructionString += "_tangent";      break;
272					case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273					case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274					case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275					case USAGE_COLOR:        instructionString += "_color";        break;
276					case USAGE_FOG:          instructionString += "_fog";          break;
277					case USAGE_DEPTH:        instructionString += "_depth";        break;
278					case USAGE_SAMPLE:       instructionString += "_sample";       break;
279					default:
280						ASSERT(false);
281					}
282
283					if(usageIndex > 0)
284					{
285						std::ostringstream buffer;
286
287						buffer << (int)usageIndex;
288
289						instructionString += buffer.str();
290					}
291				}
292				else ASSERT(dst.type != PARAMETER_OUTPUT);
293
294				instructionString += " ";
295
296				instructionString += dst.string(shaderType, version);
297				instructionString += dst.maskString();
298			}
299			else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300			{
301				instructionString += " ";
302
303				instructionString += dst.string(shaderType, version);
304			}
305			else ASSERT(false);
306		}
307
308		return instructionString;
309	}
310
311	std::string Shader::DestinationParameter::modifierString() const
312	{
313		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314		{
315			return "";
316		}
317
318		std::string modifierString;
319
320		if(saturate)
321		{
322			modifierString += "_sat";
323		}
324
325		if(partialPrecision)
326		{
327			modifierString += "_pp";
328		}
329
330		if(centroid)
331		{
332			modifierString += "_centroid";
333		}
334
335		return modifierString;
336	}
337
338	std::string Shader::DestinationParameter::shiftString() const
339	{
340		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
341		{
342			return "";
343		}
344
345		switch(shift)
346		{
347		case 0:		return "";
348		case 1:		return "_x2";
349		case 2:		return "_x4";
350		case 3:		return "_x8";
351		case -1:	return "_d2";
352		case -2:	return "_d4";
353		case -3:	return "_d8";
354		default:
355			return "";
356		//	ASSERT(false);   // FIXME
357		}
358	}
359
360	std::string Shader::DestinationParameter::maskString() const
361	{
362		if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
363		{
364			return "";
365		}
366
367		switch(mask)
368		{
369		case 0x0:	return "";
370		case 0x1:	return ".x";
371		case 0x2:	return ".y";
372		case 0x3:	return ".xy";
373		case 0x4:	return ".z";
374		case 0x5:	return ".xz";
375		case 0x6:	return ".yz";
376		case 0x7:	return ".xyz";
377		case 0x8:	return ".w";
378		case 0x9:	return ".xw";
379		case 0xA:	return ".yw";
380		case 0xB:	return ".xyw";
381		case 0xC:	return ".zw";
382		case 0xD:	return ".xzw";
383		case 0xE:	return ".yzw";
384		case 0xF:	return "";
385		default:
386			ASSERT(false);
387		}
388
389		return "";
390	}
391
392	std::string Shader::SourceParameter::preModifierString() const
393	{
394		if(type == PARAMETER_VOID)
395		{
396			return "";
397		}
398
399		switch(modifier)
400		{
401		case MODIFIER_NONE:			return "";
402		case MODIFIER_NEGATE:		return "-";
403		case MODIFIER_BIAS:			return "";
404		case MODIFIER_BIAS_NEGATE:	return "-";
405		case MODIFIER_SIGN:			return "";
406		case MODIFIER_SIGN_NEGATE:	return "-";
407		case MODIFIER_COMPLEMENT:	return "1-";
408		case MODIFIER_X2:			return "";
409		case MODIFIER_X2_NEGATE:	return "-";
410		case MODIFIER_DZ:			return "";
411		case MODIFIER_DW:			return "";
412		case MODIFIER_ABS:			return "";
413		case MODIFIER_ABS_NEGATE:	return "-";
414		case MODIFIER_NOT:			return "!";
415		default:
416			ASSERT(false);
417		}
418
419		return "";
420	}
421
422	std::string Shader::Parameter::relativeString() const
423	{
424		if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
425		{
426			if(rel.type == PARAMETER_VOID)
427			{
428				return "";
429			}
430			else if(rel.type == PARAMETER_ADDR)
431			{
432				switch(rel.swizzle & 0x03)
433				{
434				case 0: return "[a0.x]";
435				case 1: return "[a0.y]";
436				case 2: return "[a0.z]";
437				case 3: return "[a0.w]";
438				}
439			}
440			else if(rel.type == PARAMETER_TEMP)
441			{
442				std::ostringstream buffer;
443				buffer << rel.index;
444
445				switch(rel.swizzle & 0x03)
446				{
447				case 0: return "[r" + buffer.str() + ".x]";
448				case 1: return "[r" + buffer.str() + ".y]";
449				case 2: return "[r" + buffer.str() + ".z]";
450				case 3: return "[r" + buffer.str() + ".w]";
451				}
452			}
453			else if(rel.type == PARAMETER_LOOP)
454			{
455				return "[aL]";
456			}
457			else if(rel.type == PARAMETER_CONST)
458			{
459				std::ostringstream buffer;
460				buffer << rel.index;
461
462				switch(rel.swizzle & 0x03)
463				{
464				case 0: return "[c" + buffer.str() + ".x]";
465				case 1: return "[c" + buffer.str() + ".y]";
466				case 2: return "[c" + buffer.str() + ".z]";
467				case 3: return "[c" + buffer.str() + ".w]";
468				}
469			}
470			else ASSERT(false);
471		}
472
473		return "";
474	}
475
476	std::string Shader::SourceParameter::postModifierString() const
477	{
478		if(type == PARAMETER_VOID)
479		{
480			return "";
481		}
482
483		switch(modifier)
484		{
485		case MODIFIER_NONE:			return "";
486		case MODIFIER_NEGATE:		return "";
487		case MODIFIER_BIAS:			return "_bias";
488		case MODIFIER_BIAS_NEGATE:	return "_bias";
489		case MODIFIER_SIGN:			return "_bx2";
490		case MODIFIER_SIGN_NEGATE:	return "_bx2";
491		case MODIFIER_COMPLEMENT:	return "";
492		case MODIFIER_X2:			return "_x2";
493		case MODIFIER_X2_NEGATE:	return "_x2";
494		case MODIFIER_DZ:			return "_dz";
495		case MODIFIER_DW:			return "_dw";
496		case MODIFIER_ABS:			return "_abs";
497		case MODIFIER_ABS_NEGATE:	return "_abs";
498		case MODIFIER_NOT:			return "";
499		default:
500			ASSERT(false);
501		}
502
503		return "";
504	}
505
506	std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
507	{
508		if(type == PARAMETER_CONST && bufferIndex >= 0)
509		{
510			std::ostringstream buffer;
511			buffer << bufferIndex;
512
513			std::ostringstream offset;
514			offset << index;
515
516			return "cb" + buffer.str() + "[" + offset.str() + "]";
517		}
518		else
519		{
520			return Parameter::string(shaderType, version);
521		}
522	}
523
524	std::string Shader::SourceParameter::swizzleString() const
525	{
526		return Instruction::swizzleString(type, swizzle);
527	}
528
529	void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
530	{
531		if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
532		{
533			opcode = (Opcode)token;
534
535			control = CONTROL_RESERVED0;
536			predicate = false;
537			coissue = false;
538		}
539		else
540		{
541			opcode = (Opcode)(token & 0x0000FFFF);
542			control = (Control)((token & 0x00FF0000) >> 16);
543
544			int size = (token & 0x0F000000) >> 24;
545
546			predicate = (token & 0x10000000) != 0x00000000;
547			coissue = (token & 0x40000000) != 0x00000000;
548
549			if(majorVersion < 2)
550			{
551				if(size != 0)
552				{
553					ASSERT(false);   // Reserved
554				}
555			}
556
557			if(majorVersion < 2)
558			{
559				if(predicate)
560				{
561					ASSERT(false);
562				}
563			}
564
565			if((token & 0x20000000) != 0x00000000)
566			{
567				ASSERT(false);   // Reserved
568			}
569
570			if(majorVersion >= 2)
571			{
572				if(coissue)
573				{
574					ASSERT(false);   // Reserved
575				}
576			}
577
578			if((token & 0x80000000) != 0x00000000)
579			{
580				ASSERT(false);
581			}
582		}
583	}
584
585	void Shader::Instruction::parseDeclarationToken(unsigned long token)
586	{
587		samplerType = (SamplerType)((token & 0x78000000) >> 27);
588		usage = (Usage)(token & 0x0000001F);
589		usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
590	}
591
592	void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
593	{
594		dst.index = (unsigned short)(token[0] & 0x000007FF);
595		dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
596
597		// TODO: Check type and index range
598
599		bool relative = (token[0] & 0x00002000) != 0x00000000;
600		dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
601		dst.rel.swizzle = 0x00;
602		dst.rel.scale = 1;
603
604		if(relative && majorVersion >= 3)
605		{
606			dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
607			dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
608		}
609		else if(relative) ASSERT(false);   // Reserved
610
611		if((token[0] & 0x0000C000) != 0x00000000)
612		{
613			ASSERT(false);   // Reserved
614		}
615
616		dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
617		dst.saturate = (token[0] & 0x00100000) != 0;
618		dst.partialPrecision = (token[0] & 0x00200000) != 0;
619		dst.centroid = (token[0] & 0x00400000) != 0;
620		dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
621
622		if(majorVersion >= 2)
623		{
624			if(dst.shift)
625			{
626				ASSERT(false);   // Reserved
627			}
628		}
629
630		if((token[0] & 0x80000000) != 0x80000000)
631		{
632			ASSERT(false);
633		}
634	}
635
636	void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
637	{
638		// Defaults
639		src[i].index = 0;
640		src[i].type = PARAMETER_VOID;
641		src[i].modifier = MODIFIER_NONE;
642		src[i].swizzle = 0xE4;
643		src[i].rel.type = PARAMETER_VOID;
644		src[i].rel.swizzle = 0x00;
645		src[i].rel.scale = 1;
646
647		switch(opcode)
648		{
649		case OPCODE_DEF:
650			src[0].type = PARAMETER_FLOAT4LITERAL;
651			src[0].value[i] = *(float*)token;
652			break;
653		case OPCODE_DEFB:
654			src[0].type = PARAMETER_BOOL1LITERAL;
655			src[0].boolean[0] = *(int*)token;
656			break;
657		case OPCODE_DEFI:
658			src[0].type = PARAMETER_INT4LITERAL;
659			src[0].integer[i] = *(int*)token;
660			break;
661		default:
662			src[i].index = (unsigned short)(token[0] & 0x000007FF);
663			src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
664
665			// FIXME: Check type and index range
666
667			bool relative = (token[0] & 0x00002000) != 0x00000000;
668			src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
669
670			if((token[0] & 0x0000C000) != 0x00000000)
671			{
672				if(opcode != OPCODE_DEF &&
673				   opcode != OPCODE_DEFI &&
674				   opcode != OPCODE_DEFB)
675				{
676					ASSERT(false);
677				}
678			}
679
680			src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
681			src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
682
683			if((token[0] & 0x80000000) != 0x80000000)
684			{
685				if(opcode != OPCODE_DEF &&
686				   opcode != OPCODE_DEFI &&
687				   opcode != OPCODE_DEFB)
688				{
689					ASSERT(false);
690				}
691			}
692
693			if(relative && majorVersion >= 2)
694			{
695				src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
696				src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
697			}
698		}
699	}
700
701	std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
702	{
703		if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
704		{
705			return "";
706		}
707
708		int x = (swizzle & 0x03) >> 0;
709		int y = (swizzle & 0x0C) >> 2;
710		int z = (swizzle & 0x30) >> 4;
711		int w = (swizzle & 0xC0) >> 6;
712
713		std::string swizzleString = ".";
714
715		switch(x)
716		{
717		case 0: swizzleString += "x"; break;
718		case 1: swizzleString += "y"; break;
719		case 2: swizzleString += "z"; break;
720		case 3: swizzleString += "w"; break;
721		}
722
723		if(!(x == y && y == z && z == w))
724		{
725			switch(y)
726			{
727			case 0: swizzleString += "x"; break;
728			case 1: swizzleString += "y"; break;
729			case 2: swizzleString += "z"; break;
730			case 3: swizzleString += "w"; break;
731			}
732
733			if(!(y == z && z == w))
734			{
735				switch(z)
736				{
737				case 0: swizzleString += "x"; break;
738				case 1: swizzleString += "y"; break;
739				case 2: swizzleString += "z"; break;
740				case 3: swizzleString += "w"; break;
741				}
742
743				if(!(z == w))
744				{
745					switch(w)
746					{
747					case 0: swizzleString += "x"; break;
748					case 1: swizzleString += "y"; break;
749					case 2: swizzleString += "z"; break;
750					case 3: swizzleString += "w"; break;
751					}
752				}
753			}
754		}
755
756		return swizzleString;
757	}
758
759	std::string Shader::Instruction::operationString(unsigned short version) const
760	{
761		switch(opcode)
762		{
763		case OPCODE_NULL:            return "null";
764		case OPCODE_NOP:             return "nop";
765		case OPCODE_MOV:             return "mov";
766		case OPCODE_ADD:             return "add";
767		case OPCODE_IADD:            return "iadd";
768		case OPCODE_SUB:             return "sub";
769		case OPCODE_ISUB:            return "isub";
770		case OPCODE_MAD:             return "mad";
771		case OPCODE_IMAD:            return "imad";
772		case OPCODE_MUL:             return "mul";
773		case OPCODE_IMUL:            return "imul";
774		case OPCODE_RCPX:            return "rcpx";
775		case OPCODE_DIV:             return "div";
776		case OPCODE_IDIV:            return "idiv";
777		case OPCODE_UDIV:            return "udiv";
778		case OPCODE_MOD:             return "mod";
779		case OPCODE_IMOD:            return "imod";
780		case OPCODE_UMOD:            return "umod";
781		case OPCODE_SHL:             return "shl";
782		case OPCODE_ISHR:            return "ishr";
783		case OPCODE_USHR:            return "ushr";
784		case OPCODE_RSQX:            return "rsqx";
785		case OPCODE_SQRT:            return "sqrt";
786		case OPCODE_RSQ:             return "rsq";
787		case OPCODE_LEN2:            return "len2";
788		case OPCODE_LEN3:            return "len3";
789		case OPCODE_LEN4:            return "len4";
790		case OPCODE_DIST1:           return "dist1";
791		case OPCODE_DIST2:           return "dist2";
792		case OPCODE_DIST3:           return "dist3";
793		case OPCODE_DIST4:           return "dist4";
794		case OPCODE_DP3:             return "dp3";
795		case OPCODE_DP4:             return "dp4";
796		case OPCODE_DET2:            return "det2";
797		case OPCODE_DET3:            return "det3";
798		case OPCODE_DET4:            return "det4";
799		case OPCODE_MIN:             return "min";
800		case OPCODE_IMIN:            return "imin";
801		case OPCODE_UMIN:            return "umin";
802		case OPCODE_MAX:             return "max";
803		case OPCODE_IMAX:            return "imax";
804		case OPCODE_UMAX:            return "umax";
805		case OPCODE_SLT:             return "slt";
806		case OPCODE_SGE:             return "sge";
807		case OPCODE_EXP2X:           return "exp2x";
808		case OPCODE_LOG2X:           return "log2x";
809		case OPCODE_LIT:             return "lit";
810		case OPCODE_ATT:             return "att";
811		case OPCODE_LRP:             return "lrp";
812		case OPCODE_STEP:            return "step";
813		case OPCODE_SMOOTH:          return "smooth";
814		case OPCODE_FLOATBITSTOINT:  return "floatBitsToInt";
815		case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
816		case OPCODE_INTBITSTOFLOAT:  return "intBitsToFloat";
817		case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
818		case OPCODE_PACKSNORM2x16:   return "packSnorm2x16";
819		case OPCODE_PACKUNORM2x16:   return "packUnorm2x16";
820		case OPCODE_PACKHALF2x16:    return "packHalf2x16";
821		case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
822		case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
823		case OPCODE_UNPACKHALF2x16:  return "unpackHalf2x16";
824		case OPCODE_FRC:             return "frc";
825		case OPCODE_M4X4:            return "m4x4";
826		case OPCODE_M4X3:            return "m4x3";
827		case OPCODE_M3X4:            return "m3x4";
828		case OPCODE_M3X3:            return "m3x3";
829		case OPCODE_M3X2:            return "m3x2";
830		case OPCODE_CALL:            return "call";
831		case OPCODE_CALLNZ:          return "callnz";
832		case OPCODE_LOOP:            return "loop";
833		case OPCODE_RET:             return "ret";
834		case OPCODE_ENDLOOP:         return "endloop";
835		case OPCODE_LABEL:           return "label";
836		case OPCODE_DCL:             return "dcl";
837		case OPCODE_POWX:            return "powx";
838		case OPCODE_CRS:             return "crs";
839		case OPCODE_SGN:             return "sgn";
840		case OPCODE_ISGN:            return "isgn";
841		case OPCODE_ABS:             return "abs";
842		case OPCODE_IABS:            return "iabs";
843		case OPCODE_NRM2:            return "nrm2";
844		case OPCODE_NRM3:            return "nrm3";
845		case OPCODE_NRM4:            return "nrm4";
846		case OPCODE_SINCOS:          return "sincos";
847		case OPCODE_REP:             return "rep";
848		case OPCODE_ENDREP:          return "endrep";
849		case OPCODE_IF:              return "if";
850		case OPCODE_IFC:             return "ifc";
851		case OPCODE_ELSE:            return "else";
852		case OPCODE_ENDIF:           return "endif";
853		case OPCODE_BREAK:           return "break";
854		case OPCODE_BREAKC:          return "breakc";
855		case OPCODE_MOVA:            return "mova";
856		case OPCODE_DEFB:            return "defb";
857		case OPCODE_DEFI:            return "defi";
858		case OPCODE_TEXCOORD:        return "texcoord";
859		case OPCODE_TEXKILL:         return "texkill";
860		case OPCODE_DISCARD:         return "discard";
861		case OPCODE_TEX:
862			if(version < 0x0104)     return "tex";
863			else                     return "texld";
864		case OPCODE_TEXBEM:          return "texbem";
865		case OPCODE_TEXBEML:         return "texbeml";
866		case OPCODE_TEXREG2AR:       return "texreg2ar";
867		case OPCODE_TEXREG2GB:       return "texreg2gb";
868		case OPCODE_TEXM3X2PAD:      return "texm3x2pad";
869		case OPCODE_TEXM3X2TEX:      return "texm3x2tex";
870		case OPCODE_TEXM3X3PAD:      return "texm3x3pad";
871		case OPCODE_TEXM3X3TEX:      return "texm3x3tex";
872		case OPCODE_RESERVED0:       return "reserved0";
873		case OPCODE_TEXM3X3SPEC:     return "texm3x3spec";
874		case OPCODE_TEXM3X3VSPEC:    return "texm3x3vspec";
875		case OPCODE_EXPP:            return "expp";
876		case OPCODE_LOGP:            return "logp";
877		case OPCODE_CND:             return "cnd";
878		case OPCODE_DEF:             return "def";
879		case OPCODE_TEXREG2RGB:      return "texreg2rgb";
880		case OPCODE_TEXDP3TEX:       return "texdp3tex";
881		case OPCODE_TEXM3X2DEPTH:    return "texm3x2depth";
882		case OPCODE_TEXDP3:          return "texdp3";
883		case OPCODE_TEXM3X3:         return "texm3x3";
884		case OPCODE_TEXDEPTH:        return "texdepth";
885		case OPCODE_CMP0:            return "cmp0";
886		case OPCODE_ICMP:            return "icmp";
887		case OPCODE_UCMP:            return "ucmp";
888		case OPCODE_SELECT:          return "select";
889		case OPCODE_EXTRACT:         return "extract";
890		case OPCODE_INSERT:          return "insert";
891		case OPCODE_BEM:             return "bem";
892		case OPCODE_DP2ADD:          return "dp2add";
893		case OPCODE_DFDX:            return "dFdx";
894		case OPCODE_DFDY:            return "dFdy";
895		case OPCODE_FWIDTH:          return "fwidth";
896		case OPCODE_TEXLDD:          return "texldd";
897		case OPCODE_CMP:             return "cmp";
898		case OPCODE_TEXLDL:          return "texldl";
899		case OPCODE_TEXBIAS:         return "texbias";
900		case OPCODE_TEXOFFSET:       return "texoffset";
901		case OPCODE_TEXOFFSETBIAS:   return "texoffsetbias";
902		case OPCODE_TEXLODOFFSET:    return "texlodoffset";
903		case OPCODE_TEXELFETCH:      return "texelfetch";
904		case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
905		case OPCODE_TEXGRAD:         return "texgrad";
906		case OPCODE_TEXGRADOFFSET:   return "texgradoffset";
907		case OPCODE_BREAKP:          return "breakp";
908		case OPCODE_TEXSIZE:         return "texsize";
909		case OPCODE_PHASE:           return "phase";
910		case OPCODE_COMMENT:         return "comment";
911		case OPCODE_END:             return "end";
912		case OPCODE_PS_1_0:          return "ps_1_0";
913		case OPCODE_PS_1_1:          return "ps_1_1";
914		case OPCODE_PS_1_2:          return "ps_1_2";
915		case OPCODE_PS_1_3:          return "ps_1_3";
916		case OPCODE_PS_1_4:          return "ps_1_4";
917		case OPCODE_PS_2_0:          return "ps_2_0";
918		case OPCODE_PS_2_x:          return "ps_2_x";
919		case OPCODE_PS_3_0:          return "ps_3_0";
920		case OPCODE_VS_1_0:          return "vs_1_0";
921		case OPCODE_VS_1_1:          return "vs_1_1";
922		case OPCODE_VS_2_0:          return "vs_2_0";
923		case OPCODE_VS_2_x:          return "vs_2_x";
924		case OPCODE_VS_2_sw:         return "vs_2_sw";
925		case OPCODE_VS_3_0:          return "vs_3_0";
926		case OPCODE_VS_3_sw:         return "vs_3_sw";
927		case OPCODE_WHILE:           return "while";
928		case OPCODE_ENDWHILE:        return "endwhile";
929		case OPCODE_COS:             return "cos";
930		case OPCODE_SIN:             return "sin";
931		case OPCODE_TAN:             return "tan";
932		case OPCODE_ACOS:            return "acos";
933		case OPCODE_ASIN:            return "asin";
934		case OPCODE_ATAN:            return "atan";
935		case OPCODE_ATAN2:           return "atan2";
936		case OPCODE_COSH:            return "cosh";
937		case OPCODE_SINH:            return "sinh";
938		case OPCODE_TANH:            return "tanh";
939		case OPCODE_ACOSH:           return "acosh";
940		case OPCODE_ASINH:           return "asinh";
941		case OPCODE_ATANH:           return "atanh";
942		case OPCODE_DP1:             return "dp1";
943		case OPCODE_DP2:             return "dp2";
944		case OPCODE_TRUNC:           return "trunc";
945		case OPCODE_FLOOR:           return "floor";
946		case OPCODE_ROUND:           return "round";
947		case OPCODE_ROUNDEVEN:       return "roundEven";
948		case OPCODE_CEIL:            return "ceil";
949		case OPCODE_EXP2:            return "exp2";
950		case OPCODE_LOG2:            return "log2";
951		case OPCODE_EXP:             return "exp";
952		case OPCODE_LOG:             return "log";
953		case OPCODE_POW:             return "pow";
954		case OPCODE_F2B:             return "f2b";
955		case OPCODE_B2F:             return "b2f";
956		case OPCODE_F2I:             return "f2i";
957		case OPCODE_I2F:             return "i2f";
958		case OPCODE_F2U:             return "f2u";
959		case OPCODE_U2F:             return "u2f";
960		case OPCODE_B2I:             return "b2i";
961		case OPCODE_I2B:             return "i2b";
962		case OPCODE_ALL:             return "all";
963		case OPCODE_ANY:             return "any";
964		case OPCODE_NEG:             return "neg";
965		case OPCODE_INEG:            return "ineg";
966		case OPCODE_ISNAN:           return "isnan";
967		case OPCODE_ISINF:           return "isinf";
968		case OPCODE_NOT:             return "not";
969		case OPCODE_OR:              return "or";
970		case OPCODE_XOR:             return "xor";
971		case OPCODE_AND:             return "and";
972		case OPCODE_EQ:              return "eq";
973		case OPCODE_NE:              return "neq";
974		case OPCODE_FORWARD1:        return "forward1";
975		case OPCODE_FORWARD2:        return "forward2";
976		case OPCODE_FORWARD3:        return "forward3";
977		case OPCODE_FORWARD4:        return "forward4";
978		case OPCODE_REFLECT1:        return "reflect1";
979		case OPCODE_REFLECT2:        return "reflect2";
980		case OPCODE_REFLECT3:        return "reflect3";
981		case OPCODE_REFLECT4:        return "reflect4";
982		case OPCODE_REFRACT1:        return "refract1";
983		case OPCODE_REFRACT2:        return "refract2";
984		case OPCODE_REFRACT3:        return "refract3";
985		case OPCODE_REFRACT4:        return "refract4";
986		case OPCODE_LEAVE:           return "leave";
987		case OPCODE_CONTINUE:        return "continue";
988		case OPCODE_TEST:            return "test";
989		case OPCODE_SWITCH:          return "switch";
990		case OPCODE_ENDSWITCH:       return "endswitch";
991		default:
992			ASSERT(false);
993		}
994
995		return "<unknown>";
996	}
997
998	std::string Shader::Instruction::controlString() const
999	{
1000		if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
1001		{
1002			if(project) return "p";
1003
1004			if(bias) return "b";
1005
1006			// FIXME: LOD
1007		}
1008
1009		switch(control)
1010		{
1011		case 1: return "_gt";
1012		case 2: return "_eq";
1013		case 3: return "_ge";
1014		case 4: return "_lt";
1015		case 5: return "_ne";
1016		case 6: return "_le";
1017		default:
1018			return "";
1019		//	ASSERT(false);   // FIXME
1020		}
1021	}
1022
1023	std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1024	{
1025		std::ostringstream buffer;
1026
1027		if(type == PARAMETER_FLOAT4LITERAL)
1028		{
1029			buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1030
1031			return buffer.str();
1032		}
1033		else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1034		{
1035			buffer << index;
1036
1037			return typeString(shaderType, version) + buffer.str();
1038		}
1039		else
1040		{
1041			return typeString(shaderType, version);
1042		}
1043	}
1044
1045	std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1046	{
1047		switch(type)
1048		{
1049		case PARAMETER_TEMP:			return "r";
1050		case PARAMETER_INPUT:			return "v";
1051		case PARAMETER_CONST:			return "c";
1052		case PARAMETER_TEXTURE:
1053	//	case PARAMETER_ADDR:
1054			if(shaderType == SHADER_PIXEL)	return "t";
1055			else							return "a0";
1056		case PARAMETER_RASTOUT:
1057			if(index == 0)              return "oPos";
1058			else if(index == 1)         return "oFog";
1059			else if(index == 2)         return "oPts";
1060			else                        ASSERT(false);
1061		case PARAMETER_ATTROUT:			return "oD";
1062		case PARAMETER_TEXCRDOUT:
1063	//	case PARAMETER_OUTPUT:			return "";
1064			if(version < 0x0300)		return "oT";
1065			else						return "o";
1066		case PARAMETER_CONSTINT:		return "i";
1067		case PARAMETER_COLOROUT:		return "oC";
1068		case PARAMETER_DEPTHOUT:		return "oDepth";
1069		case PARAMETER_SAMPLER:			return "s";
1070	//	case PARAMETER_CONST2:			return "";
1071	//	case PARAMETER_CONST3:			return "";
1072	//	case PARAMETER_CONST4:			return "";
1073		case PARAMETER_CONSTBOOL:		return "b";
1074		case PARAMETER_LOOP:			return "aL";
1075	//	case PARAMETER_TEMPFLOAT16:		return "";
1076		case PARAMETER_MISCTYPE:
1077			switch(index)
1078			{
1079			case VPosIndex:				return "vPos";
1080			case VFaceIndex:			return "vFace";
1081			case InstanceIDIndex:		return "iID";
1082			case VertexIDIndex:			return "vID";
1083			default: ASSERT(false);
1084			}
1085		case PARAMETER_LABEL:			return "l";
1086		case PARAMETER_PREDICATE:		return "p0";
1087		case PARAMETER_FLOAT4LITERAL:	return "";
1088		case PARAMETER_BOOL1LITERAL:	return "";
1089		case PARAMETER_INT4LITERAL:		return "";
1090	//	case PARAMETER_VOID:			return "";
1091		default:
1092			ASSERT(false);
1093		}
1094
1095		return "";
1096	}
1097
1098	bool Shader::Instruction::isBranch() const
1099	{
1100		return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1101	}
1102
1103	bool Shader::Instruction::isCall() const
1104	{
1105		return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1106	}
1107
1108	bool Shader::Instruction::isBreak() const
1109	{
1110		return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1111	}
1112
1113	bool Shader::Instruction::isLoop() const
1114	{
1115		return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1116	}
1117
1118	bool Shader::Instruction::isEndLoop() const
1119	{
1120		return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1121	}
1122
1123	bool Shader::Instruction::isPredicated() const
1124	{
1125		return predicate ||
1126		       analysisBranch ||
1127		       analysisBreak ||
1128		       analysisContinue ||
1129		       analysisLeave;
1130	}
1131
	// Creates an empty shader. The serial ID is taken from the class-wide
	// counter, which is post-incremented, and no samplers are marked as used.
	// NOTE(review): serialCounter is a plain volatile int, so this increment is
	// not atomic — confirm shaders are never constructed concurrently.
	Shader::Shader() : serialID(serialCounter++)
	{
		usedSamplers = 0;
	}
1136
1137	Shader::~Shader()
1138	{
1139		for(auto &inst : instruction)
1140		{
1141			delete inst;
1142			inst = 0;
1143		}
1144	}
1145
1146	void Shader::parse(const unsigned long *token)
1147	{
1148		minorVersion = (unsigned char)(token[0] & 0x000000FF);
1149		majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1150		shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1151
1152		int length = 0;
1153
1154		if(shaderType == SHADER_VERTEX)
1155		{
1156			length = VertexShader::validate(token);
1157		}
1158		else if(shaderType == SHADER_PIXEL)
1159		{
1160			length = PixelShader::validate(token);
1161		}
1162		else ASSERT(false);
1163
1164		ASSERT(length != 0);
1165		instruction.resize(length);
1166
1167		for(int i = 0; i < length; i++)
1168		{
1169			while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1170			{
1171				int length = (*token & 0x7FFF0000) >> 16;
1172
1173				token += length + 1;
1174			}
1175
1176			int tokenCount = size(*token);
1177
1178			instruction[i] = new Instruction(token, tokenCount, majorVersion);
1179
1180			token += 1 + tokenCount;
1181		}
1182	}
1183
	// Convenience overload: returns the token count that follows 'opcode',
	// evaluated for this shader's own shader model.
	int Shader::size(unsigned long opcode) const
	{
		return size(opcode, shaderModel);
	}
1188
1189	int Shader::size(unsigned long opcode, unsigned short shaderModel)
1190	{
1191		if(shaderModel > 0x0300)
1192		{
1193			ASSERT(false);
1194		}
1195
1196		static const signed char size[] =
1197		{
1198			0,   // NOP = 0
1199			2,   // MOV
1200			3,   // ADD
1201			3,   // SUB
1202			4,   // MAD
1203			3,   // MUL
1204			2,   // RCP
1205			2,   // RSQ
1206			3,   // DP3
1207			3,   // DP4
1208			3,   // MIN
1209			3,   // MAX
1210			3,   // SLT
1211			3,   // SGE
1212			2,   // EXP
1213			2,   // LOG
1214			2,   // LIT
1215			3,   // DST
1216			4,   // LRP
1217			2,   // FRC
1218			3,   // M4x4
1219			3,   // M4x3
1220			3,   // M3x4
1221			3,   // M3x3
1222			3,   // M3x2
1223			1,   // CALL
1224			2,   // CALLNZ
1225			2,   // LOOP
1226			0,   // RET
1227			0,   // ENDLOOP
1228			1,   // LABEL
1229			2,   // DCL
1230			3,   // POW
1231			3,   // CRS
1232			4,   // SGN
1233			2,   // ABS
1234			2,   // NRM
1235			4,   // SINCOS
1236			1,   // REP
1237			0,   // ENDREP
1238			1,   // IF
1239			2,   // IFC
1240			0,   // ELSE
1241			0,   // ENDIF
1242			0,   // BREAK
1243			2,   // BREAKC
1244			2,   // MOVA
1245			2,   // DEFB
1246			5,   // DEFI
1247			-1,  // 49
1248			-1,  // 50
1249			-1,  // 51
1250			-1,  // 52
1251			-1,  // 53
1252			-1,  // 54
1253			-1,  // 55
1254			-1,  // 56
1255			-1,  // 57
1256			-1,  // 58
1257			-1,  // 59
1258			-1,  // 60
1259			-1,  // 61
1260			-1,  // 62
1261			-1,  // 63
1262			1,   // TEXCOORD = 64
1263			1,   // TEXKILL
1264			1,   // TEX
1265			2,   // TEXBEM
1266			2,   // TEXBEML
1267			2,   // TEXREG2AR
1268			2,   // TEXREG2GB
1269			2,   // TEXM3x2PAD
1270			2,   // TEXM3x2TEX
1271			2,   // TEXM3x3PAD
1272			2,   // TEXM3x3TEX
1273			-1,  // RESERVED0
1274			3,   // TEXM3x3SPEC
1275			2,   // TEXM3x3VSPEC
1276			2,   // EXPP
1277			2,   // LOGP
1278			4,   // CND
1279			5,   // DEF
1280			2,   // TEXREG2RGB
1281			2,   // TEXDP3TEX
1282			2,   // TEXM3x2DEPTH
1283			2,   // TEXDP3
1284			2,   // TEXM3x3
1285			1,   // TEXDEPTH
1286			4,   // CMP
1287			3,   // BEM
1288			4,   // DP2ADD
1289			2,   // DSX
1290			2,   // DSY
1291			5,   // TEXLDD
1292			3,   // SETP
1293			3,   // TEXLDL
1294			2,   // BREAKP
1295			-1,  // 97
1296			-1,  // 98
1297			-1,  // 99
1298			-1,  // 100
1299			-1,  // 101
1300			-1,  // 102
1301			-1,  // 103
1302			-1,  // 104
1303			-1,  // 105
1304			-1,  // 106
1305			-1,  // 107
1306			-1,  // 108
1307			-1,  // 109
1308			-1,  // 110
1309			-1,  // 111
1310			-1,  // 112
1311		};
1312
1313		int length = 0;
1314
1315		if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1316		{
1317			return (opcode & 0x7FFF0000) >> 16;
1318		}
1319
1320		if(opcode != OPCODE_PS_1_0 &&
1321		   opcode != OPCODE_PS_1_1 &&
1322		   opcode != OPCODE_PS_1_2 &&
1323		   opcode != OPCODE_PS_1_3 &&
1324		   opcode != OPCODE_PS_1_4 &&
1325		   opcode != OPCODE_PS_2_0 &&
1326		   opcode != OPCODE_PS_2_x &&
1327		   opcode != OPCODE_PS_3_0 &&
1328		   opcode != OPCODE_VS_1_0 &&
1329		   opcode != OPCODE_VS_1_1 &&
1330		   opcode != OPCODE_VS_2_0 &&
1331		   opcode != OPCODE_VS_2_x &&
1332		   opcode != OPCODE_VS_2_sw &&
1333		   opcode != OPCODE_VS_3_0 &&
1334		   opcode != OPCODE_VS_3_sw &&
1335		   opcode != OPCODE_PHASE &&
1336		   opcode != OPCODE_END)
1337		{
1338			if(shaderModel >= 0x0200)
1339			{
1340				length = (opcode & 0x0F000000) >> 24;
1341			}
1342			else
1343			{
1344				length = size[opcode & 0x0000FFFF];
1345			}
1346		}
1347
1348		if(length < 0)
1349		{
1350			ASSERT(false);
1351		}
1352
1353		if(shaderModel == 0x0104)
1354		{
1355			switch(opcode & 0x0000FFFF)
1356			{
1357			case OPCODE_TEX:
1358				length += 1;
1359				break;
1360			case OPCODE_TEXCOORD:
1361				length += 1;
1362				break;
1363			default:
1364				break;
1365			}
1366		}
1367
1368		return length;
1369	}
1370
1371	bool Shader::maskContainsComponent(int mask, int component)
1372	{
1373		return (mask & (1 << component)) != 0;
1374	}
1375
1376	bool Shader::swizzleContainsComponent(int swizzle, int component)
1377	{
1378		if((swizzle & 0x03) >> 0 == component) return true;
1379		if((swizzle & 0x0C) >> 2 == component) return true;
1380		if((swizzle & 0x30) >> 4 == component) return true;
1381		if((swizzle & 0xC0) >> 6 == component) return true;
1382
1383		return false;
1384	}
1385
1386	bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1387	{
1388		if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1389		if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1390		if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1391		if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1392
1393		return false;
1394	}
1395
	// Returns the dynamic branching flag computed by analyzeDynamicBranching().
	bool Shader::containsDynamicBranching() const
	{
		return dynamicBranching;
	}
1400
	// Returns whether analyzeDynamicBranching() found a break instruction.
	bool Shader::containsBreakInstruction() const
	{
		return containsBreak;
	}
1405
	// Returns whether analyzeDynamicBranching() found a CONTINUE instruction.
	bool Shader::containsContinueInstruction() const
	{
		return containsContinue;
	}
1410
	// Returns whether analyzeDynamicBranching() found a LEAVE instruction.
	bool Shader::containsLeaveInstruction() const
	{
		return containsLeave;
	}
1415
	// Returns the containsDefine flag computed by analyzeDynamicBranching().
	bool Shader::containsDefineInstruction() const
	{
		return containsDefine;
	}
1420
1421	bool Shader::usesSampler(int index) const
1422	{
1423		return (usedSamplers & (1 << index)) != 0;
1424	}
1425
	// Returns the serial ID assigned from the class-wide counter at construction.
	int Shader::getSerialID() const
	{
		return serialID;
	}
1430
	// Returns the number of instructions currently held by the shader.
	size_t Shader::getLength() const
	{
		return instruction.size();
	}
1435
	// Returns the stored shader type (parse() decodes it from the version token).
	Shader::ShaderType Shader::getShaderType() const
	{
		return shaderType;
	}
1440
	// Returns the stored shader model value, the same value size() compares
	// against constants such as 0x0104, 0x0200 and 0x0300.
	unsigned short Shader::getShaderModel() const
	{
		return shaderModel;
	}
1445
1446	void Shader::print(const char *fileName, ...) const
1447	{
1448		char fullName[1024 + 1];
1449
1450		va_list vararg;
1451		va_start(vararg, fileName);
1452		vsnprintf(fullName, 1024, fileName, vararg);
1453		va_end(vararg);
1454
1455		std::ofstream file(fullName, std::ofstream::out);
1456
1457		for(const auto &inst : instruction)
1458		{
1459			file << inst->string(shaderType, shaderModel) << std::endl;
1460		}
1461	}
1462
1463	void Shader::printInstruction(int index, const char *fileName) const
1464	{
1465		std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1466
1467		file << instruction[index]->string(shaderType, shaderModel) << std::endl;
1468	}
1469
	// Adds 'instruction' to the end of the instruction list. The shader's
	// destructor deletes every element of that list, so the shader takes
	// ownership of the pointer.
	void Shader::append(Instruction *instruction)
	{
		// The parameter shadows the member of the same name, hence this->
		this->instruction.push_back(instruction);
	}
1474
1475	void Shader::declareSampler(int i)
1476	{
1477		if(i >= 0 && i < 16)
1478		{
1479			usedSamplers |= 1 << i;
1480		}
1481	}
1482
	// Returns a read-only pointer to instruction 'i'. The index is only checked
	// by ASSERT; the element access itself is unchecked.
	const Shader::Instruction *Shader::getInstruction(size_t i) const
	{
		ASSERT(i < instruction.size());

		return instruction[i];
	}
1489
	// Runs the optimization passes. The first two passes only overwrite opcodes
	// with OPCODE_NULL; removeNull() must run last to delete those instructions
	// and compact the list.
	void Shader::optimize()
	{
		optimizeLeave();
		optimizeCall();
		removeNull();
	}
1496
1497	void Shader::optimizeLeave()
1498	{
1499		// A return (leave) right before the end of a function or the shader can be removed
1500		for(unsigned int i = 0; i < instruction.size(); i++)
1501		{
1502			if(instruction[i]->opcode == OPCODE_LEAVE)
1503			{
1504				if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1505				{
1506					instruction[i]->opcode = OPCODE_NULL;
1507				}
1508			}
1509		}
1510	}
1511
1512	void Shader::optimizeCall()
1513	{
1514		// Eliminate uncalled functions
1515		std::set<int> calledFunctions;
1516		bool rescan = true;
1517
1518		while(rescan)
1519		{
1520			calledFunctions.clear();
1521			rescan = false;
1522
1523			for(const auto &inst : instruction)
1524			{
1525				if(inst->isCall())
1526				{
1527					calledFunctions.insert(inst->dst.label);
1528				}
1529			}
1530
1531			if(!calledFunctions.empty())
1532			{
1533				for(unsigned int i = 0; i < instruction.size(); i++)
1534				{
1535					if(instruction[i]->opcode == OPCODE_LABEL)
1536					{
1537						if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1538						{
1539							for( ; i < instruction.size(); i++)
1540							{
1541								Opcode oldOpcode = instruction[i]->opcode;
1542								instruction[i]->opcode = OPCODE_NULL;
1543
1544								if(oldOpcode == OPCODE_RET)
1545								{
1546									rescan = true;
1547									break;
1548								}
1549							}
1550						}
1551					}
1552				}
1553			}
1554		}
1555
1556		// Optimize the entry call
1557		if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1558		{
1559			if(calledFunctions.size() == 1)
1560			{
1561				instruction[0]->opcode = OPCODE_NULL;
1562				instruction[1]->opcode = OPCODE_NULL;
1563
1564				for(size_t i = 2; i < instruction.size(); i++)
1565				{
1566					if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1567					{
1568						instruction[i]->opcode = OPCODE_NULL;
1569					}
1570				}
1571			}
1572		}
1573	}
1574
1575	void Shader::removeNull()
1576	{
1577		size_t size = 0;
1578		for(size_t i = 0; i < instruction.size(); i++)
1579		{
1580			if(instruction[i]->opcode != OPCODE_NULL)
1581			{
1582				instruction[size] = instruction[i];
1583				size++;
1584			}
1585			else
1586			{
1587				delete instruction[i];
1588			}
1589		}
1590
1591		instruction.resize(size);
1592	}
1593
1594	void Shader::analyzeDirtyConstants()
1595	{
1596		dirtyConstantsF = 0;
1597		dirtyConstantsI = 0;
1598		dirtyConstantsB = 0;
1599
1600		for(const auto &inst : instruction)
1601		{
1602			switch(inst->opcode)
1603			{
1604			case OPCODE_DEF:
1605				if(inst->dst.index + 1 > dirtyConstantsF)
1606				{
1607					dirtyConstantsF = inst->dst.index + 1;
1608				}
1609				break;
1610			case OPCODE_DEFI:
1611				if(inst->dst.index + 1 > dirtyConstantsI)
1612				{
1613					dirtyConstantsI = inst->dst.index + 1;
1614				}
1615				break;
1616			case OPCODE_DEFB:
1617				if(inst->dst.index + 1 > dirtyConstantsB)
1618				{
1619					dirtyConstantsB = inst->dst.index + 1;
1620				}
1621				break;
1622			default:
1623				break;
1624			}
1625		}
1626	}
1627
1628	void Shader::analyzeDynamicBranching()
1629	{
1630		dynamicBranching = false;
1631		containsLeave = false;
1632		containsBreak = false;
1633		containsContinue = false;
1634		containsDefine = false;
1635
1636		// Determine global presence of branching instructions
1637		for(const auto &inst : instruction)
1638		{
1639			switch(inst->opcode)
1640			{
1641			case OPCODE_CALLNZ:
1642			case OPCODE_IF:
1643			case OPCODE_IFC:
1644			case OPCODE_BREAK:
1645			case OPCODE_BREAKC:
1646			case OPCODE_CMP:
1647			case OPCODE_BREAKP:
1648			case OPCODE_LEAVE:
1649			case OPCODE_CONTINUE:
1650				if(inst->src[0].type != PARAMETER_CONSTBOOL)
1651				{
1652					dynamicBranching = true;
1653				}
1654
1655				if(inst->opcode == OPCODE_LEAVE)
1656				{
1657					containsLeave = true;
1658				}
1659
1660				if(inst->isBreak())
1661				{
1662					containsBreak = true;
1663				}
1664
1665				if(inst->opcode == OPCODE_CONTINUE)
1666				{
1667					containsContinue = true;
1668				}
1669			case OPCODE_DEF:
1670			case OPCODE_DEFB:
1671			case OPCODE_DEFI:
1672				containsDefine = true;
1673			default:
1674				break;
1675			}
1676		}
1677
1678		// Conservatively determine which instructions are affected by dynamic branching
1679		int branchDepth = 0;
1680		int breakDepth = 0;
1681		int continueDepth = 0;
1682		bool leaveReturn = false;
1683		unsigned int functionBegin = 0;
1684
1685		for(unsigned int i = 0; i < instruction.size(); i++)
1686		{
1687			// If statements and loops
1688			if(instruction[i]->isBranch() || instruction[i]->isLoop())
1689			{
1690				branchDepth++;
1691			}
1692			else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
1693			{
1694				branchDepth--;
1695			}
1696
1697			if(branchDepth > 0)
1698			{
1699				instruction[i]->analysisBranch = true;
1700
1701				if(instruction[i]->isCall())
1702				{
1703					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1704				}
1705			}
1706
1707			// Break statemement
1708			if(instruction[i]->isBreak())
1709			{
1710				breakDepth++;
1711			}
1712
1713			if(breakDepth > 0)
1714			{
1715				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1716				{
1717					breakDepth++;
1718				}
1719				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1720				{
1721					breakDepth--;
1722				}
1723
1724				instruction[i]->analysisBreak = true;
1725
1726				if(instruction[i]->isCall())
1727				{
1728					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1729				}
1730			}
1731
1732			// Continue statement
1733			if(instruction[i]->opcode == OPCODE_CONTINUE)
1734			{
1735				continueDepth++;
1736			}
1737
1738			if(continueDepth > 0)
1739			{
1740				if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1741				{
1742					continueDepth++;
1743				}
1744				else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1745				{
1746					continueDepth--;
1747				}
1748
1749				instruction[i]->analysisContinue = true;
1750
1751				if(instruction[i]->isCall())
1752				{
1753					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1754				}
1755			}
1756
1757			// Return (leave) statement
1758			if(instruction[i]->opcode == OPCODE_LEAVE)
1759			{
1760				leaveReturn = true;
1761
1762				// Mark loop body instructions prior to the return statement
1763				for(unsigned int l = functionBegin; l < i; l++)
1764				{
1765					if(instruction[l]->isLoop())
1766					{
1767						for(unsigned int r = l + 1; r < i; r++)
1768						{
1769							instruction[r]->analysisLeave = true;
1770						}
1771
1772						break;
1773					}
1774				}
1775			}
1776			else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1777			{
1778				leaveReturn = false;
1779			}
1780			else if(instruction[i]->opcode == OPCODE_LABEL)
1781			{
1782				functionBegin = i;
1783			}
1784
1785			if(leaveReturn)
1786			{
1787				instruction[i]->analysisLeave = true;
1788
1789				if(instruction[i]->isCall())
1790				{
1791					markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1792				}
1793			}
1794		}
1795	}
1796
1797	void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1798	{
1799		bool marker = false;
1800		for(auto &inst : instruction)
1801		{
1802			if(!marker)
1803			{
1804				if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
1805				{
1806					marker = true;
1807				}
1808			}
1809			else
1810			{
1811				if(inst->opcode == OPCODE_RET)
1812				{
1813					break;
1814				}
1815				else if(inst->isCall())
1816				{
1817					markFunctionAnalysis(inst->dst.label, flag);
1818				}
1819
1820				inst->analysis |= flag;
1821			}
1822		}
1823	}
1824
1825	void Shader::analyzeSamplers()
1826	{
1827		for(const auto &inst : instruction)
1828		{
1829			switch(inst->opcode)
1830			{
1831			case OPCODE_TEX:
1832			case OPCODE_TEXBEM:
1833			case OPCODE_TEXBEML:
1834			case OPCODE_TEXREG2AR:
1835			case OPCODE_TEXREG2GB:
1836			case OPCODE_TEXM3X2TEX:
1837			case OPCODE_TEXM3X3TEX:
1838			case OPCODE_TEXM3X3SPEC:
1839			case OPCODE_TEXM3X3VSPEC:
1840			case OPCODE_TEXREG2RGB:
1841			case OPCODE_TEXDP3TEX:
1842			case OPCODE_TEXM3X2DEPTH:
1843			case OPCODE_TEXLDD:
1844			case OPCODE_TEXLDL:
1845			case OPCODE_TEXLOD:
1846			case OPCODE_TEXOFFSET:
1847			case OPCODE_TEXOFFSETBIAS:
1848			case OPCODE_TEXLODOFFSET:
1849			case OPCODE_TEXELFETCH:
1850			case OPCODE_TEXELFETCHOFFSET:
1851			case OPCODE_TEXGRAD:
1852			case OPCODE_TEXGRADOFFSET:
1853				{
1854					Parameter &dst = inst->dst;
1855					Parameter &src1 = inst->src[1];
1856
1857					if(majorVersion >= 2)
1858					{
1859						usedSamplers |= 1 << src1.index;
1860					}
1861					else
1862					{
1863						usedSamplers |= 1 << dst.index;
1864					}
1865				}
1866				break;
1867			default:
1868				break;
1869			}
1870		}
1871	}
1872
1873	// Assigns a unique index to each call instruction, on a per label basis.
1874	// This is used to know what basic block to return to.
1875	void Shader::analyzeCallSites()
1876	{
1877		int callSiteIndex[2048] = {0};
1878
1879		for(auto &inst : instruction)
1880		{
1881			if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
1882			{
1883				int label = inst->dst.label;
1884
1885				inst->dst.callSite = callSiteIndex[label]++;
1886			}
1887		}
1888	}
1889
1890	void Shader::analyzeDynamicIndexing()
1891	{
1892		dynamicallyIndexedTemporaries = false;
1893		dynamicallyIndexedInput = false;
1894		dynamicallyIndexedOutput = false;
1895
1896		for(const auto &inst : instruction)
1897		{
1898			if(inst->dst.rel.type == PARAMETER_ADDR ||
1899			   inst->dst.rel.type == PARAMETER_LOOP ||
1900			   inst->dst.rel.type == PARAMETER_TEMP ||
1901			   inst->dst.rel.type == PARAMETER_CONST)
1902			{
1903				switch(inst->dst.type)
1904				{
1905				case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1906				case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1907				case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1908				default: break;
1909				}
1910			}
1911
1912			for(int j = 0; j < 3; j++)
1913			{
1914				if(inst->src[j].rel.type == PARAMETER_ADDR ||
1915				   inst->src[j].rel.type == PARAMETER_LOOP ||
1916				   inst->src[j].rel.type == PARAMETER_TEMP ||
1917				   inst->src[j].rel.type == PARAMETER_CONST)
1918				{
1919					switch(inst->src[j].type)
1920					{
1921					case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1922					case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1923					case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1924					default: break;
1925					}
1926				}
1927			}
1928		}
1929	}
1930}
1931