1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "OutputASM.h"
16#include "Common/Math.hpp"
17
18#include "common/debug.h"
19#include "InfoSink.h"
20
21#include "libGLESv2/Shader.h"
22
23#include <GLES2/gl2.h>
24#include <GLES2/gl2ext.h>
25#include <GLES3/gl3.h>
26
27#include <stdlib.h>
28
29namespace glsl
30{
31	// Integer to TString conversion
32	TString str(int i)
33	{
34		char buffer[20];
35		sprintf(buffer, "%d", i);
36		return buffer;
37	}
38
39	class Temporary : public TIntermSymbol
40	{
41	public:
42		Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)
43		{
44		}
45
46		~Temporary()
47		{
48			assembler->freeTemporary(this);
49		}
50
51	private:
52		OutputASM *const assembler;
53	};
54
55	class Constant : public TIntermConstantUnion
56	{
57	public:
58		Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))
59		{
60			constants[0].setFConst(x);
61			constants[1].setFConst(y);
62			constants[2].setFConst(z);
63			constants[3].setFConst(w);
64		}
65
66		Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))
67		{
68			constants[0].setBConst(b);
69		}
70
71		Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))
72		{
73			constants[0].setIConst(i);
74		}
75
76		~Constant()
77		{
78		}
79
80	private:
81		ConstantUnion constants[4];
82	};
83
84	Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :
85		type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)
86	{
87	}
88
89	UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,
90	                           TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :
91		name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),
92		isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)
93	{
94	}
95
96	BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)
97		: mCurrentOffset(0), isRowMajor(rowMajor)
98	{
99	}
100
101	BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)
102	{
103		int arrayStride;
104		int matrixStride;
105
106		getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);
107
108		const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),
109		                                 static_cast<int>(arrayStride * BytesPerComponent),
110		                                 static_cast<int>(matrixStride * BytesPerComponent),
111		                                 (matrixStride > 0) && isRowMajor);
112
113		advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);
114
115		return memberInfo;
116	}
117
118	// static
119	size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)
120	{
121		return (info.offset / BytesPerComponent) / ComponentsPerRegister;
122	}
123
124	// static
125	size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)
126	{
127		return (info.offset / BytesPerComponent) % ComponentsPerRegister;
128	}
129
130	void BlockLayoutEncoder::nextRegister()
131	{
132		mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);
133	}
134
135	Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)
136	{
137	}
138
139	void Std140BlockEncoder::enterAggregateType()
140	{
141		nextRegister();
142	}
143
144	void Std140BlockEncoder::exitAggregateType()
145	{
146		nextRegister();
147	}
148
149	void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)
150	{
151		size_t baseAlignment = 0;
152		int matrixStride = 0;
153		int arrayStride = 0;
154
155		if(type.isMatrix())
156		{
157			baseAlignment = ComponentsPerRegister;
158			matrixStride = ComponentsPerRegister;
159
160			if(arraySize > 0)
161			{
162				const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
163				arrayStride = ComponentsPerRegister * numRegisters;
164			}
165		}
166		else if(arraySize > 0)
167		{
168			baseAlignment = ComponentsPerRegister;
169			arrayStride = ComponentsPerRegister;
170		}
171		else
172		{
173			const size_t numComponents = type.getElementSize();
174			baseAlignment = (numComponents == 3 ? 4u : numComponents);
175		}
176
177		mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);
178
179		*matrixStrideOut = matrixStride;
180		*arrayStrideOut = arrayStride;
181	}
182
183	void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)
184	{
185		if(arraySize > 0)
186		{
187			mCurrentOffset += arrayStride * arraySize;
188		}
189		else if(type.isMatrix())
190		{
191			ASSERT(matrixStride == ComponentsPerRegister);
192			const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();
193			mCurrentOffset += ComponentsPerRegister * numRegisters;
194		}
195		else
196		{
197			mCurrentOffset += type.getElementSize();
198		}
199	}
200
201	Attribute::Attribute()
202	{
203		type = GL_NONE;
204		arraySize = 0;
205		registerIndex = 0;
206	}
207
208	Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)
209	{
210		this->type = type;
211		this->name = name;
212		this->arraySize = arraySize;
213		this->location = location;
214		this->registerIndex = registerIndex;
215	}
216
217	sw::PixelShader *Shader::getPixelShader() const
218	{
219		return 0;
220	}
221
222	sw::VertexShader *Shader::getVertexShader() const
223	{
224		return 0;
225	}
226
227	OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)
228	{
229		TString name = TFunction::unmangleName(nodeName);
230
231		if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")
232		{
233			method = IMPLICIT;
234		}
235		else if(name == "texture2DProj" || name == "textureProj")
236		{
237			method = IMPLICIT;
238			proj = true;
239		}
240		else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")
241		{
242			method = LOD;
243		}
244		else if(name == "texture2DProjLod" || name == "textureProjLod")
245		{
246			method = LOD;
247			proj = true;
248		}
249		else if(name == "textureSize")
250		{
251			method = SIZE;
252		}
253		else if(name == "textureOffset")
254		{
255			method = IMPLICIT;
256			offset = true;
257		}
258		else if(name == "textureProjOffset")
259		{
260			method = IMPLICIT;
261			offset = true;
262			proj = true;
263		}
264		else if(name == "textureLodOffset")
265		{
266			method = LOD;
267			offset = true;
268		}
269		else if(name == "textureProjLodOffset")
270		{
271			method = LOD;
272			proj = true;
273			offset = true;
274		}
275		else if(name == "texelFetch")
276		{
277			method = FETCH;
278		}
279		else if(name == "texelFetchOffset")
280		{
281			method = FETCH;
282			offset = true;
283		}
284		else if(name == "textureGrad")
285		{
286			method = GRAD;
287		}
288		else if(name == "textureGradOffset")
289		{
290			method = GRAD;
291			offset = true;
292		}
293		else if(name == "textureProjGrad")
294		{
295			method = GRAD;
296			proj = true;
297		}
298		else if(name == "textureProjGradOffset")
299		{
300			method = GRAD;
301			proj = true;
302			offset = true;
303		}
304		else UNREACHABLE(0);
305	}
306
307	OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)
308	{
309		shader = 0;
310		pixelShader = 0;
311		vertexShader = 0;
312
313		if(shaderObject)
314		{
315			shader = shaderObject->getShader();
316			pixelShader = shaderObject->getPixelShader();
317			vertexShader = shaderObject->getVertexShader();
318		}
319
320		functionArray.push_back(Function(0, "main(", 0, 0));
321		currentFunction = 0;
322		outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData
323	}
324
325	OutputASM::~OutputASM()
326	{
327	}
328
329	void OutputASM::output()
330	{
331		if(shader)
332		{
333			emitShader(GLOBAL);
334
335			if(functionArray.size() > 1)   // Only call main() when there are other functions
336			{
337				Instruction *callMain = emit(sw::Shader::OPCODE_CALL);
338				callMain->dst.type = sw::Shader::PARAMETER_LABEL;
339				callMain->dst.index = 0;   // main()
340
341				emit(sw::Shader::OPCODE_RET);
342			}
343
344			emitShader(FUNCTION);
345		}
346	}
347
348	void OutputASM::emitShader(Scope scope)
349	{
350		emitScope = scope;
351		currentScope = GLOBAL;
352		mContext.getTreeRoot()->traverse(this);
353	}
354
355	void OutputASM::freeTemporary(Temporary *temporary)
356	{
357		free(temporaries, temporary);
358	}
359
360	sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const
361	{
362		TBasicType baseType = in->getType().getBasicType();
363
364		switch(op)
365		{
366		case sw::Shader::OPCODE_NEG:
367			switch(baseType)
368			{
369			case EbtInt:
370			case EbtUInt:
371				return sw::Shader::OPCODE_INEG;
372			case EbtFloat:
373			default:
374				return op;
375			}
376		case sw::Shader::OPCODE_ABS:
377			switch(baseType)
378			{
379			case EbtInt:
380				return sw::Shader::OPCODE_IABS;
381			case EbtFloat:
382			default:
383				return op;
384			}
385		case sw::Shader::OPCODE_SGN:
386			switch(baseType)
387			{
388			case EbtInt:
389				return sw::Shader::OPCODE_ISGN;
390			case EbtFloat:
391			default:
392				return op;
393			}
394		case sw::Shader::OPCODE_ADD:
395			switch(baseType)
396			{
397			case EbtInt:
398			case EbtUInt:
399				return sw::Shader::OPCODE_IADD;
400			case EbtFloat:
401			default:
402				return op;
403			}
404		case sw::Shader::OPCODE_SUB:
405			switch(baseType)
406			{
407			case EbtInt:
408			case EbtUInt:
409				return sw::Shader::OPCODE_ISUB;
410			case EbtFloat:
411			default:
412				return op;
413			}
414		case sw::Shader::OPCODE_MUL:
415			switch(baseType)
416			{
417			case EbtInt:
418			case EbtUInt:
419				return sw::Shader::OPCODE_IMUL;
420			case EbtFloat:
421			default:
422				return op;
423			}
424		case sw::Shader::OPCODE_DIV:
425			switch(baseType)
426			{
427			case EbtInt:
428				return sw::Shader::OPCODE_IDIV;
429			case EbtUInt:
430				return sw::Shader::OPCODE_UDIV;
431			case EbtFloat:
432			default:
433				return op;
434			}
435		case sw::Shader::OPCODE_IMOD:
436			return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;
437		case sw::Shader::OPCODE_ISHR:
438			return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;
439		case sw::Shader::OPCODE_MIN:
440			switch(baseType)
441			{
442			case EbtInt:
443				return sw::Shader::OPCODE_IMIN;
444			case EbtUInt:
445				return sw::Shader::OPCODE_UMIN;
446			case EbtFloat:
447			default:
448				return op;
449			}
450		case sw::Shader::OPCODE_MAX:
451			switch(baseType)
452			{
453			case EbtInt:
454				return sw::Shader::OPCODE_IMAX;
455			case EbtUInt:
456				return sw::Shader::OPCODE_UMAX;
457			case EbtFloat:
458			default:
459				return op;
460			}
461		default:
462			return op;
463		}
464	}
465
466	void OutputASM::visitSymbol(TIntermSymbol *symbol)
467	{
468		// Vertex varyings don't have to be actively used to successfully link
469		// against pixel shaders that use them. So make sure they're declared.
470		if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)
471		{
472			if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings
473			{
474				declareVarying(symbol, -1);
475			}
476		}
477
478		TInterfaceBlock* block = symbol->getType().getInterfaceBlock();
479		// OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:
480		// "All members of a named uniform block declared with a shared or std140 layout qualifier
481		// are considered active, even if they are not referenced in any shader in the program.
482		// The uniform block itself is also considered active, even if no member of the block is referenced."
483		if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))
484		{
485			uniformRegister(symbol);
486		}
487	}
488
489	bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)
490	{
491		if(currentScope != emitScope)
492		{
493			return false;
494		}
495
496		TIntermTyped *result = node;
497		TIntermTyped *left = node->getLeft();
498		TIntermTyped *right = node->getRight();
499		const TType &leftType = left->getType();
500		const TType &rightType = right->getType();
501
502		if(isSamplerRegister(result))
503		{
504			return false;   // Don't traverse, the register index is determined statically
505		}
506
507		switch(node->getOp())
508		{
509		case EOpAssign:
510			if(visit == PostVisit)
511			{
512				assignLvalue(left, right);
513				copy(result, right);
514			}
515			break;
516		case EOpInitialize:
517			if(visit == PostVisit)
518			{
519				copy(left, right);
520			}
521			break;
522		case EOpMatrixTimesScalarAssign:
523			if(visit == PostVisit)
524			{
525				for(int i = 0; i < leftType.getNominalSize(); i++)
526				{
527					emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);
528				}
529
530				assignLvalue(left, result);
531			}
532			break;
533		case EOpVectorTimesMatrixAssign:
534			if(visit == PostVisit)
535			{
536				int size = leftType.getNominalSize();
537
538				for(int i = 0; i < size; i++)
539				{
540					Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);
541					dot->dst.mask = 1 << i;
542				}
543
544				assignLvalue(left, result);
545			}
546			break;
547		case EOpMatrixTimesMatrixAssign:
548			if(visit == PostVisit)
549			{
550				int dim = leftType.getNominalSize();
551
552				for(int i = 0; i < dim; i++)
553				{
554					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
555					mul->src[1].swizzle = 0x00;
556
557					for(int j = 1; j < dim; j++)
558					{
559						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
560						mad->src[1].swizzle = j * 0x55;
561					}
562				}
563
564				assignLvalue(left, result);
565			}
566			break;
567		case EOpIndexDirect:
568			if(visit == PostVisit)
569			{
570				int index = right->getAsConstantUnion()->getIConst(0);
571
572				if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())
573				{
574					ASSERT(left->isArray());
575					copy(result, left, index * left->elementRegisterCount());
576				}
577				else if(result->isRegister())
578				{
579					int srcIndex = 0;
580					if(left->isRegister())
581					{
582						srcIndex = 0;
583					}
584					else if(left->isArray())
585					{
586						srcIndex = index * left->elementRegisterCount();
587					}
588					else if(left->isMatrix())
589					{
590						ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error
591						srcIndex = index;
592					}
593					else UNREACHABLE(0);
594
595					Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);
596
597					if(left->isRegister())
598					{
599						mov->src[0].swizzle = index;
600					}
601				}
602				else UNREACHABLE(0);
603			}
604			break;
605		case EOpIndexIndirect:
606			if(visit == PostVisit)
607			{
608				if(left->isArray() || left->isMatrix())
609				{
610					for(int index = 0; index < result->totalRegisterCount(); index++)
611					{
612						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);
613						mov->dst.mask = writeMask(result, index);
614
615						if(left->totalRegisterCount() > 1)
616						{
617							sw::Shader::SourceParameter relativeRegister;
618							argument(relativeRegister, right);
619
620							mov->src[0].rel.type = relativeRegister.type;
621							mov->src[0].rel.index = relativeRegister.index;
622							mov->src[0].rel.scale =	result->totalRegisterCount();
623							mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
624						}
625					}
626				}
627				else if(left->isRegister())
628				{
629					emit(sw::Shader::OPCODE_EXTRACT, result, left, right);
630				}
631				else UNREACHABLE(0);
632			}
633			break;
634		case EOpIndexDirectStruct:
635		case EOpIndexDirectInterfaceBlock:
636			if(visit == PostVisit)
637			{
638				ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));
639
640				const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?
641				                           leftType.getStruct()->fields() :
642				                           leftType.getInterfaceBlock()->fields();
643				int index = right->getAsConstantUnion()->getIConst(0);
644				int fieldOffset = 0;
645
646				for(int i = 0; i < index; i++)
647				{
648					fieldOffset += fields[i]->type()->totalRegisterCount();
649				}
650
651				copy(result, left, fieldOffset);
652			}
653			break;
654		case EOpVectorSwizzle:
655			if(visit == PostVisit)
656			{
657				int swizzle = 0;
658				TIntermAggregate *components = right->getAsAggregate();
659
660				if(components)
661				{
662					TIntermSequence &sequence = components->getSequence();
663					int component = 0;
664
665					for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)
666					{
667						TIntermConstantUnion *element = (*sit)->getAsConstantUnion();
668
669						if(element)
670						{
671							int i = element->getUnionArrayPointer()[0].getIConst();
672							swizzle |= i << (component * 2);
673							component++;
674						}
675						else UNREACHABLE(0);
676					}
677				}
678				else UNREACHABLE(0);
679
680				Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);
681				mov->src[0].swizzle = swizzle;
682			}
683			break;
684		case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;
685		case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;
686		case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;
687		case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;
688		case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;
689		case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;
690		case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;
691		case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;
692		case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;
693		case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;
694		case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;
695		case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;
696		case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;
697		case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;
698		case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;
699		case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;
700		case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;
701		case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;
702		case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;
703		case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;
704		case EOpEqual:
705			if(visit == PostVisit)
706			{
707				emitBinary(sw::Shader::OPCODE_EQ, result, left, right);
708
709				for(int index = 1; index < left->totalRegisterCount(); index++)
710				{
711					Temporary equal(this);
712					emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);
713					emit(sw::Shader::OPCODE_AND, result, result, &equal);
714				}
715			}
716			break;
717		case EOpNotEqual:
718			if(visit == PostVisit)
719			{
720				emitBinary(sw::Shader::OPCODE_NE, result, left, right);
721
722				for(int index = 1; index < left->totalRegisterCount(); index++)
723				{
724					Temporary notEqual(this);
725					emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);
726					emit(sw::Shader::OPCODE_OR, result, result, &notEqual);
727				}
728			}
729			break;
730		case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;
731		case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;
732		case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;
733		case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;
734		case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;
735		case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;
736		case EOpMatrixTimesScalar:
737			if(visit == PostVisit)
738			{
739				if(left->isMatrix())
740				{
741					for(int i = 0; i < leftType.getNominalSize(); i++)
742					{
743						emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);
744					}
745				}
746				else if(right->isMatrix())
747				{
748					for(int i = 0; i < rightType.getNominalSize(); i++)
749					{
750						emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
751					}
752				}
753				else UNREACHABLE(0);
754			}
755			break;
756		case EOpVectorTimesMatrix:
757			if(visit == PostVisit)
758			{
759				sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());
760
761				int size = rightType.getNominalSize();
762				for(int i = 0; i < size; i++)
763				{
764					Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);
765					dot->dst.mask = 1 << i;
766				}
767			}
768			break;
769		case EOpMatrixTimesVector:
770			if(visit == PostVisit)
771			{
772				Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);
773				mul->src[1].swizzle = 0x00;
774
775				int size = rightType.getNominalSize();
776				for(int i = 1; i < size; i++)
777				{
778					Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);
779					mad->src[1].swizzle = i * 0x55;
780				}
781			}
782			break;
783		case EOpMatrixTimesMatrix:
784			if(visit == PostVisit)
785			{
786				int dim = leftType.getNominalSize();
787
788				int size = rightType.getNominalSize();
789				for(int i = 0; i < size; i++)
790				{
791					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);
792					mul->src[1].swizzle = 0x00;
793
794					for(int j = 1; j < dim; j++)
795					{
796						Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);
797						mad->src[1].swizzle = j * 0x55;
798					}
799				}
800			}
801			break;
802		case EOpLogicalOr:
803			if(trivial(right, 6))
804			{
805				if(visit == PostVisit)
806				{
807					emit(sw::Shader::OPCODE_OR, result, left, right);
808				}
809			}
810			else   // Short-circuit evaluation
811			{
812				if(visit == InVisit)
813				{
814					emit(sw::Shader::OPCODE_MOV, result, left);
815					Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);
816					ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;
817				}
818				else if(visit == PostVisit)
819				{
820					emit(sw::Shader::OPCODE_MOV, result, right);
821					emit(sw::Shader::OPCODE_ENDIF);
822				}
823			}
824			break;
825		case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;
826		case EOpLogicalAnd:
827			if(trivial(right, 6))
828			{
829				if(visit == PostVisit)
830				{
831					emit(sw::Shader::OPCODE_AND, result, left, right);
832				}
833			}
834			else   // Short-circuit evaluation
835			{
836				if(visit == InVisit)
837				{
838					emit(sw::Shader::OPCODE_MOV, result, left);
839					emit(sw::Shader::OPCODE_IF, 0, result);
840				}
841				else if(visit == PostVisit)
842				{
843					emit(sw::Shader::OPCODE_MOV, result, right);
844					emit(sw::Shader::OPCODE_ENDIF);
845				}
846			}
847			break;
848		default: UNREACHABLE(node->getOp());
849		}
850
851		return true;
852	}
853
854	void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)
855	{
856		switch(size)
857		{
858		case 1: // Used for cofactor computation only
859			{
860				// For a 2x2 matrix, the cofactor is simply a transposed move or negate
861				bool isMov = (row == col);
862				sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;
863				Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);
864				mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);
865				mov->dst.mask = 1 << outRow;
866			}
867			break;
868		case 2:
869			{
870				static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy
871
872				bool isCofactor = (col >= 0) && (row >= 0);
873				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
874				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
875				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
876
877				Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);
878				det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];
879				det->dst.mask = 1 << outRow;
880			}
881			break;
882		case 3:
883			{
884				static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw
885
886				bool isCofactor = (col >= 0) && (row >= 0);
887				int col0 = (isCofactor && (col <= 0)) ? 1 : 0;
888				int col1 = (isCofactor && (col <= 1)) ? 2 : 1;
889				int col2 = (isCofactor && (col <= 2)) ? 3 : 2;
890				bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));
891
892				Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);
893				det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];
894				det->dst.mask = 1 << outRow;
895			}
896			break;
897		case 4:
898			{
899				Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);
900				det->dst.mask = 1 << outRow;
901			}
902			break;
903		default:
904			UNREACHABLE(size);
905			break;
906		}
907	}
908
909	bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)
910	{
911		if(currentScope != emitScope)
912		{
913			return false;
914		}
915
916		TIntermTyped *result = node;
917		TIntermTyped *arg = node->getOperand();
918		TBasicType basicType = arg->getType().getBasicType();
919
920		union
921		{
922			float f;
923			int i;
924		} one_value;
925
926		if(basicType == EbtInt || basicType == EbtUInt)
927		{
928			one_value.i = 1;
929		}
930		else
931		{
932			one_value.f = 1.0f;
933		}
934
935		Constant one(one_value.f, one_value.f, one_value.f, one_value.f);
936		Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);
937		Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);
938
939		switch(node->getOp())
940		{
941		case EOpNegative:
942			if(visit == PostVisit)
943			{
944				sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);
945				for(int index = 0; index < arg->totalRegisterCount(); index++)
946				{
947					emit(negOpcode, result, index, arg, index);
948				}
949			}
950			break;
951		case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
952		case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
953		case EOpBitwiseNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;
954		case EOpPostIncrement:
955			if(visit == PostVisit)
956			{
957				copy(result, arg);
958
959				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
960				for(int index = 0; index < arg->totalRegisterCount(); index++)
961				{
962					emit(addOpcode, arg, index, arg, index, &one);
963				}
964
965				assignLvalue(arg, arg);
966			}
967			break;
968		case EOpPostDecrement:
969			if(visit == PostVisit)
970			{
971				copy(result, arg);
972
973				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
974				for(int index = 0; index < arg->totalRegisterCount(); index++)
975				{
976					emit(subOpcode, arg, index, arg, index, &one);
977				}
978
979				assignLvalue(arg, arg);
980			}
981			break;
982		case EOpPreIncrement:
983			if(visit == PostVisit)
984			{
985				sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);
986				for(int index = 0; index < arg->totalRegisterCount(); index++)
987				{
988					emit(addOpcode, result, index, arg, index, &one);
989				}
990
991				assignLvalue(arg, result);
992			}
993			break;
994		case EOpPreDecrement:
995			if(visit == PostVisit)
996			{
997				sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);
998				for(int index = 0; index < arg->totalRegisterCount(); index++)
999				{
1000					emit(subOpcode, result, index, arg, index, &one);
1001				}
1002
1003				assignLvalue(arg, result);
1004			}
1005			break;
1006		case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;
1007		case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;
1008		case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;
1009		case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;
1010		case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;
1011		case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;
1012		case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;
1013		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;
1014		case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;
1015		case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;
1016		case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;
1017		case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;
1018		case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;
1019		case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;
1020		case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;
1021		case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;
1022		case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;
1023		case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;
1024		case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;
1025		case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;
1026		case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;
1027		case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;
1028		case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;
1029		case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;
1030		case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;
1031		case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;
1032		case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;
1033		case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;
1034		case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;
1035		case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;
1036		case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;
1037		case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;
1038		case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;
1039		case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;
1040		case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;
1041		case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;
1042		case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;
1043		case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;
1044		case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;
1045		case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;
1046		case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;
1047		case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;
1048		case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;
1049		case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;
1050		case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;
1051		case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;
1052		case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;
1053		case EOpTranspose:
1054			if(visit == PostVisit)
1055			{
1056				int numCols = arg->getNominalSize();
1057				int numRows = arg->getSecondarySize();
1058				for(int i = 0; i < numCols; ++i)
1059				{
1060					for(int j = 0; j < numRows; ++j)
1061					{
1062						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);
1063						mov->src[0].swizzle = 0x55 * j;
1064						mov->dst.mask = 1 << i;
1065					}
1066				}
1067			}
1068			break;
1069		case EOpDeterminant:
1070			if(visit == PostVisit)
1071			{
1072				int size = arg->getNominalSize();
1073				ASSERT(size == arg->getSecondarySize());
1074
1075				emitDeterminant(result, arg, size);
1076			}
1077			break;
1078		case EOpInverse:
1079			if(visit == PostVisit)
1080			{
1081				int size = arg->getNominalSize();
1082				ASSERT(size == arg->getSecondarySize());
1083
1084				// Compute transposed matrix of cofactors
1085				for(int i = 0; i < size; ++i)
1086				{
1087					for(int j = 0; j < size; ++j)
1088					{
1089						// For a 2x2 matrix, the cofactor is simply a transposed move or negate
1090						// For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant
1091						emitDeterminant(result, arg, size - 1, j, i, i, j);
1092					}
1093				}
1094
1095				// Compute 1 / determinant
1096				Temporary invDet(this);
1097				emitDeterminant(&invDet, arg, size);
1098				Constant one(1.0f, 1.0f, 1.0f, 1.0f);
1099				Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);
1100				div->src[1].swizzle = 0x00; // xxxx
1101
1102				// Divide transposed matrix of cofactors by determinant
1103				for(int i = 0; i < size; ++i)
1104				{
1105					emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);
1106				}
1107			}
1108			break;
1109		default: UNREACHABLE(node->getOp());
1110		}
1111
1112		return true;
1113	}
1114
1115	bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)
1116	{
1117		if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)
1118		{
1119			return false;
1120		}
1121
1122		Constant zero(0.0f, 0.0f, 0.0f, 0.0f);
1123
1124		TIntermTyped *result = node;
1125		const TType &resultType = node->getType();
1126		TIntermSequence &arg = node->getSequence();
1127		size_t argumentCount = arg.size();
1128
1129		switch(node->getOp())
1130		{
1131		case EOpSequence:             break;
1132		case EOpDeclaration:          break;
1133		case EOpInvariantDeclaration: break;
1134		case EOpPrototype:            break;
1135		case EOpComma:
1136			if(visit == PostVisit)
1137			{
1138				copy(result, arg[1]);
1139			}
1140			break;
1141		case EOpFunction:
1142			if(visit == PreVisit)
1143			{
1144				const TString &name = node->getName();
1145
1146				if(emitScope == FUNCTION)
1147				{
1148					if(functionArray.size() > 1)   // No need for a label when there's only main()
1149					{
1150						Instruction *label = emit(sw::Shader::OPCODE_LABEL);
1151						label->dst.type = sw::Shader::PARAMETER_LABEL;
1152
1153						const Function *function = findFunction(name);
1154						ASSERT(function);   // Should have been added during global pass
1155						label->dst.index = function->label;
1156						currentFunction = function->label;
1157					}
1158				}
1159				else if(emitScope == GLOBAL)
1160				{
1161					if(name != "main(")
1162					{
1163						TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();
1164						functionArray.push_back(Function(functionArray.size(), name, &arguments, node));
1165					}
1166				}
1167				else UNREACHABLE(emitScope);
1168
1169				currentScope = FUNCTION;
1170			}
1171			else if(visit == PostVisit)
1172			{
1173				if(emitScope == FUNCTION)
1174				{
1175					if(functionArray.size() > 1)   // No need to return when there's only main()
1176					{
1177						emit(sw::Shader::OPCODE_RET);
1178					}
1179				}
1180
1181				currentScope = GLOBAL;
1182			}
1183			break;
1184		case EOpFunctionCall:
1185			if(visit == PostVisit)
1186			{
1187				if(node->isUserDefined())
1188				{
1189					const TString &name = node->getName();
1190					const Function *function = findFunction(name);
1191
1192					if(!function)
1193					{
1194						mContext.error(node->getLine(), "function definition not found", name.c_str());
1195						return false;
1196					}
1197
1198					TIntermSequence &arguments = *function->arg;
1199
1200					for(size_t i = 0; i < argumentCount; i++)
1201					{
1202						TIntermTyped *in = arguments[i]->getAsTyped();
1203
1204						if(in->getQualifier() == EvqIn ||
1205						   in->getQualifier() == EvqInOut ||
1206						   in->getQualifier() == EvqConstReadOnly)
1207						{
1208							copy(in, arg[i]);
1209						}
1210					}
1211
1212					Instruction *call = emit(sw::Shader::OPCODE_CALL);
1213					call->dst.type = sw::Shader::PARAMETER_LABEL;
1214					call->dst.index = function->label;
1215
1216					if(function->ret && function->ret->getType().getBasicType() != EbtVoid)
1217					{
1218						copy(result, function->ret);
1219					}
1220
1221					for(size_t i = 0; i < argumentCount; i++)
1222					{
1223						TIntermTyped *argument = arguments[i]->getAsTyped();
1224						TIntermTyped *out = arg[i]->getAsTyped();
1225
1226						if(argument->getQualifier() == EvqOut ||
1227						   argument->getQualifier() == EvqInOut)
1228						{
1229							assignLvalue(out, argument);
1230						}
1231					}
1232				}
1233				else
1234				{
1235					const TextureFunction textureFunction(node->getName());
1236					TIntermTyped *t = arg[1]->getAsTyped();
1237
1238					Temporary coord(this);
1239
1240					if(textureFunction.proj)
1241					{
1242						Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);
1243						rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);
1244						rcp->dst.mask = 0x7;
1245
1246						Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);
1247						mul->dst.mask = 0x7;
1248					}
1249					else
1250					{
1251						emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);
1252					}
1253
1254					switch(textureFunction.method)
1255					{
1256					case TextureFunction::IMPLICIT:
1257						{
1258							TIntermNode* offset = textureFunction.offset ? arg[2] : 0;
1259
1260							if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))
1261							{
1262								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1263								     result, &coord, arg[0], offset);
1264							}
1265							else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias
1266							{
1267								Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);
1268								bias->dst.mask = 0x8;
1269
1270								Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,
1271								                        result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction
1272								tex->bias = true;
1273							}
1274							else UNREACHABLE(argumentCount);
1275						}
1276						break;
1277					case TextureFunction::LOD:
1278						{
1279							Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
1280							lod->dst.mask = 0x8;
1281
1282							emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,
1283							     result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);
1284						}
1285						break;
1286					case TextureFunction::FETCH:
1287						{
1288							if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))
1289							{
1290								Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);
1291								lod->dst.mask = 0x8;
1292
1293								TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;
1294
1295								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,
1296								     result, &coord, arg[0], offset);
1297							}
1298							else UNREACHABLE(argumentCount);
1299						}
1300						break;
1301					case TextureFunction::GRAD:
1302						{
1303							if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))
1304							{
1305								TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;
1306
1307								emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,
1308								     result, &coord, arg[0], arg[2], arg[3], offset);
1309							}
1310							else UNREACHABLE(argumentCount);
1311						}
1312						break;
1313					case TextureFunction::SIZE:
1314						emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);
1315						break;
1316					default:
1317						UNREACHABLE(textureFunction.method);
1318					}
1319				}
1320			}
1321			break;
1322		case EOpParameters:
1323			break;
1324		case EOpConstructFloat:
1325		case EOpConstructVec2:
1326		case EOpConstructVec3:
1327		case EOpConstructVec4:
1328		case EOpConstructBool:
1329		case EOpConstructBVec2:
1330		case EOpConstructBVec3:
1331		case EOpConstructBVec4:
1332		case EOpConstructInt:
1333		case EOpConstructIVec2:
1334		case EOpConstructIVec3:
1335		case EOpConstructIVec4:
1336		case EOpConstructUInt:
1337		case EOpConstructUVec2:
1338		case EOpConstructUVec3:
1339		case EOpConstructUVec4:
1340			if(visit == PostVisit)
1341			{
1342				int component = 0;
1343				int arrayMaxIndex = result->isArray() ? result->getArraySize() - 1 : 0;
1344				int arrayComponents = result->getType().getElementSize();
1345				for(size_t i = 0; i < argumentCount; i++)
1346				{
1347					TIntermTyped *argi = arg[i]->getAsTyped();
1348					int size = argi->getNominalSize();
1349					int arrayIndex = std::min(component / arrayComponents, arrayMaxIndex);
1350					int swizzle = component - (arrayIndex * arrayComponents);
1351
1352					if(!argi->isMatrix())
1353					{
1354						Instruction *mov = emitCast(result, arrayIndex, argi, 0);
1355						mov->dst.mask = (0xF << swizzle) & 0xF;
1356						mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1357
1358						component += size;
1359					}
1360					else   // Matrix
1361					{
1362						int column = 0;
1363
1364						while(component < resultType.getNominalSize())
1365						{
1366							Instruction *mov = emitCast(result, arrayIndex, argi, column);
1367							mov->dst.mask = (0xF << swizzle) & 0xF;
1368							mov->src[0].swizzle = readSwizzle(argi, size) << (swizzle * 2);
1369
1370							column++;
1371							component += size;
1372						}
1373					}
1374				}
1375			}
1376			break;
1377		case EOpConstructMat2:
1378		case EOpConstructMat2x3:
1379		case EOpConstructMat2x4:
1380		case EOpConstructMat3x2:
1381		case EOpConstructMat3:
1382		case EOpConstructMat3x4:
1383		case EOpConstructMat4x2:
1384		case EOpConstructMat4x3:
1385		case EOpConstructMat4:
1386			if(visit == PostVisit)
1387			{
1388				TIntermTyped *arg0 = arg[0]->getAsTyped();
1389				const int outCols = result->getNominalSize();
1390				const int outRows = result->getSecondarySize();
1391
1392				if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix
1393				{
1394					for(int i = 0; i < outCols; i++)
1395					{
1396						emit(sw::Shader::OPCODE_MOV, result, i, &zero);
1397						Instruction *mov = emitCast(result, i, arg0, 0);
1398						mov->dst.mask = 1 << i;
1399						ASSERT(mov->src[0].swizzle == 0x00);
1400					}
1401				}
1402				else if(arg0->isMatrix())
1403				{
1404					int arraySize = result->isArray() ? result->getArraySize() : 1;
1405
1406					for(int n = 0; n < arraySize; n++)
1407					{
1408						TIntermTyped *argi = arg[n]->getAsTyped();
1409						const int inCols = argi->getNominalSize();
1410						const int inRows = argi->getSecondarySize();
1411
1412						for(int i = 0; i < outCols; i++)
1413						{
1414							if(i >= inCols || outRows > inRows)
1415							{
1416								// Initialize to identity matrix
1417								Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));
1418								emitCast(result, i + n * outCols, &col, 0);
1419							}
1420
1421							if(i < inCols)
1422							{
1423								Instruction *mov = emitCast(result, i + n * outCols, argi, i);
1424								mov->dst.mask = 0xF >> (4 - inRows);
1425							}
1426						}
1427					}
1428				}
1429				else
1430				{
1431					int column = 0;
1432					int row = 0;
1433
1434					for(size_t i = 0; i < argumentCount; i++)
1435					{
1436						TIntermTyped *argi = arg[i]->getAsTyped();
1437						int size = argi->getNominalSize();
1438						int element = 0;
1439
1440						while(element < size)
1441						{
1442							Instruction *mov = emitCast(result, column, argi, 0);
1443							mov->dst.mask = (0xF << row) & 0xF;
1444							mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;
1445
1446							int end = row + size - element;
1447							column = end >= outRows ? column + 1 : column;
1448							element = element + outRows - row;
1449							row = end >= outRows ? 0 : end;
1450						}
1451					}
1452				}
1453			}
1454			break;
1455		case EOpConstructStruct:
1456			if(visit == PostVisit)
1457			{
1458				int offset = 0;
1459				for(size_t i = 0; i < argumentCount; i++)
1460				{
1461					TIntermTyped *argi = arg[i]->getAsTyped();
1462					int size = argi->totalRegisterCount();
1463
1464					for(int index = 0; index < size; index++)
1465					{
1466						Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);
1467						mov->dst.mask = writeMask(result, offset + index);
1468					}
1469
1470					offset += size;
1471				}
1472			}
1473			break;
1474		case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;
1475		case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;
1476		case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;
1477		case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;
1478		case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;
1479		case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;
1480		case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;
1481		case EOpModf:
1482			if(visit == PostVisit)
1483			{
1484				TIntermTyped* arg1 = arg[1]->getAsTyped();
1485				emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);
1486				assignLvalue(arg1, arg1);
1487				emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);
1488			}
1489			break;
1490		case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;
1491		case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;
1492		case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;
1493		case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;
1494		case EOpClamp:
1495			if(visit == PostVisit)
1496			{
1497				emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);
1498				emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);
1499			}
1500			break;
1501		case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;
1502		case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;
1503		case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;
1504		case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;
1505		case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;
1506		case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;
1507		case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1508		case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;
1509		case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;
1510		case EOpMul:
1511			if(visit == PostVisit)
1512			{
1513				TIntermTyped *arg0 = arg[0]->getAsTyped();
1514				ASSERT((arg0->getNominalSize() == arg[1]->getAsTyped()->getNominalSize()) &&
1515				       (arg0->getSecondarySize() == arg[1]->getAsTyped()->getSecondarySize()));
1516
1517				int size = arg0->getNominalSize();
1518				for(int i = 0; i < size; i++)
1519				{
1520					emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);
1521				}
1522			}
1523			break;
1524		case EOpOuterProduct:
1525			if(visit == PostVisit)
1526			{
1527				for(int i = 0; i < dim(arg[1]); i++)
1528				{
1529					Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);
1530					mul->src[1].swizzle = 0x55 * i;
1531				}
1532			}
1533			break;
1534		default: UNREACHABLE(node->getOp());
1535		}
1536
1537		return true;
1538	}
1539
1540	bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)
1541	{
1542		if(currentScope != emitScope)
1543		{
1544			return false;
1545		}
1546
1547		TIntermTyped *condition = node->getCondition();
1548		TIntermNode *trueBlock = node->getTrueBlock();
1549		TIntermNode *falseBlock = node->getFalseBlock();
1550		TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
1551
1552		condition->traverse(this);
1553
1554		if(node->usesTernaryOperator())
1555		{
1556			if(constantCondition)
1557			{
1558				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1559
1560				if(trueCondition)
1561				{
1562					trueBlock->traverse(this);
1563					copy(node, trueBlock);
1564				}
1565				else
1566				{
1567					falseBlock->traverse(this);
1568					copy(node, falseBlock);
1569				}
1570			}
1571			else if(trivial(node, 6))   // Fast to compute both potential results and no side effects
1572			{
1573				trueBlock->traverse(this);
1574				falseBlock->traverse(this);
1575				emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);
1576			}
1577			else
1578			{
1579				emit(sw::Shader::OPCODE_IF, 0, condition);
1580
1581				if(trueBlock)
1582				{
1583					trueBlock->traverse(this);
1584					copy(node, trueBlock);
1585				}
1586
1587				if(falseBlock)
1588				{
1589					emit(sw::Shader::OPCODE_ELSE);
1590					falseBlock->traverse(this);
1591					copy(node, falseBlock);
1592				}
1593
1594				emit(sw::Shader::OPCODE_ENDIF);
1595			}
1596		}
1597		else  // if/else statement
1598		{
1599			if(constantCondition)
1600			{
1601				bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
1602
1603				if(trueCondition)
1604				{
1605					if(trueBlock)
1606					{
1607						trueBlock->traverse(this);
1608					}
1609				}
1610				else
1611				{
1612					if(falseBlock)
1613					{
1614						falseBlock->traverse(this);
1615					}
1616				}
1617			}
1618			else
1619			{
1620				emit(sw::Shader::OPCODE_IF, 0, condition);
1621
1622				if(trueBlock)
1623				{
1624					trueBlock->traverse(this);
1625				}
1626
1627				if(falseBlock)
1628				{
1629					emit(sw::Shader::OPCODE_ELSE);
1630					falseBlock->traverse(this);
1631				}
1632
1633				emit(sw::Shader::OPCODE_ENDIF);
1634			}
1635		}
1636
1637		return false;
1638	}
1639
1640	bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)
1641	{
1642		if(currentScope != emitScope)
1643		{
1644			return false;
1645		}
1646
1647		unsigned int iterations = loopCount(node);
1648
1649		if(iterations == 0)
1650		{
1651			return false;
1652		}
1653
1654		bool unroll = (iterations <= 4);
1655
1656		if(unroll)
1657		{
1658			LoopUnrollable loopUnrollable;
1659			unroll = loopUnrollable.traverse(node);
1660		}
1661
1662		TIntermNode *init = node->getInit();
1663		TIntermTyped *condition = node->getCondition();
1664		TIntermTyped *expression = node->getExpression();
1665		TIntermNode *body = node->getBody();
1666		Constant True(true);
1667
1668		if(node->getType() == ELoopDoWhile)
1669		{
1670			Temporary iterate(this);
1671			emit(sw::Shader::OPCODE_MOV, &iterate, &True);
1672
1673			emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while
1674
1675			if(body)
1676			{
1677				body->traverse(this);
1678			}
1679
1680			emit(sw::Shader::OPCODE_TEST);
1681
1682			condition->traverse(this);
1683			emit(sw::Shader::OPCODE_MOV, &iterate, condition);
1684
1685			emit(sw::Shader::OPCODE_ENDWHILE);
1686		}
1687		else
1688		{
1689			if(init)
1690			{
1691				init->traverse(this);
1692			}
1693
1694			if(unroll)
1695			{
1696				for(unsigned int i = 0; i < iterations; i++)
1697				{
1698				//	condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop
1699
1700					if(body)
1701					{
1702						body->traverse(this);
1703					}
1704
1705					if(expression)
1706					{
1707						expression->traverse(this);
1708					}
1709				}
1710			}
1711			else
1712			{
1713				if(condition)
1714				{
1715					condition->traverse(this);
1716				}
1717				else
1718				{
1719					condition = &True;
1720				}
1721
1722				emit(sw::Shader::OPCODE_WHILE, 0, condition);
1723
1724				if(body)
1725				{
1726					body->traverse(this);
1727				}
1728
1729				emit(sw::Shader::OPCODE_TEST);
1730
1731				if(expression)
1732				{
1733					expression->traverse(this);
1734				}
1735
1736				if(condition)
1737				{
1738					condition->traverse(this);
1739				}
1740
1741				emit(sw::Shader::OPCODE_ENDWHILE);
1742			}
1743		}
1744
1745		return false;
1746	}
1747
1748	bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)
1749	{
1750		if(currentScope != emitScope)
1751		{
1752			return false;
1753		}
1754
1755		switch(node->getFlowOp())
1756		{
1757		case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;
1758		case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;
1759		case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;
1760		case EOpReturn:
1761			if(visit == PostVisit)
1762			{
1763				TIntermTyped *value = node->getExpression();
1764
1765				if(value)
1766				{
1767					copy(functionArray[currentFunction].ret, value);
1768				}
1769
1770				emit(sw::Shader::OPCODE_LEAVE);
1771			}
1772			break;
1773		default: UNREACHABLE(node->getFlowOp());
1774		}
1775
1776		return true;
1777	}
1778
1779	bool OutputASM::visitSwitch(Visit visit, TIntermSwitch *node)
1780	{
1781		if(currentScope != emitScope)
1782		{
1783			return false;
1784		}
1785
1786		TIntermTyped* switchValue = node->getInit();
1787		TIntermAggregate* opList = node->getStatementList();
1788
1789		if(!switchValue || !opList)
1790		{
1791			return false;
1792		}
1793
1794		switchValue->traverse(this);
1795
1796		emit(sw::Shader::OPCODE_SWITCH);
1797
1798		TIntermSequence& sequence = opList->getSequence();
1799		TIntermSequence::iterator it = sequence.begin();
1800		TIntermSequence::iterator defaultIt = sequence.end();
1801		int nbCases = 0;
1802		for(; it != sequence.end(); ++it)
1803		{
1804			TIntermCase* currentCase = (*it)->getAsCaseNode();
1805			if(currentCase)
1806			{
1807				TIntermSequence::iterator caseIt = it;
1808
1809				TIntermTyped* condition = currentCase->getCondition();
1810				if(condition) // non default case
1811				{
1812					if(nbCases != 0)
1813					{
1814						emit(sw::Shader::OPCODE_ELSE);
1815					}
1816
1817					condition->traverse(this);
1818					Temporary result(this);
1819					emitBinary(sw::Shader::OPCODE_EQ, &result, switchValue, condition);
1820					emit(sw::Shader::OPCODE_IF, 0, &result);
1821					nbCases++;
1822
1823					for(++caseIt; caseIt != sequence.end(); ++caseIt)
1824					{
1825						(*caseIt)->traverse(this);
1826						if((*caseIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1827						{
1828							break;
1829						}
1830					}
1831				}
1832				else
1833				{
1834					defaultIt = it; // The default case might not be the last case, keep it for last
1835				}
1836			}
1837		}
1838
1839		// If there's a default case, traverse it here
1840		if(defaultIt != sequence.end())
1841		{
1842			emit(sw::Shader::OPCODE_ELSE);
1843			for(++defaultIt; defaultIt != sequence.end(); ++defaultIt)
1844			{
1845				(*defaultIt)->traverse(this);
1846				if((*defaultIt)->getAsBranchNode()) // Kill, Break, Continue or Return
1847				{
1848					break;
1849				}
1850			}
1851		}
1852
1853		for(int i = 0; i < nbCases; ++i)
1854		{
1855			emit(sw::Shader::OPCODE_ENDIF);
1856		}
1857
1858		emit(sw::Shader::OPCODE_ENDSWITCH);
1859
1860		return false;
1861	}
1862
1863	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)
1864	{
1865		return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);
1866	}
1867
1868	Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,
1869	                             TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)
1870	{
1871		Instruction *instruction = new Instruction(op);
1872
1873		if(dst)
1874		{
1875			instruction->dst.type = registerType(dst);
1876			instruction->dst.index = registerIndex(dst) + dstIndex;
1877			instruction->dst.mask = writeMask(dst);
1878			instruction->dst.integer = (dst->getBasicType() == EbtInt);
1879		}
1880
1881		argument(instruction->src[0], src0, index0);
1882		argument(instruction->src[1], src1, index1);
1883		argument(instruction->src[2], src2, index2);
1884		argument(instruction->src[3], src3, index3);
1885		argument(instruction->src[4], src4, index4);
1886
1887		shader->append(instruction);
1888
1889		return instruction;
1890	}
1891
1892	Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)
1893	{
1894		return emitCast(dst, 0, src, 0);
1895	}
1896
1897	Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)
1898	{
1899		switch(src->getBasicType())
1900		{
1901		case EbtBool:
1902			switch(dst->getBasicType())
1903			{
1904			case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1905			case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);
1906			case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);
1907			default:       break;
1908			}
1909			break;
1910		case EbtInt:
1911			switch(dst->getBasicType())
1912			{
1913			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1914			case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);
1915			default:       break;
1916			}
1917			break;
1918		case EbtUInt:
1919			switch(dst->getBasicType())
1920			{
1921			case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);
1922			case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);
1923			default:       break;
1924			}
1925			break;
1926		case EbtFloat:
1927			switch(dst->getBasicType())
1928			{
1929			case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);
1930			case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);
1931			case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);
1932			default:      break;
1933			}
1934			break;
1935		default:
1936			break;
1937		}
1938
1939		ASSERT((src->getBasicType() == dst->getBasicType()) ||
1940		      ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||
1941		      ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));
1942
1943		return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);
1944	}
1945
1946	void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)
1947	{
1948		for(int index = 0; index < dst->elementRegisterCount(); index++)
1949		{
1950			emit(op, dst, index, src0, index, src1, index, src2, index);
1951		}
1952	}
1953
	// Emits a compound assignment: 'op' is applied to 'src0' and 'src1' with the outcome
	// placed in 'result' (through emitBinary), after which 'result' is stored to the
	// l-value expression 'lhs' (through assignLvalue).
	void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
	{
		emitBinary(op, result, src0, src1);   // The operation must be emitted before its result is stored
		assignLvalue(lhs, result);
	}
1959
1960	void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)
1961	{
1962		sw::Shader::Opcode opcode;
1963		switch(left->getAsTyped()->getBasicType())
1964		{
1965		case EbtBool:
1966		case EbtInt:
1967			opcode = sw::Shader::OPCODE_ICMP;
1968			break;
1969		case EbtUInt:
1970			opcode = sw::Shader::OPCODE_UCMP;
1971			break;
1972		default:
1973			opcode = sw::Shader::OPCODE_CMP;
1974			break;
1975		}
1976
1977		Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);
1978		cmp->control = cmpOp;
1979	}
1980
1981	int componentCount(const TType &type, int registers)
1982	{
1983		if(registers == 0)
1984		{
1985			return 0;
1986		}
1987
1988		if(type.isArray() && registers >= type.elementRegisterCount())
1989		{
1990			int index = registers / type.elementRegisterCount();
1991			registers -= index * type.elementRegisterCount();
1992			return index * type.getElementSize() + componentCount(type, registers);
1993		}
1994
1995		if(type.isStruct() || type.isInterfaceBlock())
1996		{
1997			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
1998			int elements = 0;
1999
2000			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2001			{
2002				const TType &fieldType = *((*field)->type());
2003
2004				if(fieldType.totalRegisterCount() <= registers)
2005				{
2006					registers -= fieldType.totalRegisterCount();
2007					elements += fieldType.getObjectSize();
2008				}
2009				else   // Register within this field
2010				{
2011					return elements + componentCount(fieldType, registers);
2012				}
2013			}
2014		}
2015		else if(type.isMatrix())
2016		{
2017			return registers * type.registerSize();
2018		}
2019
2020		UNREACHABLE(0);
2021		return 0;
2022	}
2023
2024	int registerSize(const TType &type, int registers)
2025	{
2026		if(registers == 0)
2027		{
2028			if(type.isStruct())
2029			{
2030				return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);
2031			}
2032			else if(type.isInterfaceBlock())
2033			{
2034				return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);
2035			}
2036
2037			return type.registerSize();
2038		}
2039
2040		if(type.isArray() && registers >= type.elementRegisterCount())
2041		{
2042			int index = registers / type.elementRegisterCount();
2043			registers -= index * type.elementRegisterCount();
2044			return registerSize(type, registers);
2045		}
2046
2047		if(type.isStruct() || type.isInterfaceBlock())
2048		{
2049			const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();
2050			int elements = 0;
2051
2052			for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)
2053			{
2054				const TType &fieldType = *((*field)->type());
2055
2056				if(fieldType.totalRegisterCount() <= registers)
2057				{
2058					registers -= fieldType.totalRegisterCount();
2059					elements += fieldType.getObjectSize();
2060				}
2061				else   // Register within this field
2062				{
2063					return registerSize(fieldType, registers);
2064				}
2065			}
2066		}
2067		else if(type.isMatrix())
2068		{
2069			return registerSize(type, 0);
2070		}
2071
2072		UNREACHABLE(0);
2073		return 0;
2074	}
2075
2076	int OutputASM::getBlockId(TIntermTyped *arg)
2077	{
2078		if(arg)
2079		{
2080			const TType &type = arg->getType();
2081			TInterfaceBlock* block = type.getInterfaceBlock();
2082			if(block && (type.getQualifier() == EvqUniform))
2083			{
2084				// Make sure the uniform block is declared
2085				uniformRegister(arg);
2086
2087				const char* blockName = block->name().c_str();
2088
2089				// Fetch uniform block index from array of blocks
2090				for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)
2091				{
2092					if(blockName == it->name)
2093					{
2094						return it->blockId;
2095					}
2096				}
2097
2098				ASSERT(false);
2099			}
2100		}
2101
2102		return -1;
2103	}
2104
2105	OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)
2106	{
2107		const TType &type = arg->getType();
2108		int blockId = getBlockId(arg);
2109		ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);
2110		if(blockId != -1)
2111		{
2112			argumentInfo.bufferIndex = 0;
2113			for(int i = 0; i < blockId; ++i)
2114			{
2115				int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;
2116				argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;
2117			}
2118
2119			const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];
2120
2121			BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();
2122			BlockDefinitionIndexMap::const_iterator it = itEnd;
2123
2124			argumentInfo.clampedIndex = index;
2125			if(type.isInterfaceBlock())
2126			{
2127				// Offset index to the beginning of the selected instance
2128				int blockRegisters = type.elementRegisterCount();
2129				int bufferOffset = argumentInfo.clampedIndex / blockRegisters;
2130				argumentInfo.bufferIndex += bufferOffset;
2131				argumentInfo.clampedIndex -= bufferOffset * blockRegisters;
2132			}
2133
2134			int regIndex = registerIndex(arg);
2135			for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)
2136			{
2137				it = blockDefinition.find(i);
2138				if(it != itEnd)
2139				{
2140					argumentInfo.clampedIndex -= (i - regIndex);
2141					break;
2142				}
2143			}
2144			ASSERT(it != itEnd);
2145
2146			argumentInfo.typedMemberInfo = it->second;
2147
2148			int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();
2149			argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;
2150		}
2151		else
2152		{
2153			argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;
2154		}
2155
2156		return argumentInfo;
2157	}
2158
	// Fills in the source parameter 'parameter' so that it refers to register 'index' of
	// 'argument'. Nothing is done when 'argument' is null.
	//
	// Members of uniform blocks which are booleans or row-major matrices are first
	// unpacked into a temporary register by instructions appended to the shader; the
	// parameter then refers to that temporary instead of the uniform itself.
	void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)
	{
		if(argument)
		{
			TIntermTyped *arg = argument->getAsTyped();
			Temporary unpackedUniform(this);   // Receives the unpacked value in the two special cases below

			const TType& srcType = arg->getType();
			TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();
			if(srcBlock && (srcType.getQualifier() == EvqUniform))
			{
				const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
				const TType &memberType = argumentInfo.typedMemberInfo.type;

				if(memberType.getBasicType() == EbtBool)
				{
					ASSERT(argumentInfo.clampedIndex < (memberType.isArray() ? memberType.getArraySize() : 1)); // index < arraySize

					// Convert the packed bool, which is currently an int, to a true bool
					Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);
					instruction->dst.type = sw::Shader::PARAMETER_TEMP;
					instruction->dst.index = registerIndex(&unpackedUniform);
					instruction->src[0].type = sw::Shader::PARAMETER_CONST;
					instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
					instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;

					shader->append(instruction);

					// From here on the argument is register 0 of the temporary
					arg = &unpackedUniform;
					index = 0;
				}
				else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())
				{
					int numCols = memberType.getNominalSize();
					int numRows = memberType.getSecondarySize();

					ASSERT(argumentInfo.clampedIndex < (numCols * (memberType.isArray() ? memberType.getArraySize() : 1))); // index < cols * arraySize

					unsigned int dstIndex = registerIndex(&unpackedUniform);
					// Multiplying by 0x55 replicates the 2-bit component selector into all four swizzle fields
					unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;
					int arrayIndex = argumentInfo.clampedIndex / numCols;
					int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;

					// One MOV per row: each writes a single component of the temporary
					for(int j = 0; j < numRows; ++j)
					{
						// Transpose the row major matrix
						Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);
						instruction->dst.type = sw::Shader::PARAMETER_TEMP;
						instruction->dst.index = dstIndex;
						instruction->dst.mask = 1 << j;
						instruction->src[0].type = sw::Shader::PARAMETER_CONST;
						instruction->src[0].bufferIndex = argumentInfo.bufferIndex;
						instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;
						instruction->src[0].swizzle = srcSwizzle;

						shader->append(instruction);
					}

					// From here on the argument is register 0 of the temporary
					arg = &unpackedUniform;
					index = 0;
				}
			}

			// Queried again because 'arg' and 'index' may have been redirected to the temporary above
			const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);
			const TType &type = argumentInfo.typedMemberInfo.type;

			int size = registerSize(type, argumentInfo.clampedIndex);

			parameter.type = registerType(arg);
			parameter.bufferIndex = argumentInfo.bufferIndex;

			if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())
			{
				// Constant with known values: store them in the parameter itself, as floats
				int component = componentCount(type, argumentInfo.clampedIndex);
				ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();

				for(int i = 0; i < 4; i++)
				{
					if(size == 1)   // Replicate
					{
						parameter.value[i] = constants[component + 0].getAsFloat();
					}
					else if(i < size)
					{
						parameter.value[i] = constants[component + i].getAsFloat();
					}
					else
					{
						// Components beyond the register's size are zeroed
						parameter.value[i] = 0.0f;
					}
				}
			}
			else
			{
				parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;

				if(parameter.bufferIndex != -1)
				{
					// Uniform block member: the index is computed from the member's offset and stride instead
					int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;
					parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;
				}
			}

			// The swizzle is left untouched for samplers
			if(!IsSampler(arg->getBasicType()))
			{
				parameter.swizzle = readSwizzle(arg, size);
			}
		}
	}
2268
2269	void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)
2270	{
2271		for(int index = 0; index < dst->totalRegisterCount(); index++)
2272		{
2273			Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);
2274			mov->dst.mask = writeMask(dst, index);
2275		}
2276	}
2277
2278	int swizzleElement(int swizzle, int index)
2279	{
2280		return (swizzle >> (index * 2)) & 0x03;
2281	}
2282
2283	int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
2284	{
2285		return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |
2286		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |
2287		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |
2288		       (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);
2289	}
2290
2291	void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)
2292	{
2293		if(src &&
2294			((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||
2295			 (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))
2296		{
2297			return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");
2298		}
2299
2300		TIntermBinary *binary = dst->getAsBinaryNode();
2301
2302		if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())
2303		{
2304			Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);
2305
2306			Temporary address(this);
2307			lvalue(insert->dst, address, dst);
2308
2309			insert->src[0].type = insert->dst.type;
2310			insert->src[0].index = insert->dst.index;
2311			insert->src[0].rel = insert->dst.rel;
2312			argument(insert->src[1], src);
2313			argument(insert->src[2], binary->getRight());
2314
2315			shader->append(insert);
2316		}
2317		else
2318		{
2319			for(int offset = 0; offset < dst->totalRegisterCount(); offset++)
2320			{
2321				Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);
2322
2323				Temporary address(this);
2324				int swizzle = lvalue(mov->dst, address, dst);
2325				mov->dst.index += offset;
2326
2327				if(offset > 0)
2328				{
2329					mov->dst.mask = writeMask(dst, offset);
2330				}
2331
2332				argument(mov->src[0], src, offset);
2333				mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);
2334
2335				shader->append(mov);
2336			}
2337		}
2338	}
2339
2340	int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)
2341	{
2342		TIntermTyped *result = node;
2343		TIntermBinary *binary = node->getAsBinaryNode();
2344		TIntermSymbol *symbol = node->getAsSymbolNode();
2345
2346		if(binary)
2347		{
2348			TIntermTyped *left = binary->getLeft();
2349			TIntermTyped *right = binary->getRight();
2350
2351			int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side
2352
2353			switch(binary->getOp())
2354			{
2355			case EOpIndexDirect:
2356				{
2357					int rightIndex = right->getAsConstantUnion()->getIConst(0);
2358
2359					if(left->isRegister())
2360					{
2361						int leftMask = dst.mask;
2362
2363						dst.mask = 1;
2364						while((leftMask & dst.mask) == 0)
2365						{
2366							dst.mask = dst.mask << 1;
2367						}
2368
2369						int element = swizzleElement(leftSwizzle, rightIndex);
2370						dst.mask = 1 << element;
2371
2372						return element;
2373					}
2374					else if(left->isArray() || left->isMatrix())
2375					{
2376						dst.index += rightIndex * result->totalRegisterCount();
2377						return 0xE4;
2378					}
2379					else UNREACHABLE(0);
2380				}
2381				break;
2382			case EOpIndexIndirect:
2383				{
2384					if(left->isRegister())
2385					{
2386						// Requires INSERT instruction (handled by calling function)
2387					}
2388					else if(left->isArray() || left->isMatrix())
2389					{
2390						int scale = result->totalRegisterCount();
2391
2392						if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
2393						{
2394							if(left->totalRegisterCount() > 1)
2395							{
2396								sw::Shader::SourceParameter relativeRegister;
2397								argument(relativeRegister, right);
2398
2399								dst.rel.index = relativeRegister.index;
2400								dst.rel.type = relativeRegister.type;
2401								dst.rel.scale = scale;
2402								dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
2403							}
2404						}
2405						else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register
2406						{
2407							if(scale == 1)
2408							{
2409								Constant oldScale((int)dst.rel.scale);
2410								Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
2411								mad->src[0].index = dst.rel.index;
2412								mad->src[0].type = dst.rel.type;
2413							}
2414							else
2415							{
2416								Constant oldScale((int)dst.rel.scale);
2417								Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
2418								mul->src[0].index = dst.rel.index;
2419								mul->src[0].type = dst.rel.type;
2420
2421								Constant newScale(scale);
2422								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2423							}
2424
2425							dst.rel.type = sw::Shader::PARAMETER_TEMP;
2426							dst.rel.index = registerIndex(&address);
2427							dst.rel.scale = 1;
2428						}
2429						else   // Just add the new index to the address register
2430						{
2431							if(scale == 1)
2432							{
2433								emit(sw::Shader::OPCODE_IADD, &address, &address, right);
2434							}
2435							else
2436							{
2437								Constant newScale(scale);
2438								emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
2439							}
2440						}
2441					}
2442					else UNREACHABLE(0);
2443				}
2444				break;
2445			case EOpIndexDirectStruct:
2446			case EOpIndexDirectInterfaceBlock:
2447				{
2448					const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
2449					                           left->getType().getStruct()->fields() :
2450					                           left->getType().getInterfaceBlock()->fields();
2451					int index = right->getAsConstantUnion()->getIConst(0);
2452					int fieldOffset = 0;
2453
2454					for(int i = 0; i < index; i++)
2455					{
2456						fieldOffset += fields[i]->type()->totalRegisterCount();
2457					}
2458
2459					dst.type = registerType(left);
2460					dst.index += fieldOffset;
2461					dst.mask = writeMask(result);
2462
2463					return 0xE4;
2464				}
2465				break;
2466			case EOpVectorSwizzle:
2467				{
2468					ASSERT(left->isRegister());
2469
2470					int leftMask = dst.mask;
2471
2472					int swizzle = 0;
2473					int rightMask = 0;
2474
2475					TIntermSequence &sequence = right->getAsAggregate()->getSequence();
2476
2477					for(unsigned int i = 0; i < sequence.size(); i++)
2478					{
2479						int index = sequence[i]->getAsConstantUnion()->getIConst(0);
2480
2481						int element = swizzleElement(leftSwizzle, index);
2482						rightMask = rightMask | (1 << element);
2483						swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
2484					}
2485
2486					dst.mask = leftMask & rightMask;
2487
2488					return swizzle;
2489				}
2490				break;
2491			default:
2492				UNREACHABLE(binary->getOp());   // Not an l-value operator
2493				break;
2494			}
2495		}
2496		else if(symbol)
2497		{
2498			dst.type = registerType(symbol);
2499			dst.index = registerIndex(symbol);
2500			dst.mask = writeMask(symbol);
2501			return 0xE4;
2502		}
2503
2504		return 0xE4;
2505	}
2506
2507	sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)
2508	{
2509		if(isSamplerRegister(operand))
2510		{
2511			return sw::Shader::PARAMETER_SAMPLER;
2512		}
2513
2514		const TQualifier qualifier = operand->getQualifier();
2515		if((EvqFragColor == qualifier) || (EvqFragData == qualifier))
2516		{
2517			if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||
2518			   ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))
2519			{
2520				mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");
2521			}
2522			outputQualifier = qualifier;
2523		}
2524
2525		if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))
2526		{
2527			return sw::Shader::PARAMETER_TEMP;
2528		}
2529
2530		switch(qualifier)
2531		{
2532		case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;
2533		case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;
2534		case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float
2535		case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;
2536		case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;
2537		case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;
2538		case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;
2539		case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;
2540		case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;
2541		case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;
2542		case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend
2543		case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend
2544		case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;
2545		case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;
2546		case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;
2547		case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;
2548		case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;
2549		case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;
2550		case EvqUniform:             return sw::Shader::PARAMETER_CONST;
2551		case EvqIn:                  return sw::Shader::PARAMETER_TEMP;
2552		case EvqOut:                 return sw::Shader::PARAMETER_TEMP;
2553		case EvqInOut:               return sw::Shader::PARAMETER_TEMP;
2554		case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;
2555		case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;
2556		case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;
2557		case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;
2558		case EvqVertexID:            return sw::Shader::PARAMETER_MISCTYPE;
2559		case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;
2560		case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;
2561		case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;
2562		case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;
2563		case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;
2564		case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;
2565		default: UNREACHABLE(qualifier);
2566		}
2567
2568		return sw::Shader::PARAMETER_VOID;
2569	}
2570
2571	bool OutputASM::hasFlatQualifier(TIntermTyped *operand)
2572	{
2573		const TQualifier qualifier = operand->getQualifier();
2574		return qualifier == EvqFlat || qualifier == EvqFlatOut || qualifier == EvqFlatIn;
2575	}
2576
2577	unsigned int OutputASM::registerIndex(TIntermTyped *operand)
2578	{
2579		if(isSamplerRegister(operand))
2580		{
2581			return samplerRegister(operand);
2582		}
2583
2584		switch(operand->getQualifier())
2585		{
2586		case EvqTemporary:           return temporaryRegister(operand);
2587		case EvqGlobal:              return temporaryRegister(operand);
2588		case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression
2589		case EvqAttribute:           return attributeRegister(operand);
2590		case EvqVaryingIn:           return varyingRegister(operand);
2591		case EvqVaryingOut:          return varyingRegister(operand);
2592		case EvqVertexIn:            return attributeRegister(operand);
2593		case EvqFragmentOut:         return fragmentOutputRegister(operand);
2594		case EvqVertexOut:           return varyingRegister(operand);
2595		case EvqFragmentIn:          return varyingRegister(operand);
2596		case EvqInvariantVaryingIn:  return varyingRegister(operand);
2597		case EvqInvariantVaryingOut: return varyingRegister(operand);
2598		case EvqSmooth:              return varyingRegister(operand);
2599		case EvqFlat:                return varyingRegister(operand);
2600		case EvqCentroidOut:         return varyingRegister(operand);
2601		case EvqSmoothIn:            return varyingRegister(operand);
2602		case EvqFlatIn:              return varyingRegister(operand);
2603		case EvqCentroidIn:          return varyingRegister(operand);
2604		case EvqUniform:             return uniformRegister(operand);
2605		case EvqIn:                  return temporaryRegister(operand);
2606		case EvqOut:                 return temporaryRegister(operand);
2607		case EvqInOut:               return temporaryRegister(operand);
2608		case EvqConstReadOnly:       return temporaryRegister(operand);
2609		case EvqPosition:            return varyingRegister(operand);
2610		case EvqPointSize:           return varyingRegister(operand);
2611		case EvqInstanceID:          vertexShader->declareInstanceId(); return sw::Shader::InstanceIDIndex;
2612		case EvqVertexID:            vertexShader->declareVertexId(); return sw::Shader::VertexIDIndex;
2613		case EvqFragCoord:           pixelShader->declareVPos();  return sw::Shader::VPosIndex;
2614		case EvqFrontFacing:         pixelShader->declareVFace(); return sw::Shader::VFaceIndex;
2615		case EvqPointCoord:          return varyingRegister(operand);
2616		case EvqFragColor:           return 0;
2617		case EvqFragData:            return fragmentOutputRegister(operand);
2618		case EvqFragDepth:           return 0;
2619		default: UNREACHABLE(operand->getQualifier());
2620		}
2621
2622		return 0;
2623	}
2624
2625	int OutputASM::writeMask(TIntermTyped *destination, int index)
2626	{
2627		if(destination->getQualifier() == EvqPointSize)
2628		{
2629			return 0x2;   // Point size stored in the y component
2630		}
2631
2632		return 0xF >> (4 - registerSize(destination->getType(), index));
2633	}
2634
2635	int OutputASM::readSwizzle(TIntermTyped *argument, int size)
2636	{
2637		if(argument->getQualifier() == EvqPointSize)
2638		{
2639			return 0x55;   // Point size stored in the y component
2640		}
2641
2642		static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw
2643
2644		return swizzleSize[size];
2645	}
2646
2647	// Conservatively checks whether an expression is fast to compute and has no side effects
2648	bool OutputASM::trivial(TIntermTyped *expression, int budget)
2649	{
2650		if(!expression->isRegister())
2651		{
2652			return false;
2653		}
2654
2655		return cost(expression, budget) >= 0;
2656	}
2657
2658	// Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)
2659	int OutputASM::cost(TIntermNode *expression, int budget)
2660	{
2661		if(budget < 0)
2662		{
2663			return budget;
2664		}
2665
2666		if(expression->getAsSymbolNode())
2667		{
2668			return budget;
2669		}
2670		else if(expression->getAsConstantUnion())
2671		{
2672			return budget;
2673		}
2674		else if(expression->getAsBinaryNode())
2675		{
2676			TIntermBinary *binary = expression->getAsBinaryNode();
2677
2678			switch(binary->getOp())
2679			{
2680			case EOpVectorSwizzle:
2681			case EOpIndexDirect:
2682			case EOpIndexDirectStruct:
2683			case EOpIndexDirectInterfaceBlock:
2684				return cost(binary->getLeft(), budget - 0);
2685			case EOpAdd:
2686			case EOpSub:
2687			case EOpMul:
2688				return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));
2689			default:
2690				return -1;
2691			}
2692		}
2693		else if(expression->getAsUnaryNode())
2694		{
2695			TIntermUnary *unary = expression->getAsUnaryNode();
2696
2697			switch(unary->getOp())
2698			{
2699			case EOpAbs:
2700			case EOpNegative:
2701				return cost(unary->getOperand(), budget - 1);
2702			default:
2703				return -1;
2704			}
2705		}
2706		else if(expression->getAsSelectionNode())
2707		{
2708			TIntermSelection *selection = expression->getAsSelectionNode();
2709
2710			if(selection->usesTernaryOperator())
2711			{
2712				TIntermTyped *condition = selection->getCondition();
2713				TIntermNode *trueBlock = selection->getTrueBlock();
2714				TIntermNode *falseBlock = selection->getFalseBlock();
2715				TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();
2716
2717				if(constantCondition)
2718				{
2719					bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();
2720
2721					if(trueCondition)
2722					{
2723						return cost(trueBlock, budget - 0);
2724					}
2725					else
2726					{
2727						return cost(falseBlock, budget - 0);
2728					}
2729				}
2730				else
2731				{
2732					return cost(trueBlock, cost(falseBlock, budget - 2));
2733				}
2734			}
2735		}
2736
2737		return -1;
2738	}
2739
2740	const Function *OutputASM::findFunction(const TString &name)
2741	{
2742		for(unsigned int f = 0; f < functionArray.size(); f++)
2743		{
2744			if(functionArray[f].name == name)
2745			{
2746				return &functionArray[f];
2747			}
2748		}
2749
2750		return 0;
2751	}
2752
2753	int OutputASM::temporaryRegister(TIntermTyped *temporary)
2754	{
2755		return allocate(temporaries, temporary);
2756	}
2757
2758	int OutputASM::varyingRegister(TIntermTyped *varying)
2759	{
2760		int var = lookup(varyings, varying);
2761
2762		if(var == -1)
2763		{
2764			var = allocate(varyings, varying);
2765			int componentCount = varying->registerSize();
2766			int registerCount = varying->totalRegisterCount();
2767
2768			if(pixelShader)
2769			{
2770				if((var + registerCount) > sw::MAX_FRAGMENT_INPUTS)
2771				{
2772					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");
2773					return 0;
2774				}
2775
2776				if(varying->getQualifier() == EvqPointCoord)
2777				{
2778					ASSERT(varying->isRegister());
2779					pixelShader->setInput(var, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var));
2780				}
2781				else
2782				{
2783					for(int i = 0; i < varying->totalRegisterCount(); i++)
2784					{
2785						bool flat = hasFlatQualifier(varying);
2786
2787						pixelShader->setInput(var + i, componentCount, sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i, flat));
2788					}
2789				}
2790			}
2791			else if(vertexShader)
2792			{
2793				if((var + registerCount) > sw::MAX_VERTEX_OUTPUTS)
2794				{
2795					mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");
2796					return 0;
2797				}
2798
2799				if(varying->getQualifier() == EvqPosition)
2800				{
2801					ASSERT(varying->isRegister());
2802					vertexShader->setPositionRegister(var);
2803				}
2804				else if(varying->getQualifier() == EvqPointSize)
2805				{
2806					ASSERT(varying->isRegister());
2807					vertexShader->setPointSizeRegister(var);
2808				}
2809				else
2810				{
2811					// Semantic indexes for user varyings will be assigned during program link to match the pixel shader
2812				}
2813			}
2814			else UNREACHABLE(0);
2815
2816			declareVarying(varying, var);
2817		}
2818
2819		return var;
2820	}
2821
2822	void OutputASM::declareVarying(TIntermTyped *varying, int reg)
2823	{
2824		if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking
2825		{
2826			const TType &type = varying->getType();
2827			const char *name = varying->getAsSymbolNode()->getSymbol().c_str();
2828			VaryingList &activeVaryings = shaderObject->varyings;
2829
2830			// Check if this varying has been declared before without having a register assigned
2831			for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)
2832			{
2833				if(v->name == name)
2834				{
2835					if(reg >= 0)
2836					{
2837						ASSERT(v->reg < 0 || v->reg == reg);
2838						v->reg = reg;
2839					}
2840
2841					return;
2842				}
2843			}
2844
2845			activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));
2846		}
2847	}
2848
2849	int OutputASM::uniformRegister(TIntermTyped *uniform)
2850	{
2851		const TType &type = uniform->getType();
2852		ASSERT(!IsSampler(type.getBasicType()));
2853		TInterfaceBlock *block = type.getAsInterfaceBlock();
2854		TIntermSymbol *symbol = uniform->getAsSymbolNode();
2855		ASSERT(symbol || block);
2856
2857		if(symbol || block)
2858		{
2859			TInterfaceBlock* parentBlock = type.getInterfaceBlock();
2860			bool isBlockMember = (!block && parentBlock);
2861			int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);
2862
2863			if(index == -1 || isBlockMember)
2864			{
2865				if(index == -1)
2866				{
2867					index = allocate(uniforms, uniform);
2868				}
2869
2870				// Verify if the current uniform is a member of an already declared block
2871				const TString &name = symbol ? symbol->getSymbol() : block->name();
2872				int blockMemberIndex = blockMemberLookup(type, name, index);
2873				if(blockMemberIndex == -1)
2874				{
2875					declareUniform(type, name, index);
2876				}
2877				else
2878				{
2879					index = blockMemberIndex;
2880				}
2881			}
2882
2883			return index;
2884		}
2885
2886		return 0;
2887	}
2888
2889	int OutputASM::attributeRegister(TIntermTyped *attribute)
2890	{
2891		ASSERT(!attribute->isArray());
2892
2893		int index = lookup(attributes, attribute);
2894
2895		if(index == -1)
2896		{
2897			TIntermSymbol *symbol = attribute->getAsSymbolNode();
2898			ASSERT(symbol);
2899
2900			if(symbol)
2901			{
2902				index = allocate(attributes, attribute);
2903				const TType &type = attribute->getType();
2904				int registerCount = attribute->totalRegisterCount();
2905				sw::VertexShader::AttribType attribType = sw::VertexShader::ATTRIBTYPE_FLOAT;
2906				switch(type.getBasicType())
2907				{
2908				case EbtInt:
2909					attribType = sw::VertexShader::ATTRIBTYPE_INT;
2910					break;
2911				case EbtUInt:
2912					attribType = sw::VertexShader::ATTRIBTYPE_UINT;
2913					break;
2914				case EbtFloat:
2915				default:
2916					break;
2917				}
2918
2919				if(vertexShader && (index + registerCount) <= sw::MAX_VERTEX_INPUTS)
2920				{
2921					for(int i = 0; i < registerCount; i++)
2922					{
2923						vertexShader->setInput(index + i, sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i, false), attribType);
2924					}
2925				}
2926
2927				ActiveAttributes &activeAttributes = shaderObject->activeAttributes;
2928
2929				const char *name = symbol->getSymbol().c_str();
2930				activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));
2931			}
2932		}
2933
2934		return index;
2935	}
2936
2937	int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)
2938	{
2939		return allocate(fragmentOutputs, fragmentOutput);
2940	}
2941
2942	int OutputASM::samplerRegister(TIntermTyped *sampler)
2943	{
2944		const TType &type = sampler->getType();
2945		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
2946
2947		TIntermSymbol *symbol = sampler->getAsSymbolNode();
2948		TIntermBinary *binary = sampler->getAsBinaryNode();
2949
2950		if(symbol)
2951		{
2952			switch(type.getQualifier())
2953			{
2954			case EvqUniform:
2955				return samplerRegister(symbol);
2956			case EvqIn:
2957			case EvqConstReadOnly:
2958				// Function arguments are not (uniform) sampler registers
2959				return -1;
2960			default:
2961				UNREACHABLE(type.getQualifier());
2962			}
2963		}
2964		else if(binary)
2965		{
2966			TIntermTyped *left = binary->getLeft();
2967			TIntermTyped *right = binary->getRight();
2968			const TType &leftType = left->getType();
2969			int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;
2970			int offset = 0;
2971
2972			switch(binary->getOp())
2973			{
2974			case EOpIndexDirect:
2975				ASSERT(left->isArray());
2976				offset = index * leftType.elementRegisterCount();
2977				break;
2978			case EOpIndexDirectStruct:
2979				ASSERT(leftType.isStruct());
2980				{
2981					const TFieldList &fields = leftType.getStruct()->fields();
2982
2983					for(int i = 0; i < index; i++)
2984					{
2985						offset += fields[i]->type()->totalRegisterCount();
2986					}
2987				}
2988				break;
2989			case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register
2990				return -1;
2991			case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers
2992			default:
2993				UNREACHABLE(binary->getOp());
2994				return -1;
2995			}
2996
2997			int base = samplerRegister(left);
2998
2999			if(base < 0)
3000			{
3001				return -1;
3002			}
3003
3004			return base + offset;
3005		}
3006
3007		UNREACHABLE(0);
3008		return -1;   // Not a (uniform) sampler register
3009	}
3010
3011	int OutputASM::samplerRegister(TIntermSymbol *sampler)
3012	{
3013		const TType &type = sampler->getType();
3014		ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers
3015
3016		int index = lookup(samplers, sampler);
3017
3018		if(index == -1)
3019		{
3020			index = allocate(samplers, sampler);
3021
3022			if(sampler->getQualifier() == EvqUniform)
3023			{
3024				const char *name = sampler->getSymbol().c_str();
3025				declareUniform(type, name, index);
3026			}
3027		}
3028
3029		return index;
3030	}
3031
3032	bool OutputASM::isSamplerRegister(TIntermTyped *operand)
3033	{
3034		return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;
3035	}
3036
3037	int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)
3038	{
3039		for(unsigned int i = 0; i < list.size(); i++)
3040		{
3041			if(list[i] == variable)
3042			{
3043				return i;   // Pointer match
3044			}
3045		}
3046
3047		TIntermSymbol *varSymbol = variable->getAsSymbolNode();
3048		TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();
3049
3050		if(varBlock)
3051		{
3052			for(unsigned int i = 0; i < list.size(); i++)
3053			{
3054				if(list[i])
3055				{
3056					TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();
3057
3058					if(listBlock)
3059					{
3060						if(listBlock->name() == varBlock->name())
3061						{
3062							ASSERT(listBlock->arraySize() == varBlock->arraySize());
3063							ASSERT(listBlock->fields() == varBlock->fields());
3064							ASSERT(listBlock->blockStorage() == varBlock->blockStorage());
3065							ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());
3066
3067							return i;
3068						}
3069					}
3070				}
3071			}
3072		}
3073		else if(varSymbol)
3074		{
3075			for(unsigned int i = 0; i < list.size(); i++)
3076			{
3077				if(list[i])
3078				{
3079					TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();
3080
3081					if(listSymbol)
3082					{
3083						if(listSymbol->getId() == varSymbol->getId())
3084						{
3085							ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());
3086							ASSERT(listSymbol->getType() == varSymbol->getType());
3087							ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());
3088
3089							return i;
3090						}
3091					}
3092				}
3093			}
3094		}
3095
3096		return -1;
3097	}
3098
3099	int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)
3100	{
3101		for(unsigned int i = 0; i < list.size(); i++)
3102		{
3103			if(list[i] && (list[i]->getType().getInterfaceBlock() == block))
3104			{
3105				return i;   // Pointer match
3106			}
3107		}
3108		return -1;
3109	}
3110
3111	int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)
3112	{
3113		int index = lookup(list, variable);
3114
3115		if(index == -1)
3116		{
3117			unsigned int registerCount = variable->blockRegisterCount();
3118
3119			for(unsigned int i = 0; i < list.size(); i++)
3120			{
3121				if(list[i] == 0)
3122				{
3123					unsigned int j = 1;
3124					for( ; j < registerCount && (i + j) < list.size(); j++)
3125					{
3126						if(list[i + j] != 0)
3127						{
3128							break;
3129						}
3130					}
3131
3132					if(j == registerCount)   // Found free slots
3133					{
3134						for(unsigned int j = 0; j < registerCount; j++)
3135						{
3136							list[i + j] = variable;
3137						}
3138
3139						return i;
3140					}
3141				}
3142			}
3143
3144			index = list.size();
3145
3146			for(unsigned int i = 0; i < registerCount; i++)
3147			{
3148				list.push_back(variable);
3149			}
3150		}
3151
3152		return index;
3153	}
3154
3155	void OutputASM::free(VariableArray &list, TIntermTyped *variable)
3156	{
3157		int index = lookup(list, variable);
3158
3159		if(index >= 0)
3160		{
3161			list[index] = 0;
3162		}
3163	}
3164
3165	int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)
3166	{
3167		const TInterfaceBlock *block = type.getInterfaceBlock();
3168
3169		if(block)
3170		{
3171			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
3172			const TFieldList& fields = block->fields();
3173			const TString &blockName = block->name();
3174			int fieldRegisterIndex = registerIndex;
3175
3176			if(!type.isInterfaceBlock())
3177			{
3178				// This is a uniform that's part of a block, let's see if the block is already defined
3179				for(size_t i = 0; i < activeUniformBlocks.size(); ++i)
3180				{
3181					if(activeUniformBlocks[i].name == blockName.c_str())
3182					{
3183						// The block is already defined, find the register for the current uniform and return it
3184						for(size_t j = 0; j < fields.size(); j++)
3185						{
3186							const TString &fieldName = fields[j]->name();
3187							if(fieldName == name)
3188							{
3189								return fieldRegisterIndex;
3190							}
3191
3192							fieldRegisterIndex += fields[j]->type()->totalRegisterCount();
3193						}
3194
3195						ASSERT(false);
3196						return fieldRegisterIndex;
3197					}
3198				}
3199			}
3200		}
3201
3202		return -1;
3203	}
3204
	// Recursively declares the uniform 'name' of the given 'type' at 'registerIndex'.
	//
	// Three cases are handled:
	//  - Types that are neither a structure nor an interface block are appended to
	//    shaderObject->activeUniforms (and declared as samplers when applicable).
	//  - Interface blocks are appended to shaderObject->activeUniformBlocks and each
	//    of their fields is declared recursively using a Std140BlockEncoder.
	//  - Structures are expanded field by field, producing names of the form
	//    "name.field" or, for arrays of structures, "name[i].field".
	//
	// 'blockId' identifies the uniform block the uniform belongs to; -1 is treated
	// as "no block id given" when deciding whether to look up the type's interface
	// block. 'encoder', when non-null, computes the block layout of the member.
	// The defaults of 'blockId' and 'encoder' are declared elsewhere (not visible here).
	void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)
	{
		const TStructure *structure = type.getStruct();
		// The interface block is only considered when the type is a block itself,
		// or when no block id was supplied by the caller
		const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;

		if(!structure && !block)
		{
			// Basic type: becomes a single entry on the active uniform list
			ActiveUniforms &activeUniforms = shaderObject->activeUniforms;
			const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();
			if(blockId >= 0)
			{
				// Record the member in the block's definition map and field list.
				// activeUniforms.size() is the index the uniform gets when pushed below.
				blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);
				shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());
			}
			// With an encoder, the register is derived from the block's base register and the encoded layout
			int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;
			activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),
			                                 fieldRegisterIndex, blockId, blockInfo));
			if(IsSampler(type.getBasicType()))
			{
				// Every register occupied by the sampler (array) is declared
				for(int i = 0; i < type.totalRegisterCount(); i++)
				{
					shader->declareSampler(fieldRegisterIndex + i);
				}
			}
		}
		else if(block)
		{
			// Interface block: declare the block itself, then all of its fields
			ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;
			const TFieldList& fields = block->fields();
			const TString &blockName = block->name();
			int fieldRegisterIndex = registerIndex;
			bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);

			// The new block is appended, so its id is the current size of the list
			blockId = activeUniformBlocks.size();
			bool isRowMajor = block->matrixPacking() == EmpRowMajor;
			// Pushed with a data size of 0; the real size is filled in after the fields are encoded
			activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),
			                                           block->blockStorage(), isRowMajor, registerIndex, blockId));
			blockDefinitions.push_back(BlockDefinitionIndexMap());

			Std140BlockEncoder currentBlockEncoder(isRowMajor);
			currentBlockEncoder.enterAggregateType();
			for(size_t i = 0; i < fields.size(); i++)
			{
				const TType &fieldType = *(fields[i]->type());
				const TString &fieldName = fields[i]->name();
				if(isUniformBlockMember && (fieldName == name))
				{
					// NOTE(review): registerIndex is a by-value parameter that is not read
					// again after this point, so this assignment has no visible effect here
					registerIndex = fieldRegisterIndex;
				}

				// Fields of blocks with an instance name are prefixed with the block name
				const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;

				declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);
				fieldRegisterIndex += fieldType.totalRegisterCount();
			}
			currentBlockEncoder.exitAggregateType();
			activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();
		}
		else
		{
			// Structure ('structure' is non-null here, since 'block' is null and the first branch was not taken)
			int fieldRegisterIndex = registerIndex;

			const TFieldList& fields = structure->fields();
			// The second operand is always true in this branch because 'structure' is non-null
			if(type.isArray() && (structure || type.isInterfaceBlock()))
			{
				// Array of structures: declare the fields of each element in turn
				for(int i = 0; i < type.getArraySize(); i++)
				{
					if(encoder)
					{
						encoder->enterAggregateType();
					}
					for(size_t j = 0; j < fields.size(); j++)
					{
						const TType &fieldType = *(fields[j]->type());
						const TString &fieldName = fields[j]->name();
						const TString uniformName = name + "[" + str(i) + "]." + fieldName;

						declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
						fieldRegisterIndex += fieldType.totalRegisterCount();
					}
					if(encoder)
					{
						encoder->exitAggregateType();
					}
				}
			}
			else
			{
				// Single structure: declare each field as "name.field"
				if(encoder)
				{
					encoder->enterAggregateType();
				}
				for(size_t i = 0; i < fields.size(); i++)
				{
					const TType &fieldType = *(fields[i]->type());
					const TString &fieldName = fields[i]->name();
					const TString uniformName = name + "." + fieldName;

					declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);
					fieldRegisterIndex += fieldType.totalRegisterCount();
				}
				if(encoder)
				{
					encoder->exitAggregateType();
				}
			}
		}
	}
3313
	// Maps a compiler type to the corresponding GL type enum (GL_FLOAT_VEC3,
	// GL_INT, GL_SAMPLER_2D, ...), based on the basic type and, for float/int/
	// uint/bool, on whether the type is a scalar, vector or (float only) matrix.
	// Returns GL_NONE when no mapping exists and UNREACHABLE() returns.
	//
	// Note that the inner switches are not followed by a 'break': every valid
	// case returns directly, so the code after an inner switch is only reached
	// through its 'default: UNREACHABLE(...)' label. If UNREACHABLE() returns,
	// control falls through to whatever follows (the next case label, or the
	// end of the enclosing if/else chain and its 'break').
	GLenum OutputASM::glVariableType(const TType &type)
	{
		switch(type.getBasicType())
		{
		case EbtFloat:
			if(type.isScalar())
			{
				return GL_FLOAT;
			}
			else if(type.isVector())
			{
				switch(type.getNominalSize())
				{
				case 2: return GL_FLOAT_VEC2;
				case 3: return GL_FLOAT_VEC3;
				case 4: return GL_FLOAT_VEC4;
				default: UNREACHABLE(type.getNominalSize());
				}
			}
			else if(type.isMatrix())
			{
				// The nominal size selects the first dimension of the GL matrix enum,
				// the secondary size the second one (e.g. 2 and 3 give GL_FLOAT_MAT2x3)
				switch(type.getNominalSize())
				{
				case 2:
					switch(type.getSecondarySize())
					{
					case 2: return GL_FLOAT_MAT2;
					case 3: return GL_FLOAT_MAT2x3;
					case 4: return GL_FLOAT_MAT2x4;
					default: UNREACHABLE(type.getSecondarySize());
					}
				case 3:
					switch(type.getSecondarySize())
					{
					case 2: return GL_FLOAT_MAT3x2;
					case 3: return GL_FLOAT_MAT3;
					case 4: return GL_FLOAT_MAT3x4;
					default: UNREACHABLE(type.getSecondarySize());
					}
				case 4:
					switch(type.getSecondarySize())
					{
					case 2: return GL_FLOAT_MAT4x2;
					case 3: return GL_FLOAT_MAT4x3;
					case 4: return GL_FLOAT_MAT4;
					default: UNREACHABLE(type.getSecondarySize());
					}
				default: UNREACHABLE(type.getNominalSize());
				}
			}
			else UNREACHABLE(0);
			break;
		case EbtInt:
			if(type.isScalar())
			{
				return GL_INT;
			}
			else if(type.isVector())
			{
				switch(type.getNominalSize())
				{
				case 2: return GL_INT_VEC2;
				case 3: return GL_INT_VEC3;
				case 4: return GL_INT_VEC4;
				default: UNREACHABLE(type.getNominalSize());
				}
			}
			else UNREACHABLE(0);
			break;
		case EbtUInt:
			if(type.isScalar())
			{
				return GL_UNSIGNED_INT;
			}
			else if(type.isVector())
			{
				switch(type.getNominalSize())
				{
				case 2: return GL_UNSIGNED_INT_VEC2;
				case 3: return GL_UNSIGNED_INT_VEC3;
				case 4: return GL_UNSIGNED_INT_VEC4;
				default: UNREACHABLE(type.getNominalSize());
				}
			}
			else UNREACHABLE(0);
			break;
		case EbtBool:
			if(type.isScalar())
			{
				return GL_BOOL;
			}
			else if(type.isVector())
			{
				switch(type.getNominalSize())
				{
				case 2: return GL_BOOL_VEC2;
				case 3: return GL_BOOL_VEC3;
				case 4: return GL_BOOL_VEC4;
				default: UNREACHABLE(type.getNominalSize());
				}
			}
			else UNREACHABLE(0);
			break;
		// Sampler types map one-to-one onto a GL sampler enum
		case EbtSampler2D:
			return GL_SAMPLER_2D;
		case EbtISampler2D:
			return GL_INT_SAMPLER_2D;
		case EbtUSampler2D:
			return GL_UNSIGNED_INT_SAMPLER_2D;
		case EbtSamplerCube:
			return GL_SAMPLER_CUBE;
		case EbtISamplerCube:
			return GL_INT_SAMPLER_CUBE;
		case EbtUSamplerCube:
			return GL_UNSIGNED_INT_SAMPLER_CUBE;
		case EbtSamplerExternalOES:
			return GL_SAMPLER_EXTERNAL_OES;
		case EbtSampler3D:
			return GL_SAMPLER_3D_OES;
		case EbtISampler3D:
			return GL_INT_SAMPLER_3D;
		case EbtUSampler3D:
			return GL_UNSIGNED_INT_SAMPLER_3D;
		case EbtSampler2DArray:
			return GL_SAMPLER_2D_ARRAY;
		case EbtISampler2DArray:
			return GL_INT_SAMPLER_2D_ARRAY;
		case EbtUSampler2DArray:
			return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;
		case EbtSampler2DShadow:
			return GL_SAMPLER_2D_SHADOW;
		case EbtSamplerCubeShadow:
			return GL_SAMPLER_CUBE_SHADOW;
		case EbtSampler2DArrayShadow:
			return GL_SAMPLER_2D_ARRAY_SHADOW;
		default:
			UNREACHABLE(type.getBasicType());
			break;
		}

		// Only reached for unsupported types/shapes
		return GL_NONE;
	}
3456
3457	GLenum OutputASM::glVariablePrecision(const TType &type)
3458	{
3459		if(type.getBasicType() == EbtFloat)
3460		{
3461			switch(type.getPrecision())
3462			{
3463			case EbpHigh:   return GL_HIGH_FLOAT;
3464			case EbpMedium: return GL_MEDIUM_FLOAT;
3465			case EbpLow:    return GL_LOW_FLOAT;
3466			case EbpUndefined:
3467				// Should be defined as the default precision by the parser
3468			default: UNREACHABLE(type.getPrecision());
3469			}
3470		}
3471		else if(type.getBasicType() == EbtInt)
3472		{
3473			switch(type.getPrecision())
3474			{
3475			case EbpHigh:   return GL_HIGH_INT;
3476			case EbpMedium: return GL_MEDIUM_INT;
3477			case EbpLow:    return GL_LOW_INT;
3478			case EbpUndefined:
3479				// Should be defined as the default precision by the parser
3480			default: UNREACHABLE(type.getPrecision());
3481			}
3482		}
3483
3484		// Other types (boolean, sampler) don't have a precision
3485		return GL_NONE;
3486	}
3487
3488	int OutputASM::dim(TIntermNode *v)
3489	{
3490		TIntermTyped *vector = v->getAsTyped();
3491		ASSERT(vector && vector->isRegister());
3492		return vector->getNominalSize();
3493	}
3494
3495	int OutputASM::dim2(TIntermNode *m)
3496	{
3497		TIntermTyped *matrix = m->getAsTyped();
3498		ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());
3499		return matrix->getSecondarySize();
3500	}
3501
	// Attempts to statically determine how many times a loop iterates.
	// Only loops with an integer index initialized to a constant, compared
	// against an integer constant, and stepped by a constant are recognized.
	// Returns the iteration count (0 if the body never runs), or
	// ~0u if no loop count could be determined
	unsigned int OutputASM::loopCount(TIntermLoop *node)
	{
		// Parse loops of the form:
		// for(int index = initial; index [comparator] limit; index += increment)
		TIntermSymbol *index = 0;
		TOperator comparator = EOpNull;   // EOpNull means "no usable condition found"
		int initial = 0;
		int limit = 0;
		int increment = 0;                // 0 means "no usable increment found"

		// Parse index name and initial value
		if(node->getInit())
		{
			TIntermAggregate *init = node->getInit()->getAsAggregate();

			if(init)
			{
				TIntermSequence &sequence = init->getSequence();
				// NOTE(review): assumes the init sequence is non-empty — confirm
				TIntermTyped *variable = sequence[0]->getAsTyped();

				if(variable && variable->getQualifier() == EvqTemporary && variable->getBasicType() == EbtInt)
				{
					TIntermBinary *assign = variable->getAsBinaryNode();

					if(assign && assign->getOp() == EOpInitialize)
					{
						TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();
						TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();

						if(symbol && constant)
						{
							// Only scalar integer constants are accepted as initial value
							if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
							{
								index = symbol;
								initial = constant->getUnionArrayPointer()[0].getIConst();
							}
						}
					}
				}
			}
		}

		// Parse comparator and limit value
		if(index && node->getCondition())
		{
			TIntermBinary *test = node->getCondition()->getAsBinaryNode();
			TIntermSymbol *left = test ? test->getLeft()->getAsSymbolNode() : nullptr;

			// The left-hand side of the condition must be the loop index itself
			if(left && (left->getId() == index->getId()))
			{
				TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();

				if(constant)
				{
					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
					{
						// The operator is stored as-is; unsupported ones are rejected further below
						comparator = test->getOp();
						limit = constant->getUnionArrayPointer()[0].getIConst();
					}
				}
			}
		}

		// Parse increment
		// NOTE(review): the operand being modified is not compared against 'index' here
		if(index && comparator != EOpNull && node->getExpression())
		{
			TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();
			TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();

			if(binaryTerminal)
			{
				TOperator op = binaryTerminal->getOp();
				TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();

				if(constant)
				{
					if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)
					{
						int value = constant->getUnionArrayPointer()[0].getIConst();

						switch(op)
						{
						case EOpAddAssign: increment = value;  break;
						case EOpSubAssign: increment = -value; break;
						default: UNIMPLEMENTED();   // 'increment' stays 0
						}
					}
				}
			}
			else if(unaryTerminal)
			{
				TOperator op = unaryTerminal->getOp();

				switch(op)
				{
				case EOpPostIncrement: increment = 1;  break;
				case EOpPostDecrement: increment = -1; break;
				case EOpPreIncrement:  increment = 1;  break;
				case EOpPreDecrement:  increment = -1; break;
				default: UNIMPLEMENTED();   // 'increment' stays 0
				}
			}
		}

		if(index && comparator != EOpNull && increment != 0)
		{
			// Normalize every supported comparison to 'index < limit'
			if(comparator == EOpLessThanEqual)
			{
				// index <= limit  is  index < limit + 1
				comparator = EOpLessThan;
				limit += 1;
			}
			else if(comparator == EOpGreaterThanEqual)
			{
				// index >= limit  is  index > limit - 1; then mirror the loop by
				// swapping the bounds and negating the step
				comparator = EOpLessThan;
				limit -= 1;
				std::swap(initial, limit);
				increment = -increment;
			}
			else if(comparator == EOpGreaterThan)
			{
				// Mirror the loop by swapping the bounds and negating the step
				comparator = EOpLessThan;
				std::swap(initial, limit);
				increment = -increment;
			}

			if(comparator == EOpLessThan)
			{
				if(!(initial < limit))   // Never loops
				{
					return 0;
				}

				// The numerator is positive here, so the result is negative exactly
				// when the (normalized) step is negative
				int iterations = (limit - initial + abs(increment) - 1) / increment;   // Ceiling division

				if(iterations < 0)
				{
					// The index moves away from the limit: no count can be determined
					return ~0u;
				}

				return iterations;
			}
			else UNIMPLEMENTED();   // Falls through
		}

		return ~0u;
	}
3649
3650	bool LoopUnrollable::traverse(TIntermNode *node)
3651	{
3652		loopDepth = 0;
3653		loopUnrollable = true;
3654
3655		node->traverse(this);
3656
3657		return loopUnrollable;
3658	}
3659
3660	bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)
3661	{
3662		if(visit == PreVisit)
3663		{
3664			loopDepth++;
3665		}
3666		else if(visit == PostVisit)
3667		{
3668			loopDepth++;
3669		}
3670
3671		return true;
3672	}
3673
3674	bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)
3675	{
3676		if(!loopUnrollable)
3677		{
3678			return false;
3679		}
3680
3681		if(!loopDepth)
3682		{
3683			return true;
3684		}
3685
3686		switch(node->getFlowOp())
3687		{
3688		case EOpKill:
3689		case EOpReturn:
3690			break;
3691		case EOpBreak:
3692		case EOpContinue:
3693			loopUnrollable = false;
3694			break;
3695		default: UNREACHABLE(node->getFlowOp());
3696		}
3697
3698		return loopUnrollable;
3699	}
3700
3701	bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)
3702	{
3703		return loopUnrollable;
3704	}
3705}
3706