vktSpvAsmInstructionTests.cpp revision e6f673d93852261aa351ccbdac04e2a410ca1b10
1/*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
22 *//*--------------------------------------------------------------------*/
23
24#include "vktSpvAsmInstructionTests.hpp"
25
26#include "tcuCommandLine.hpp"
27#include "tcuFormatUtil.hpp"
28#include "tcuRGBA.hpp"
29#include "tcuStringTemplate.hpp"
30#include "tcuTestLog.hpp"
31#include "tcuVectorUtil.hpp"
32
33#include "vkDefs.hpp"
34#include "vkDeviceUtil.hpp"
35#include "vkMemUtil.hpp"
36#include "vkPlatform.hpp"
37#include "vkPrograms.hpp"
38#include "vkQueryUtil.hpp"
39#include "vkRef.hpp"
40#include "vkRefUtil.hpp"
41#include "vkStrUtil.hpp"
42#include "vkTypeUtil.hpp"
43
44#include "deRandom.hpp"
45#include "deStringUtil.hpp"
46#include "deUniquePtr.hpp"
47#include "tcuStringTemplate.hpp"
48
49#include <cmath>
50#include "vktSpvAsmComputeShaderCase.hpp"
51#include "vktSpvAsmComputeShaderTestUtil.hpp"
52#include "vktTestCaseUtil.hpp"
53
54#include <cmath>
55#include <limits>
56#include <map>
57#include <string>
58#include <sstream>
59
60namespace vkt
61{
62namespace SpirVAssembly
63{
64
65namespace
66{
67
68using namespace vk;
69using std::map;
70using std::string;
71using std::vector;
72using tcu::IVec3;
73using tcu::IVec4;
74using tcu::RGBA;
75using tcu::TestLog;
76using tcu::TestStatus;
77using tcu::Vec4;
78using de::UniquePtr;
79using tcu::StringTemplate;
80using tcu::Vec4;
81
82typedef Unique<VkShaderModule>			ModuleHandleUp;
83typedef de::SharedPtr<ModuleHandleUp>	ModuleHandleSp;
84
85template<typename T>	T			randomScalar	(de::Random& rnd, T minValue, T maxValue);
86template<> inline		float		randomScalar	(de::Random& rnd, float minValue, float maxValue)		{ return rnd.getFloat(minValue, maxValue);	}
87template<> inline		deInt32		randomScalar	(de::Random& rnd, deInt32 minValue, deInt32 maxValue)	{ return rnd.getInt(minValue, maxValue);	}
88
89template<typename T>
90static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
91{
92	T* const typedPtr = (T*)dst;
93	for (int ndx = 0; ndx < numValues; ndx++)
94		typedPtr[offset + ndx] = randomScalar<T>(rnd, minValue, maxValue);
95}
96
97static void floorAll (vector<float>& values)
98{
99	for (size_t i = 0; i < values.size(); i++)
100		values[i] = deFloatFloor(values[i]);
101}
102
103static void floorAll (vector<Vec4>& values)
104{
105	for (size_t i = 0; i < values.size(); i++)
106		values[i] = floor(values[i]);
107}
108
//! Pairs a test case name with the string that is substituted into a shader template for that case.
struct CaseParameter
{
	const char*		name;	//!< Case name; the pointer is stored as-is, not copied.
	string			param;	//!< Text substituted into the shader template.

	CaseParameter (const char* case_, const string& param_)
		: name	(case_)
		, param	(param_)
	{
	}
};
116
117// Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
118//
119// #version 430
120//
121// layout(std140, set = 0, binding = 0) readonly buffer Input {
122//   float elements[];
123// } input_data;
124// layout(std140, set = 0, binding = 1) writeonly buffer Output {
125//   float elements[];
126// } output_data;
127//
128// layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
129//
130// void main() {
131//   uint x = gl_GlobalInvocationID.x;
132//   output_data.elements[x] = -input_data.elements[x];
133// }
134
// Common module header for the compute shaders in this file: declares the Shader capability,
// the Logical/GLSL450 memory model, a GLCompute entry point %main (interface variable %id)
// and a 1x1x1 local workgroup size.
static const char* const s_ShaderPreamble =
	"OpCapability Shader\n"
	"OpMemoryModel Logical GLSL450\n"
	"OpEntryPoint GLCompute %main \"main\" %id\n"
	"OpExecutionMode %main LocalSize 1 1 1\n";
140
// Type declarations shared by the shaders below: bool, void, the void() function type,
// 32-bit unsigned/signed ints, 32-bit float, 3-component uint/float vectors, an Input pointer
// to uvec3, a Uniform pointer to f32, and a runtime array of f32.
static const char* const s_CommonTypes =
	"%bool      = OpTypeBool\n"
	"%void      = OpTypeVoid\n"
	"%voidf     = OpTypeFunction %void\n"
	"%u32       = OpTypeInt 32 0\n"
	"%i32       = OpTypeInt 32 1\n"
	"%f32       = OpTypeFloat 32\n"
	"%uvec3     = OpTypeVector %u32 3\n"
	"%fvec3     = OpTypeVector %f32 3\n"
	"%uvec3ptr  = OpTypePointer Input %uvec3\n"
	"%f32ptr    = OpTypePointer Uniform %f32\n"
	"%f32arr    = OpTypeRuntimeArray %f32\n";
153
// Declares two Uniform-storage variables (%indata, %outdata) of type "struct { float[] }" (%buf),
// together with the pointer type %bufptr. Depends on the type "%f32arr" (for "float[]"),
// so it must be emitted after s_CommonTypes.
static const char* const s_InputOutputBuffer =
	"%buf     = OpTypeStruct %f32arr\n"
	"%bufptr  = OpTypePointer Uniform %buf\n"
	"%indata    = OpVariable %bufptr Uniform\n"
	"%outdata   = OpVariable %bufptr Uniform\n";
160
// Declares the buffer type and layout for the variables indata and outdata. %buf is decorated as
// BufferBlock, and both variables are bound to descriptor set 0: indata is at binding point 0,
// while outdata is at 1. Also sets the %f32arr array stride to 4 and places member 0 of %buf at offset 0.
static const char* const s_InputOutputBufferTraits =
	"OpDecorate %buf BufferBlock\n"
	"OpDecorate %indata DescriptorSet 0\n"
	"OpDecorate %indata Binding 0\n"
	"OpDecorate %outdata DescriptorSet 0\n"
	"OpDecorate %outdata Binding 1\n"
	"OpDecorate %f32arr ArrayStride 4\n"
	"OpMemberDecorate %buf 0 Offset 0\n";
171
172tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
173{
174	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
175	ComputeShaderSpec				spec;
176	de::Random						rnd				(deStringHash(group->getName()));
177	const int						numElements		= 100;
178	vector<float>					positiveFloats	(numElements, 0);
179	vector<float>					negativeFloats	(numElements, 0);
180
181	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
182
183	for (size_t ndx = 0; ndx < numElements; ++ndx)
184		negativeFloats[ndx] = -positiveFloats[ndx];
185
186	spec.assembly =
187		string(s_ShaderPreamble) +
188
189		"OpSource GLSL 430\n"
190		"OpName %main           \"main\"\n"
191		"OpName %id             \"gl_GlobalInvocationID\"\n"
192
193		"OpDecorate %id BuiltIn GlobalInvocationId\n"
194
195		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes)
196
197		+ string(s_InputOutputBuffer) +
198
199		"%id        = OpVariable %uvec3ptr Input\n"
200		"%zero      = OpConstant %i32 0\n"
201
202		"%main      = OpFunction %void None %voidf\n"
203		"%label     = OpLabel\n"
204		"%idval     = OpLoad %uvec3 %id\n"
205		"%x         = OpCompositeExtract %u32 %idval 0\n"
206
207		"             OpNop\n" // Inside a function body
208
209		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
210		"%inval     = OpLoad %f32 %inloc\n"
211		"%neg       = OpFNegate %f32 %inval\n"
212		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
213		"             OpStore %outloc %neg\n"
214		"             OpReturn\n"
215		"             OpFunctionEnd\n";
216	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
217	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
218	spec.numWorkGroups = IVec3(numElements, 1, 1);
219
220	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
221
222	return group.release();
223}
224
225tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
226{
227	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
228	ComputeShaderSpec				spec;
229	de::Random						rnd				(deStringHash(group->getName()));
230	const int						numElements		= 100;
231	vector<float>					positiveFloats	(numElements, 0);
232	vector<float>					negativeFloats	(numElements, 0);
233
234	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
235
236	for (size_t ndx = 0; ndx < numElements; ++ndx)
237		negativeFloats[ndx] = -positiveFloats[ndx];
238
239	spec.assembly =
240		string(s_ShaderPreamble) +
241
242		"%fname1 = OpString \"negateInputs.comp\"\n"
243		"%fname2 = OpString \"negateInputs\"\n"
244
245		"OpSource GLSL 430\n"
246		"OpName %main           \"main\"\n"
247		"OpName %id             \"gl_GlobalInvocationID\"\n"
248
249		"OpDecorate %id BuiltIn GlobalInvocationId\n"
250
251		+ string(s_InputOutputBufferTraits) +
252
253		"OpLine %fname1 0 0\n" // At the earliest possible position
254
255		+ string(s_CommonTypes) + string(s_InputOutputBuffer) +
256
257		"OpLine %fname1 0 1\n" // Multiple OpLines in sequence
258		"OpLine %fname2 1 0\n" // Different filenames
259		"OpLine %fname1 1000 100000\n"
260
261		"%id        = OpVariable %uvec3ptr Input\n"
262		"%zero      = OpConstant %i32 0\n"
263
264		"OpLine %fname1 1 1\n" // Before a function
265
266		"%main      = OpFunction %void None %voidf\n"
267		"%label     = OpLabel\n"
268
269		"OpLine %fname1 1 1\n" // In a function
270
271		"%idval     = OpLoad %uvec3 %id\n"
272		"%x         = OpCompositeExtract %u32 %idval 0\n"
273		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
274		"%inval     = OpLoad %f32 %inloc\n"
275		"%neg       = OpFNegate %f32 %inval\n"
276		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
277		"             OpStore %outloc %neg\n"
278		"             OpReturn\n"
279		"             OpFunctionEnd\n";
280	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
281	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
282	spec.numWorkGroups = IVec3(numElements, 1, 1);
283
284	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
285
286	return group.release();
287}
288
289tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
290{
291	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
292	ComputeShaderSpec				spec;
293	de::Random						rnd				(deStringHash(group->getName()));
294	const int						numElements		= 100;
295	vector<float>					positiveFloats	(numElements, 0);
296	vector<float>					negativeFloats	(numElements, 0);
297
298	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
299
300	for (size_t ndx = 0; ndx < numElements; ++ndx)
301		negativeFloats[ndx] = -positiveFloats[ndx];
302
303	spec.assembly =
304		string(s_ShaderPreamble) +
305
306		"%fname = OpString \"negateInputs.comp\"\n"
307
308		"OpSource GLSL 430\n"
309		"OpName %main           \"main\"\n"
310		"OpName %id             \"gl_GlobalInvocationID\"\n"
311
312		"OpDecorate %id BuiltIn GlobalInvocationId\n"
313
314		+ string(s_InputOutputBufferTraits) +
315
316		"OpNoLine\n" // At the earliest possible position, without preceding OpLine
317
318		+ string(s_CommonTypes) + string(s_InputOutputBuffer) +
319
320		"OpLine %fname 0 1\n"
321		"OpNoLine\n" // Immediately following a preceding OpLine
322
323		"OpLine %fname 1000 1\n"
324
325		"%id        = OpVariable %uvec3ptr Input\n"
326		"%zero      = OpConstant %i32 0\n"
327
328		"OpNoLine\n" // Contents after the previous OpLine
329
330		"%main      = OpFunction %void None %voidf\n"
331		"%label     = OpLabel\n"
332		"%idval     = OpLoad %uvec3 %id\n"
333		"%x         = OpCompositeExtract %u32 %idval 0\n"
334
335		"OpNoLine\n" // Multiple OpNoLine
336		"OpNoLine\n"
337		"OpNoLine\n"
338
339		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
340		"%inval     = OpLoad %f32 %inloc\n"
341		"%neg       = OpFNegate %f32 %inval\n"
342		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
343		"             OpStore %outloc %neg\n"
344		"             OpReturn\n"
345		"             OpFunctionEnd\n";
346	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
347	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
348	spec.numWorkGroups = IVec3(numElements, 1, 1);
349
350	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
351
352	return group.release();
353}
354
355// Compare instruction for the contraction compute case.
356// Returns true if the output is what is expected from the test case.
357bool compareNoContractCase(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
358{
359	if (outputAllocs.size() != 1)
360		return false;
361
362	// We really just need this for size because we are not comparing the exact values.
363	const BufferSp&	expectedOutput	= expectedOutputs[0];
364	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
365
366	for(size_t i = 0; i < expectedOutput->getNumBytes() / sizeof(float); ++i) {
367		if (outputAsFloat[i] != 0.f &&
368			outputAsFloat[i] != -ldexp(1, -24)) {
369			return false;
370		}
371	}
372
373	return true;
374}
375
376tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
377{
378	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
379	vector<CaseParameter>			cases;
380	const int						numElements		= 100;
381	vector<float>					inputFloats1	(numElements, 0);
382	vector<float>					inputFloats2	(numElements, 0);
383	vector<float>					outputFloats	(numElements, 0);
384	const StringTemplate			shaderTemplate	(
385		string(s_ShaderPreamble) +
386
387		"OpName %main           \"main\"\n"
388		"OpName %id             \"gl_GlobalInvocationID\"\n"
389
390		"OpDecorate %id BuiltIn GlobalInvocationId\n"
391
392		"${DECORATION}\n"
393
394		"OpDecorate %buf BufferBlock\n"
395		"OpDecorate %indata1 DescriptorSet 0\n"
396		"OpDecorate %indata1 Binding 0\n"
397		"OpDecorate %indata2 DescriptorSet 0\n"
398		"OpDecorate %indata2 Binding 1\n"
399		"OpDecorate %outdata DescriptorSet 0\n"
400		"OpDecorate %outdata Binding 2\n"
401		"OpDecorate %f32arr ArrayStride 4\n"
402		"OpMemberDecorate %buf 0 Offset 0\n"
403
404		+ string(s_CommonTypes) +
405
406		"%buf        = OpTypeStruct %f32arr\n"
407		"%bufptr     = OpTypePointer Uniform %buf\n"
408		"%indata1    = OpVariable %bufptr Uniform\n"
409		"%indata2    = OpVariable %bufptr Uniform\n"
410		"%outdata    = OpVariable %bufptr Uniform\n"
411
412		"%id         = OpVariable %uvec3ptr Input\n"
413		"%zero       = OpConstant %i32 0\n"
414		"%c_f_m1     = OpConstant %f32 -1.\n"
415
416		"%main       = OpFunction %void None %voidf\n"
417		"%label      = OpLabel\n"
418		"%idval      = OpLoad %uvec3 %id\n"
419		"%x          = OpCompositeExtract %u32 %idval 0\n"
420		"%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
421		"%inval1     = OpLoad %f32 %inloc1\n"
422		"%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
423		"%inval2     = OpLoad %f32 %inloc2\n"
424		"%mul        = OpFMul %f32 %inval1 %inval2\n"
425		"%add        = OpFAdd %f32 %mul %c_f_m1\n"
426		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
427		"              OpStore %outloc %add\n"
428		"              OpReturn\n"
429		"              OpFunctionEnd\n");
430
431	cases.push_back(CaseParameter("multiplication",	"OpDecorate %mul NoContraction"));
432	cases.push_back(CaseParameter("addition",		"OpDecorate %add NoContraction"));
433	cases.push_back(CaseParameter("both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
434
435	for (size_t ndx = 0; ndx < numElements; ++ndx)
436	{
437		inputFloats1[ndx]	= 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
438		inputFloats2[ndx]	= 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
439		// Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
440		// conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
441		// So the final result will be 0.f or 0x1p-24.
442		// If the operation is combined into a precise fused multiply-add, then the result would be
443		// 2^-46 (0xa8800000).
444		outputFloats[ndx]	= 0.f;
445	}
446
447	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
448	{
449		map<string, string>		specializations;
450		ComputeShaderSpec		spec;
451
452		specializations["DECORATION"] = cases[caseNdx].param;
453		spec.assembly = shaderTemplate.specialize(specializations);
454		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
455		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
456		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
457		spec.numWorkGroups = IVec3(numElements, 1, 1);
458		// Check against the two possible answers based on rounding mode.
459		spec.verifyIO = &compareNoContractCase;
460
461		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
462	}
463	return group.release();
464}
465
466bool compareFRem(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
467{
468	if (outputAllocs.size() != 1)
469		return false;
470
471	const BufferSp& expectedOutput = expectedOutputs[0];
472	const float *expectedOutputAsFloat = static_cast<const float*>(expectedOutput->data());
473	const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
474
475	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
476	{
477		const float f0 = expectedOutputAsFloat[idx];
478		const float f1 = outputAsFloat[idx];
479		// \todo relative error needs to be fairly high because FRem may be implemented as
480		// (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
481		if (deFloatAbs((f1 - f0) / f0) > 0.02)
482			return false;
483	}
484
485	return true;
486}
487
488tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
489{
490	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
491	ComputeShaderSpec				spec;
492	de::Random						rnd				(deStringHash(group->getName()));
493	const int						numElements		= 200;
494	vector<float>					inputFloats1	(numElements, 0);
495	vector<float>					inputFloats2	(numElements, 0);
496	vector<float>					outputFloats	(numElements, 0);
497
498	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
499	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
500
501	for (size_t ndx = 0; ndx < numElements; ++ndx)
502	{
503		// Guard against divisors near zero.
504		if (std::fabs(inputFloats2[ndx]) < 1e-3)
505			inputFloats2[ndx] = 8.f;
506
507		// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
508		outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
509	}
510
511	spec.assembly =
512		string(s_ShaderPreamble) +
513
514		"OpName %main           \"main\"\n"
515		"OpName %id             \"gl_GlobalInvocationID\"\n"
516
517		"OpDecorate %id BuiltIn GlobalInvocationId\n"
518
519		"OpDecorate %buf BufferBlock\n"
520		"OpDecorate %indata1 DescriptorSet 0\n"
521		"OpDecorate %indata1 Binding 0\n"
522		"OpDecorate %indata2 DescriptorSet 0\n"
523		"OpDecorate %indata2 Binding 1\n"
524		"OpDecorate %outdata DescriptorSet 0\n"
525		"OpDecorate %outdata Binding 2\n"
526		"OpDecorate %f32arr ArrayStride 4\n"
527		"OpMemberDecorate %buf 0 Offset 0\n"
528
529		+ string(s_CommonTypes) +
530
531		"%buf        = OpTypeStruct %f32arr\n"
532		"%bufptr     = OpTypePointer Uniform %buf\n"
533		"%indata1    = OpVariable %bufptr Uniform\n"
534		"%indata2    = OpVariable %bufptr Uniform\n"
535		"%outdata    = OpVariable %bufptr Uniform\n"
536
537		"%id        = OpVariable %uvec3ptr Input\n"
538		"%zero      = OpConstant %i32 0\n"
539
540		"%main      = OpFunction %void None %voidf\n"
541		"%label     = OpLabel\n"
542		"%idval     = OpLoad %uvec3 %id\n"
543		"%x         = OpCompositeExtract %u32 %idval 0\n"
544		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
545		"%inval1    = OpLoad %f32 %inloc1\n"
546		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
547		"%inval2    = OpLoad %f32 %inloc2\n"
548		"%rem       = OpFRem %f32 %inval1 %inval2\n"
549		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
550		"             OpStore %outloc %rem\n"
551		"             OpReturn\n"
552		"             OpFunctionEnd\n";
553
554	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
555	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
556	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
557	spec.numWorkGroups = IVec3(numElements, 1, 1);
558	spec.verifyIO = &compareFRem;
559
560	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
561
562	return group.release();
563}
564
565// Copy contents in the input buffer to the output buffer.
566tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
567{
568	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
569	de::Random						rnd				(deStringHash(group->getName()));
570	const int						numElements		= 100;
571
572	// The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
573	ComputeShaderSpec				spec1;
574	vector<Vec4>					inputFloats1	(numElements);
575	vector<Vec4>					outputFloats1	(numElements);
576
577	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
578
579	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
580	floorAll(inputFloats1);
581
582	for (size_t ndx = 0; ndx < numElements; ++ndx)
583		outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
584
585	spec1.assembly =
586		string(s_ShaderPreamble) +
587
588		"OpName %main           \"main\"\n"
589		"OpName %id             \"gl_GlobalInvocationID\"\n"
590
591		"OpDecorate %id BuiltIn GlobalInvocationId\n"
592		"OpDecorate %vec4arr ArrayStride 16\n"
593
594		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
595
596		"%vec4       = OpTypeVector %f32 4\n"
597		"%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
598		"%vec4ptr_f  = OpTypePointer Function %vec4\n"
599		"%vec4arr    = OpTypeRuntimeArray %vec4\n"
600		"%buf        = OpTypeStruct %vec4arr\n"
601		"%bufptr     = OpTypePointer Uniform %buf\n"
602		"%indata     = OpVariable %bufptr Uniform\n"
603		"%outdata    = OpVariable %bufptr Uniform\n"
604
605		"%id         = OpVariable %uvec3ptr Input\n"
606		"%zero       = OpConstant %i32 0\n"
607		"%c_f_0      = OpConstant %f32 0.\n"
608		"%c_f_0_5    = OpConstant %f32 0.5\n"
609		"%c_f_1_5    = OpConstant %f32 1.5\n"
610		"%c_f_2_5    = OpConstant %f32 2.5\n"
611		"%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
612
613		"%main       = OpFunction %void None %voidf\n"
614		"%label      = OpLabel\n"
615		"%v_vec4     = OpVariable %vec4ptr_f Function\n"
616		"%idval      = OpLoad %uvec3 %id\n"
617		"%x          = OpCompositeExtract %u32 %idval 0\n"
618		"%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
619		"%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
620		"              OpCopyMemory %v_vec4 %inloc\n"
621		"%v_vec4_val = OpLoad %vec4 %v_vec4\n"
622		"%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
623		"              OpStore %outloc %add\n"
624		"              OpReturn\n"
625		"              OpFunctionEnd\n";
626
627	spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
628	spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
629	spec1.numWorkGroups = IVec3(numElements, 1, 1);
630
631	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
632
633	// The following case copies a float[100] variable from the input buffer to the output buffer.
634	ComputeShaderSpec				spec2;
635	vector<float>					inputFloats2	(numElements);
636	vector<float>					outputFloats2	(numElements);
637
638	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
639
640	for (size_t ndx = 0; ndx < numElements; ++ndx)
641		outputFloats2[ndx] = inputFloats2[ndx];
642
643	spec2.assembly =
644		string(s_ShaderPreamble) +
645
646		"OpName %main           \"main\"\n"
647		"OpName %id             \"gl_GlobalInvocationID\"\n"
648
649		"OpDecorate %id BuiltIn GlobalInvocationId\n"
650		"OpDecorate %f32arr100 ArrayStride 4\n"
651
652		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
653
654		"%hundred        = OpConstant %u32 100\n"
655		"%f32arr100      = OpTypeArray %f32 %hundred\n"
656		"%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
657		"%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
658		"%buf            = OpTypeStruct %f32arr100\n"
659		"%bufptr         = OpTypePointer Uniform %buf\n"
660		"%indata         = OpVariable %bufptr Uniform\n"
661		"%outdata        = OpVariable %bufptr Uniform\n"
662
663		"%id             = OpVariable %uvec3ptr Input\n"
664		"%zero           = OpConstant %i32 0\n"
665
666		"%main           = OpFunction %void None %voidf\n"
667		"%label          = OpLabel\n"
668		"%var            = OpVariable %f32arr100ptr_f Function\n"
669		"%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
670		"%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
671		"                  OpCopyMemory %var %inarr\n"
672		"                  OpCopyMemory %outarr %var\n"
673		"                  OpReturn\n"
674		"                  OpFunctionEnd\n";
675
676	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
677	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
678	spec2.numWorkGroups = IVec3(1, 1, 1);
679
680	group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
681
682	// The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
683	ComputeShaderSpec				spec3;
684	vector<float>					inputFloats3	(16);
685	vector<float>					outputFloats3	(16);
686
687	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
688
689	for (size_t ndx = 0; ndx < 16; ++ndx)
690		outputFloats3[ndx] = inputFloats3[ndx];
691
692	spec3.assembly =
693		string(s_ShaderPreamble) +
694
695		"OpName %main           \"main\"\n"
696		"OpName %id             \"gl_GlobalInvocationID\"\n"
697
698		"OpDecorate %id BuiltIn GlobalInvocationId\n"
699		"OpMemberDecorate %buf 0 Offset 0\n"
700		"OpMemberDecorate %buf 1 Offset 16\n"
701		"OpMemberDecorate %buf 2 Offset 32\n"
702		"OpMemberDecorate %buf 3 Offset 48\n"
703
704		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
705
706		"%vec4      = OpTypeVector %f32 4\n"
707		"%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
708		"%bufptr    = OpTypePointer Uniform %buf\n"
709		"%indata    = OpVariable %bufptr Uniform\n"
710		"%outdata   = OpVariable %bufptr Uniform\n"
711		"%vec4stptr = OpTypePointer Function %buf\n"
712
713		"%id        = OpVariable %uvec3ptr Input\n"
714		"%zero      = OpConstant %i32 0\n"
715
716		"%main      = OpFunction %void None %voidf\n"
717		"%label     = OpLabel\n"
718		"%var       = OpVariable %vec4stptr Function\n"
719		"             OpCopyMemory %var %indata\n"
720		"             OpCopyMemory %outdata %var\n"
721		"             OpReturn\n"
722		"             OpFunctionEnd\n";
723
724	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
725	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
726	spec3.numWorkGroups = IVec3(1, 1, 1);
727
728	group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
729
730	// The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
731	ComputeShaderSpec				spec4;
732	vector<float>					inputFloats4	(numElements);
733	vector<float>					outputFloats4	(numElements);
734
735	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
736
737	for (size_t ndx = 0; ndx < numElements; ++ndx)
738		outputFloats4[ndx] = -inputFloats4[ndx];
739
740	spec4.assembly =
741		string(s_ShaderPreamble) +
742
743		"OpName %main           \"main\"\n"
744		"OpName %id             \"gl_GlobalInvocationID\"\n"
745
746		"OpDecorate %id BuiltIn GlobalInvocationId\n"
747
748		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
749
750		"%f32ptr_f  = OpTypePointer Function %f32\n"
751		"%id        = OpVariable %uvec3ptr Input\n"
752		"%zero      = OpConstant %i32 0\n"
753
754		"%main      = OpFunction %void None %voidf\n"
755		"%label     = OpLabel\n"
756		"%var       = OpVariable %f32ptr_f Function\n"
757		"%idval     = OpLoad %uvec3 %id\n"
758		"%x         = OpCompositeExtract %u32 %idval 0\n"
759		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
760		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
761		"             OpCopyMemory %var %inloc\n"
762		"%val       = OpLoad %f32 %var\n"
763		"%neg       = OpFNegate %f32 %val\n"
764		"             OpStore %outloc %neg\n"
765		"             OpReturn\n"
766		"             OpFunctionEnd\n";
767
768	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
769	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
770	spec4.numWorkGroups = IVec3(numElements, 1, 1);
771
772	group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
773
774	return group.release();
775}
776
777tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
778{
779	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
780	ComputeShaderSpec				spec;
781	de::Random						rnd				(deStringHash(group->getName()));
782	const int						numElements		= 100;
783	vector<float>					inputFloats		(numElements, 0);
784	vector<float>					outputFloats	(numElements, 0);
785
786	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
787
788	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
789	floorAll(inputFloats);
790
791	for (size_t ndx = 0; ndx < numElements; ++ndx)
792		outputFloats[ndx] = inputFloats[ndx] + 7.5f;
793
794	spec.assembly =
795		string(s_ShaderPreamble) +
796
797		"OpName %main           \"main\"\n"
798		"OpName %id             \"gl_GlobalInvocationID\"\n"
799
800		"OpDecorate %id BuiltIn GlobalInvocationId\n"
801
802		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
803
804		"%fmat     = OpTypeMatrix %fvec3 3\n"
805		"%three    = OpConstant %u32 3\n"
806		"%farr     = OpTypeArray %f32 %three\n"
807		"%fst      = OpTypeStruct %f32 %f32\n"
808
809		+ string(s_InputOutputBuffer) +
810
811		"%id            = OpVariable %uvec3ptr Input\n"
812		"%zero          = OpConstant %i32 0\n"
813		"%c_f           = OpConstant %f32 1.5\n"
814		"%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
815		"%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
816		"%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
817		"%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
818
819		"%main          = OpFunction %void None %voidf\n"
820		"%label         = OpLabel\n"
821		"%c_f_copy      = OpCopyObject %f32   %c_f\n"
822		"%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
823		"%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
824		"%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
825		"%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
826		"%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
827		"%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
828		"%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
829		"%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
830		// Add up. 1.5 * 5 = 7.5.
831		"%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
832		"%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
833		"%add3          = OpFAdd %f32 %add2     %farr_elem\n"
834		"%add4          = OpFAdd %f32 %add3     %fst_elem\n"
835
836		"%idval         = OpLoad %uvec3 %id\n"
837		"%x             = OpCompositeExtract %u32 %idval 0\n"
838		"%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
839		"%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
840		"%inval         = OpLoad %f32 %inloc\n"
841		"%add           = OpFAdd %f32 %add4 %inval\n"
842		"                 OpStore %outloc %add\n"
843		"                 OpReturn\n"
844		"                 OpFunctionEnd\n";
845	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
846	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
847	spec.numWorkGroups = IVec3(numElements, 1, 1);
848
849	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
850
851	return group.release();
852}
853// Assembly code used for testing OpUnreachable is based on GLSL source code:
854//
855// #version 430
856//
857// layout(std140, set = 0, binding = 0) readonly buffer Input {
858//   float elements[];
859// } input_data;
860// layout(std140, set = 0, binding = 1) writeonly buffer Output {
861//   float elements[];
862// } output_data;
863//
864// void not_called_func() {
865//   // place OpUnreachable here
866// }
867//
868// uint modulo4(uint val) {
869//   switch (val % uint(4)) {
870//     case 0:  return 3;
871//     case 1:  return 2;
872//     case 2:  return 1;
873//     case 3:  return 0;
874//     default: return 100; // place OpUnreachable here
875//   }
876// }
877//
878// uint const5() {
879//   return 5;
880//   // place OpUnreachable here
881// }
882//
883// void main() {
884//   uint x = gl_GlobalInvocationID.x;
885//   if (const5() > modulo4(1000)) {
886//     output_data.elements[x] = -input_data.elements[x];
887//   } else {
888//     // place OpUnreachable here
889//     output_data.elements[x] = input_data.elements[x];
890//   }
891// }
892
893tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
894{
895	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
896	ComputeShaderSpec				spec;
897	de::Random						rnd				(deStringHash(group->getName()));
898	const int						numElements		= 100;
899	vector<float>					positiveFloats	(numElements, 0);
900	vector<float>					negativeFloats	(numElements, 0);
901
902	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
903
904	for (size_t ndx = 0; ndx < numElements; ++ndx)
905		negativeFloats[ndx] = -positiveFloats[ndx];
906
907	spec.assembly =
908		string(s_ShaderPreamble) +
909
910		"OpSource GLSL 430\n"
911		"OpName %main            \"main\"\n"
912		"OpName %func_not_called_func \"not_called_func(\"\n"
913		"OpName %func_modulo4         \"modulo4(u1;\"\n"
914		"OpName %func_const5          \"const5(\"\n"
915		"OpName %id                   \"gl_GlobalInvocationID\"\n"
916
917		"OpDecorate %id BuiltIn GlobalInvocationId\n"
918
919		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
920
921		"%u32ptr    = OpTypePointer Function %u32\n"
922		"%uintfuint = OpTypeFunction %u32 %u32ptr\n"
923		"%unitf     = OpTypeFunction %u32\n"
924
925		"%id        = OpVariable %uvec3ptr Input\n"
926		"%zero      = OpConstant %u32 0\n"
927		"%one       = OpConstant %u32 1\n"
928		"%two       = OpConstant %u32 2\n"
929		"%three     = OpConstant %u32 3\n"
930		"%four      = OpConstant %u32 4\n"
931		"%five      = OpConstant %u32 5\n"
932		"%hundred   = OpConstant %u32 100\n"
933		"%thousand  = OpConstant %u32 1000\n"
934
935		+ string(s_InputOutputBuffer) +
936
937		// Main()
938		"%main   = OpFunction %void None %voidf\n"
939		"%main_entry  = OpLabel\n"
940		"%v_thousand  = OpVariable %u32ptr Function %thousand\n"
941		"%idval       = OpLoad %uvec3 %id\n"
942		"%x           = OpCompositeExtract %u32 %idval 0\n"
943		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
944		"%inval       = OpLoad %f32 %inloc\n"
945		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
946		"%ret_const5  = OpFunctionCall %u32 %func_const5\n"
947		"%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
948		"%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
949		"               OpSelectionMerge %if_end None\n"
950		"               OpBranchConditional %cmp_gt %if_true %if_false\n"
951		"%if_true     = OpLabel\n"
952		"%negate      = OpFNegate %f32 %inval\n"
953		"               OpStore %outloc %negate\n"
954		"               OpBranch %if_end\n"
955		"%if_false    = OpLabel\n"
956		"               OpUnreachable\n" // Unreachable else branch for if statement
957		"%if_end      = OpLabel\n"
958		"               OpReturn\n"
959		"               OpFunctionEnd\n"
960
961		// not_called_function()
962		"%func_not_called_func  = OpFunction %void None %voidf\n"
963		"%not_called_func_entry = OpLabel\n"
964		"                         OpUnreachable\n" // Unreachable entry block in not called static function
965		"                         OpFunctionEnd\n"
966
967		// modulo4()
968		"%func_modulo4  = OpFunction %u32 None %uintfuint\n"
969		"%valptr        = OpFunctionParameter %u32ptr\n"
970		"%modulo4_entry = OpLabel\n"
971		"%val           = OpLoad %u32 %valptr\n"
972		"%modulo        = OpUMod %u32 %val %four\n"
973		"                 OpSelectionMerge %switch_merge None\n"
974		"                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
975		"%case0         = OpLabel\n"
976		"                 OpReturnValue %three\n"
977		"%case1         = OpLabel\n"
978		"                 OpReturnValue %two\n"
979		"%case2         = OpLabel\n"
980		"                 OpReturnValue %one\n"
981		"%case3         = OpLabel\n"
982		"                 OpReturnValue %zero\n"
983		"%default       = OpLabel\n"
984		"                 OpUnreachable\n" // Unreachable default case for switch statement
985		"%switch_merge  = OpLabel\n"
986		"                 OpUnreachable\n" // Unreachable merge block for switch statement
987		"                 OpFunctionEnd\n"
988
989		// const5()
990		"%func_const5  = OpFunction %u32 None %unitf\n"
991		"%const5_entry = OpLabel\n"
992		"                OpReturnValue %five\n"
993		"%unreachable  = OpLabel\n"
994		"                OpUnreachable\n" // Unreachable block in function
995		"                OpFunctionEnd\n";
996	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
997	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
998	spec.numWorkGroups = IVec3(numElements, 1, 1);
999
1000	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
1001
1002	return group.release();
1003}
1004
1005// Assembly code used for testing decoration group is based on GLSL source code:
1006//
1007// #version 430
1008//
1009// layout(std140, set = 0, binding = 0) readonly buffer Input0 {
1010//   float elements[];
1011// } input_data0;
1012// layout(std140, set = 0, binding = 1) readonly buffer Input1 {
1013//   float elements[];
1014// } input_data1;
1015// layout(std140, set = 0, binding = 2) readonly buffer Input2 {
1016//   float elements[];
1017// } input_data2;
1018// layout(std140, set = 0, binding = 3) readonly buffer Input3 {
1019//   float elements[];
1020// } input_data3;
1021// layout(std140, set = 0, binding = 4) readonly buffer Input4 {
1022//   float elements[];
1023// } input_data4;
1024// layout(std140, set = 0, binding = 5) writeonly buffer Output {
1025//   float elements[];
1026// } output_data;
1027//
1028// void main() {
1029//   uint x = gl_GlobalInvocationID.x;
1030//   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
1031// }
1032tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
1033{
1034	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
1035	ComputeShaderSpec				spec;
1036	de::Random						rnd				(deStringHash(group->getName()));
1037	const int						numElements		= 100;
1038	vector<float>					inputFloats0	(numElements, 0);
1039	vector<float>					inputFloats1	(numElements, 0);
1040	vector<float>					inputFloats2	(numElements, 0);
1041	vector<float>					inputFloats3	(numElements, 0);
1042	vector<float>					inputFloats4	(numElements, 0);
1043	vector<float>					outputFloats	(numElements, 0);
1044
1045	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
1046	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
1047	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
1048	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
1049	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
1050
1051	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1052	floorAll(inputFloats0);
1053	floorAll(inputFloats1);
1054	floorAll(inputFloats2);
1055	floorAll(inputFloats3);
1056	floorAll(inputFloats4);
1057
1058	for (size_t ndx = 0; ndx < numElements; ++ndx)
1059		outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
1060
1061	spec.assembly =
1062		string(s_ShaderPreamble) +
1063
1064		"OpSource GLSL 430\n"
1065		"OpName %main \"main\"\n"
1066		"OpName %id \"gl_GlobalInvocationID\"\n"
1067
1068		// Not using group decoration on variable.
1069		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1070		// Not using group decoration on type.
1071		"OpDecorate %f32arr ArrayStride 4\n"
1072
1073		"OpDecorate %groups BufferBlock\n"
1074		"OpDecorate %groupm Offset 0\n"
1075		"%groups = OpDecorationGroup\n"
1076		"%groupm = OpDecorationGroup\n"
1077
1078		// Group decoration on multiple structs.
1079		"OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
1080		// Group decoration on multiple struct members.
1081		"OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
1082
1083		"OpDecorate %group1 DescriptorSet 0\n"
1084		"OpDecorate %group3 DescriptorSet 0\n"
1085		"OpDecorate %group3 NonWritable\n"
1086		"OpDecorate %group3 Restrict\n"
1087		"%group0 = OpDecorationGroup\n"
1088		"%group1 = OpDecorationGroup\n"
1089		"%group3 = OpDecorationGroup\n"
1090
1091		// Applying the same decoration group multiple times.
1092		"OpGroupDecorate %group1 %outdata\n"
1093		"OpGroupDecorate %group1 %outdata\n"
1094		"OpGroupDecorate %group1 %outdata\n"
1095		"OpDecorate %outdata DescriptorSet 0\n"
1096		"OpDecorate %outdata Binding 5\n"
1097		// Applying decoration group containing nothing.
1098		"OpGroupDecorate %group0 %indata0\n"
1099		"OpDecorate %indata0 DescriptorSet 0\n"
1100		"OpDecorate %indata0 Binding 0\n"
1101		// Applying decoration group containing one decoration.
1102		"OpGroupDecorate %group1 %indata1\n"
1103		"OpDecorate %indata1 Binding 1\n"
1104		// Applying decoration group containing multiple decorations.
1105		"OpGroupDecorate %group3 %indata2 %indata3\n"
1106		"OpDecorate %indata2 Binding 2\n"
1107		"OpDecorate %indata3 Binding 3\n"
1108		// Applying multiple decoration groups (with overlapping).
1109		"OpGroupDecorate %group0 %indata4\n"
1110		"OpGroupDecorate %group1 %indata4\n"
1111		"OpGroupDecorate %group3 %indata4\n"
1112		"OpDecorate %indata4 Binding 4\n"
1113
1114		+ string(s_CommonTypes) +
1115
1116		"%id   = OpVariable %uvec3ptr Input\n"
1117		"%zero = OpConstant %i32 0\n"
1118
1119		"%outbuf    = OpTypeStruct %f32arr\n"
1120		"%outbufptr = OpTypePointer Uniform %outbuf\n"
1121		"%outdata   = OpVariable %outbufptr Uniform\n"
1122		"%inbuf0    = OpTypeStruct %f32arr\n"
1123		"%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
1124		"%indata0   = OpVariable %inbuf0ptr Uniform\n"
1125		"%inbuf1    = OpTypeStruct %f32arr\n"
1126		"%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
1127		"%indata1   = OpVariable %inbuf1ptr Uniform\n"
1128		"%inbuf2    = OpTypeStruct %f32arr\n"
1129		"%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
1130		"%indata2   = OpVariable %inbuf2ptr Uniform\n"
1131		"%inbuf3    = OpTypeStruct %f32arr\n"
1132		"%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
1133		"%indata3   = OpVariable %inbuf3ptr Uniform\n"
1134		"%inbuf4    = OpTypeStruct %f32arr\n"
1135		"%inbufptr  = OpTypePointer Uniform %inbuf4\n"
1136		"%indata4   = OpVariable %inbufptr Uniform\n"
1137
1138		"%main   = OpFunction %void None %voidf\n"
1139		"%label  = OpLabel\n"
1140		"%idval  = OpLoad %uvec3 %id\n"
1141		"%x      = OpCompositeExtract %u32 %idval 0\n"
1142		"%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
1143		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1144		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1145		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
1146		"%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
1147		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1148		"%inval0 = OpLoad %f32 %inloc0\n"
1149		"%inval1 = OpLoad %f32 %inloc1\n"
1150		"%inval2 = OpLoad %f32 %inloc2\n"
1151		"%inval3 = OpLoad %f32 %inloc3\n"
1152		"%inval4 = OpLoad %f32 %inloc4\n"
1153		"%add0   = OpFAdd %f32 %inval0 %inval1\n"
1154		"%add1   = OpFAdd %f32 %add0 %inval2\n"
1155		"%add2   = OpFAdd %f32 %add1 %inval3\n"
1156		"%add    = OpFAdd %f32 %add2 %inval4\n"
1157		"          OpStore %outloc %add\n"
1158		"          OpReturn\n"
1159		"          OpFunctionEnd\n";
1160	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
1161	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1162	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1163	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
1164	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
1165	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1166	spec.numWorkGroups = IVec3(numElements, 1, 1);
1167
1168	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
1169
1170	return group.release();
1171}
1172
1173struct SpecConstantTwoIntCase
1174{
1175	const char*		caseName;
1176	const char*		scDefinition0;
1177	const char*		scDefinition1;
1178	const char*		scResultType;
1179	const char*		scOperation;
1180	deInt32			scActualValue0;
1181	deInt32			scActualValue1;
1182	const char*		resultOperation;
1183	vector<deInt32>	expectedOutput;
1184
1185					SpecConstantTwoIntCase (const char* name,
1186											const char* definition0,
1187											const char* definition1,
1188											const char* resultType,
1189											const char* operation,
1190											deInt32 value0,
1191											deInt32 value1,
1192											const char* resultOp,
1193											const vector<deInt32>& output)
1194						: caseName			(name)
1195						, scDefinition0		(definition0)
1196						, scDefinition1		(definition1)
1197						, scResultType		(resultType)
1198						, scOperation		(operation)
1199						, scActualValue0	(value0)
1200						, scActualValue1	(value1)
1201						, resultOperation	(resultOp)
1202						, expectedOutput	(output) {}
1203};
1204
1205tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
1206{
1207	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
1208	vector<SpecConstantTwoIntCase>	cases;
1209	de::Random						rnd				(deStringHash(group->getName()));
1210	const int						numElements		= 100;
1211	vector<deInt32>					inputInts		(numElements, 0);
1212	vector<deInt32>					outputInts1		(numElements, 0);
1213	vector<deInt32>					outputInts2		(numElements, 0);
1214	vector<deInt32>					outputInts3		(numElements, 0);
1215	vector<deInt32>					outputInts4		(numElements, 0);
1216	const StringTemplate			shaderTemplate	(
1217		string(s_ShaderPreamble) +
1218
1219		"OpName %main           \"main\"\n"
1220		"OpName %id             \"gl_GlobalInvocationID\"\n"
1221
1222		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1223		"OpDecorate %sc_0  SpecId 0\n"
1224		"OpDecorate %sc_1  SpecId 1\n"
1225		"OpDecorate %i32arr ArrayStride 4\n"
1226
1227		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1228
1229		"%i32ptr    = OpTypePointer Uniform %i32\n"
1230		"%i32arr    = OpTypeRuntimeArray %i32\n"
1231		"%boolptr   = OpTypePointer Uniform %bool\n"
1232		"%boolarr   = OpTypeRuntimeArray %bool\n"
1233		"%buf     = OpTypeStruct %i32arr\n"
1234		"%bufptr  = OpTypePointer Uniform %buf\n"
1235		"%indata    = OpVariable %bufptr Uniform\n"
1236		"%outdata   = OpVariable %bufptr Uniform\n"
1237
1238		"%id        = OpVariable %uvec3ptr Input\n"
1239		"%zero      = OpConstant %i32 0\n"
1240
1241		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
1242		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
1243		"%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
1244
1245		"%main      = OpFunction %void None %voidf\n"
1246		"%label     = OpLabel\n"
1247		"%idval     = OpLoad %uvec3 %id\n"
1248		"%x         = OpCompositeExtract %u32 %idval 0\n"
1249		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1250		"%inval     = OpLoad %i32 %inloc\n"
1251		"%final     = ${GEN_RESULT}\n"
1252		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1253		"             OpStore %outloc %final\n"
1254		"             OpReturn\n"
1255		"             OpFunctionEnd\n");
1256
1257	fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
1258
1259	for (size_t ndx = 0; ndx < numElements; ++ndx)
1260	{
1261		outputInts1[ndx] = inputInts[ndx] + 42;
1262		outputInts2[ndx] = inputInts[ndx];
1263		outputInts3[ndx] = inputInts[ndx] - 11200;
1264		outputInts4[ndx] = inputInts[ndx] + 1;
1265	}
1266
1267	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
1268	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
1269	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
1270
1271	cases.push_back(SpecConstantTwoIntCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",			62,		-20,	addScToInput,		outputInts1));
1272	cases.push_back(SpecConstantTwoIntCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",			100,	58,		addScToInput,		outputInts1));
1273	cases.push_back(SpecConstantTwoIntCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",			-2,		-21,	addScToInput,		outputInts1));
1274	cases.push_back(SpecConstantTwoIntCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",			-126,	-3,		addScToInput,		outputInts1));
1275	cases.push_back(SpecConstantTwoIntCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",			126,	3,		addScToInput,		outputInts1));
1276	cases.push_back(SpecConstantTwoIntCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
1277	cases.push_back(SpecConstantTwoIntCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
1278	cases.push_back(SpecConstantTwoIntCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",			342,	50,		addScToInput,		outputInts1));
1279	cases.push_back(SpecConstantTwoIntCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",			42,		63,		addScToInput,		outputInts1));
1280	cases.push_back(SpecConstantTwoIntCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",			34,		8,		addScToInput,		outputInts1));
1281	cases.push_back(SpecConstantTwoIntCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",			18,		56,		addScToInput,		outputInts1));
1282	cases.push_back(SpecConstantTwoIntCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
1283	cases.push_back(SpecConstantTwoIntCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
1284	cases.push_back(SpecConstantTwoIntCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,		1,		addScToInput,		outputInts1));
1285	cases.push_back(SpecConstantTwoIntCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",			-20,	-10,	selectTrueUsingSc,	outputInts2));
1286	cases.push_back(SpecConstantTwoIntCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",			10,		20,		selectTrueUsingSc,	outputInts2));
1287	cases.push_back(SpecConstantTwoIntCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
1288	cases.push_back(SpecConstantTwoIntCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",			10,		5,		selectTrueUsingSc,	outputInts2));
1289	cases.push_back(SpecConstantTwoIntCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",			-10,	-10,	selectTrueUsingSc,	outputInts2));
1290	cases.push_back(SpecConstantTwoIntCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",			50,		100,	selectTrueUsingSc,	outputInts2));
1291	cases.push_back(SpecConstantTwoIntCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
1292	cases.push_back(SpecConstantTwoIntCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",			10,		10,		selectTrueUsingSc,	outputInts2));
1293	cases.push_back(SpecConstantTwoIntCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",			42,		24,		selectFalseUsingSc,	outputInts2));
1294	cases.push_back(SpecConstantTwoIntCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
1295	cases.push_back(SpecConstantTwoIntCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
1296	cases.push_back(SpecConstantTwoIntCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
1297	cases.push_back(SpecConstantTwoIntCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
1298	cases.push_back(SpecConstantTwoIntCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",				-42,	0,		addScToInput,		outputInts1));
1299	cases.push_back(SpecConstantTwoIntCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,	0,		addScToInput,		outputInts1));
1300	cases.push_back(SpecConstantTwoIntCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,		0,		selectFalseUsingSc,	outputInts2));
1301	cases.push_back(SpecConstantTwoIntCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,		42,		addScToInput,		outputInts1));
1302	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
1303
1304	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1305	{
1306		map<string, string>		specializations;
1307		ComputeShaderSpec		spec;
1308
1309		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
1310		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
1311		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
1312		specializations["SC_OP"]			= cases[caseNdx].scOperation;
1313		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
1314
1315		spec.assembly = shaderTemplate.specialize(specializations);
1316		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
1317		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
1318		spec.numWorkGroups = IVec3(numElements, 1, 1);
1319		spec.specConstants.push_back(cases[caseNdx].scActualValue0);
1320		spec.specConstants.push_back(cases[caseNdx].scActualValue1);
1321
1322		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
1323	}
1324
1325	ComputeShaderSpec				spec;
1326
1327	spec.assembly =
1328		string(s_ShaderPreamble) +
1329
1330		"OpName %main           \"main\"\n"
1331		"OpName %id             \"gl_GlobalInvocationID\"\n"
1332
1333		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1334		"OpDecorate %sc_0  SpecId 0\n"
1335		"OpDecorate %sc_1  SpecId 1\n"
1336		"OpDecorate %sc_2  SpecId 2\n"
1337		"OpDecorate %i32arr ArrayStride 4\n"
1338
1339		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1340
1341		"%ivec3     = OpTypeVector %i32 3\n"
1342		"%i32ptr    = OpTypePointer Uniform %i32\n"
1343		"%i32arr    = OpTypeRuntimeArray %i32\n"
1344		"%boolptr   = OpTypePointer Uniform %bool\n"
1345		"%boolarr   = OpTypeRuntimeArray %bool\n"
1346		"%buf     = OpTypeStruct %i32arr\n"
1347		"%bufptr  = OpTypePointer Uniform %buf\n"
1348		"%indata    = OpVariable %bufptr Uniform\n"
1349		"%outdata   = OpVariable %bufptr Uniform\n"
1350
1351		"%id        = OpVariable %uvec3ptr Input\n"
1352		"%zero      = OpConstant %i32 0\n"
1353		"%ivec3_0   = OpConstantComposite %ivec3 %zero %zero %zero\n"
1354
1355		"%sc_0        = OpSpecConstant %i32 0\n"
1356		"%sc_1        = OpSpecConstant %i32 0\n"
1357		"%sc_2        = OpSpecConstant %i32 0\n"
1358		"%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0   0\n"     // (sc_0, 0, 0)
1359		"%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0   1\n"     // (0, sc_1, 0)
1360		"%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0   2\n"     // (0, 0, sc_2)
1361		"%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
1362		"%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
1363		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
1364		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
1365		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
1366		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
1367		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"        // (sc_2 - sc_0) * sc_1
1368
1369		"%main      = OpFunction %void None %voidf\n"
1370		"%label     = OpLabel\n"
1371		"%idval     = OpLoad %uvec3 %id\n"
1372		"%x         = OpCompositeExtract %u32 %idval 0\n"
1373		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1374		"%inval     = OpLoad %i32 %inloc\n"
1375		"%final     = OpIAdd %i32 %inval %sc_final\n"
1376		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1377		"             OpStore %outloc %final\n"
1378		"             OpReturn\n"
1379		"             OpFunctionEnd\n";
1380	spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
1381	spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
1382	spec.numWorkGroups = IVec3(numElements, 1, 1);
1383	spec.specConstants.push_back(123);
1384	spec.specConstants.push_back(56);
1385	spec.specConstants.push_back(-77);
1386
1387	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
1388
1389	return group.release();
1390}
1391
1392tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
1393{
1394	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
1395	ComputeShaderSpec				spec1;
1396	ComputeShaderSpec				spec2;
1397	ComputeShaderSpec				spec3;
1398	de::Random						rnd				(deStringHash(group->getName()));
1399	const int						numElements		= 100;
1400	vector<float>					inputFloats		(numElements, 0);
1401	vector<float>					outputFloats1	(numElements, 0);
1402	vector<float>					outputFloats2	(numElements, 0);
1403	vector<float>					outputFloats3	(numElements, 0);
1404
1405	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
1406
1407	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1408	floorAll(inputFloats);
1409
1410	for (size_t ndx = 0; ndx < numElements; ++ndx)
1411	{
1412		switch (ndx % 3)
1413		{
1414			case 0:		outputFloats1[ndx] = inputFloats[ndx] + 5.5f;	break;
1415			case 1:		outputFloats1[ndx] = inputFloats[ndx] + 20.5f;	break;
1416			case 2:		outputFloats1[ndx] = inputFloats[ndx] + 1.75f;	break;
1417			default:	break;
1418		}
1419		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
1420		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
1421	}
1422
1423	spec1.assembly =
1424		string(s_ShaderPreamble) +
1425
1426		"OpSource GLSL 430\n"
1427		"OpName %main \"main\"\n"
1428		"OpName %id \"gl_GlobalInvocationID\"\n"
1429
1430		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1431
1432		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1433
1434		"%id = OpVariable %uvec3ptr Input\n"
1435		"%zero       = OpConstant %i32 0\n"
1436		"%three      = OpConstant %u32 3\n"
1437		"%constf5p5  = OpConstant %f32 5.5\n"
1438		"%constf20p5 = OpConstant %f32 20.5\n"
1439		"%constf1p75 = OpConstant %f32 1.75\n"
1440		"%constf8p5  = OpConstant %f32 8.5\n"
1441		"%constf6p5  = OpConstant %f32 6.5\n"
1442
1443		"%main     = OpFunction %void None %voidf\n"
1444		"%entry    = OpLabel\n"
1445		"%idval    = OpLoad %uvec3 %id\n"
1446		"%x        = OpCompositeExtract %u32 %idval 0\n"
1447		"%selector = OpUMod %u32 %x %three\n"
1448		"            OpSelectionMerge %phi None\n"
1449		"            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
1450
1451		// Case 1 before OpPhi.
1452		"%case1    = OpLabel\n"
1453		"            OpBranch %phi\n"
1454
1455		"%default  = OpLabel\n"
1456		"            OpUnreachable\n"
1457
1458		"%phi      = OpLabel\n"
1459		"%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
1460		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
1461		"%inval    = OpLoad %f32 %inloc\n"
1462		"%add      = OpFAdd %f32 %inval %operand\n"
1463		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
1464		"            OpStore %outloc %add\n"
1465		"            OpReturn\n"
1466
1467		// Case 0 after OpPhi.
1468		"%case0    = OpLabel\n"
1469		"            OpBranch %phi\n"
1470
1471
1472		// Case 2 after OpPhi.
1473		"%case2    = OpLabel\n"
1474		"            OpBranch %phi\n"
1475
1476		"            OpFunctionEnd\n";
1477	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1478	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
1479	spec1.numWorkGroups = IVec3(numElements, 1, 1);
1480
1481	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
1482
1483	spec2.assembly =
1484		string(s_ShaderPreamble) +
1485
1486		"OpName %main \"main\"\n"
1487		"OpName %id \"gl_GlobalInvocationID\"\n"
1488
1489		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1490
1491		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1492
1493		"%id         = OpVariable %uvec3ptr Input\n"
1494		"%zero       = OpConstant %i32 0\n"
1495		"%one        = OpConstant %i32 1\n"
1496		"%three      = OpConstant %i32 3\n"
1497		"%constf6p5  = OpConstant %f32 6.5\n"
1498
1499		"%main       = OpFunction %void None %voidf\n"
1500		"%entry      = OpLabel\n"
1501		"%idval      = OpLoad %uvec3 %id\n"
1502		"%x          = OpCompositeExtract %u32 %idval 0\n"
1503		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
1504		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1505		"%inval      = OpLoad %f32 %inloc\n"
1506		"              OpBranch %phi\n"
1507
1508		"%phi        = OpLabel\n"
1509		"%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
1510		"%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
1511		"%step_next  = OpIAdd %i32 %step %one\n"
1512		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
1513		"%still_loop = OpSLessThan %bool %step %three\n"
1514		"              OpLoopMerge %exit %phi None\n"
1515		"              OpBranchConditional %still_loop %phi %exit\n"
1516
1517		"%exit       = OpLabel\n"
1518		"              OpStore %outloc %accum\n"
1519		"              OpReturn\n"
1520		"              OpFunctionEnd\n";
1521	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1522	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
1523	spec2.numWorkGroups = IVec3(numElements, 1, 1);
1524
1525	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
1526
1527	spec3.assembly =
1528		string(s_ShaderPreamble) +
1529
1530		"OpName %main \"main\"\n"
1531		"OpName %id \"gl_GlobalInvocationID\"\n"
1532
1533		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1534
1535		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1536
1537		"%f32ptr_f   = OpTypePointer Function %f32\n"
1538		"%id         = OpVariable %uvec3ptr Input\n"
1539		"%true       = OpConstantTrue %bool\n"
1540		"%false      = OpConstantFalse %bool\n"
1541		"%zero       = OpConstant %i32 0\n"
1542		"%constf8p5  = OpConstant %f32 8.5\n"
1543
1544		"%main       = OpFunction %void None %voidf\n"
1545		"%entry      = OpLabel\n"
1546		"%b          = OpVariable %f32ptr_f Function %constf8p5\n"
1547		"%idval      = OpLoad %uvec3 %id\n"
1548		"%x          = OpCompositeExtract %u32 %idval 0\n"
1549		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
1550		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1551		"%a_init     = OpLoad %f32 %inloc\n"
1552		"%b_init     = OpLoad %f32 %b\n"
1553		"              OpBranch %phi\n"
1554
1555		"%phi        = OpLabel\n"
1556		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
1557		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
1558		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
1559		"              OpLoopMerge %exit %phi None\n"
1560		"              OpBranchConditional %still_loop %phi %exit\n"
1561
1562		"%exit       = OpLabel\n"
1563		"%sub        = OpFSub %f32 %a_next %b_next\n"
1564		"              OpStore %outloc %sub\n"
1565		"              OpReturn\n"
1566		"              OpFunctionEnd\n";
1567	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1568	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
1569	spec3.numWorkGroups = IVec3(numElements, 1, 1);
1570
1571	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
1572
1573	return group.release();
1574}
1575
1576// Assembly code used for testing block order is based on GLSL source code:
1577//
1578// #version 430
1579//
1580// layout(std140, set = 0, binding = 0) readonly buffer Input {
1581//   float elements[];
1582// } input_data;
1583// layout(std140, set = 0, binding = 1) writeonly buffer Output {
1584//   float elements[];
1585// } output_data;
1586//
1587// void main() {
1588//   uint x = gl_GlobalInvocationID.x;
1589//   output_data.elements[x] = input_data.elements[x];
1590//   if (x > uint(50)) {
1591//     switch (x % uint(3)) {
1592//       case 0: output_data.elements[x] += 1.5f; break;
1593//       case 1: output_data.elements[x] += 42.f; break;
1594//       case 2: output_data.elements[x] -= 27.f; break;
1595//       default: break;
1596//     }
1597//   } else {
1598//     output_data.elements[x] = -input_data.elements[x];
1599//   }
1600// }
1601tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
1602{
1603	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
1604	ComputeShaderSpec				spec;
1605	de::Random						rnd				(deStringHash(group->getName()));
1606	const int						numElements		= 100;
1607	vector<float>					inputFloats		(numElements, 0);
1608	vector<float>					outputFloats	(numElements, 0);
1609
1610	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
1611
1612	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1613	floorAll(inputFloats);
1614
1615	for (size_t ndx = 0; ndx <= 50; ++ndx)
1616		outputFloats[ndx] = -inputFloats[ndx];
1617
1618	for (size_t ndx = 51; ndx < numElements; ++ndx)
1619	{
1620		switch (ndx % 3)
1621		{
1622			case 0:		outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
1623			case 1:		outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
1624			case 2:		outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
1625			default:	break;
1626		}
1627	}
1628
1629	spec.assembly =
1630		string(s_ShaderPreamble) +
1631
1632		"OpSource GLSL 430\n"
1633		"OpName %main \"main\"\n"
1634		"OpName %id \"gl_GlobalInvocationID\"\n"
1635
1636		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1637
1638		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1639
1640		"%u32ptr       = OpTypePointer Function %u32\n"
1641		"%u32ptr_input = OpTypePointer Input %u32\n"
1642
1643		+ string(s_InputOutputBuffer) +
1644
1645		"%id        = OpVariable %uvec3ptr Input\n"
1646		"%zero      = OpConstant %i32 0\n"
1647		"%const3    = OpConstant %u32 3\n"
1648		"%const50   = OpConstant %u32 50\n"
1649		"%constf1p5 = OpConstant %f32 1.5\n"
1650		"%constf27  = OpConstant %f32 27.0\n"
1651		"%constf42  = OpConstant %f32 42.0\n"
1652
1653		"%main = OpFunction %void None %voidf\n"
1654
1655		// entry block.
1656		"%entry    = OpLabel\n"
1657
1658		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
1659		"%xvar     = OpVariable %u32ptr Function\n"
1660		"%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
1661		"%x        = OpLoad %u32 %xptr\n"
1662		"            OpStore %xvar %x\n"
1663
1664		"%cmp      = OpUGreaterThan %bool %x %const50\n"
1665		"            OpSelectionMerge %if_merge None\n"
1666		"            OpBranchConditional %cmp %if_true %if_false\n"
1667
1668		// Merge block for switch-statement: placed at the beginning.
1669		"%switch_merge = OpLabel\n"
1670		"                OpBranch %if_merge\n"
1671
1672		// Case 1 for switch-statement.
1673		"%case1    = OpLabel\n"
1674		"%x_1      = OpLoad %u32 %xvar\n"
1675		"%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
1676		"%inval_1  = OpLoad %f32 %inloc_1\n"
1677		"%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
1678		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
1679		"            OpStore %outloc_1 %addf42\n"
1680		"            OpBranch %switch_merge\n"
1681
1682		// False branch for if-statement: placed in the middle of switch cases and before true branch.
1683		"%if_false = OpLabel\n"
1684		"%x_f      = OpLoad %u32 %xvar\n"
1685		"%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
1686		"%inval_f  = OpLoad %f32 %inloc_f\n"
1687		"%negate   = OpFNegate %f32 %inval_f\n"
1688		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
1689		"            OpStore %outloc_f %negate\n"
1690		"            OpBranch %if_merge\n"
1691
1692		// Merge block for if-statement: placed in the middle of true and false branch.
1693		"%if_merge = OpLabel\n"
1694		"            OpReturn\n"
1695
1696		// True branch for if-statement: placed in the middle of swtich cases and after the false branch.
1697		"%if_true  = OpLabel\n"
1698		"%xval_t   = OpLoad %u32 %xvar\n"
1699		"%mod      = OpUMod %u32 %xval_t %const3\n"
1700		"            OpSelectionMerge %switch_merge None\n"
1701		"            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
1702
1703		// Case 2 for switch-statement.
1704		"%case2    = OpLabel\n"
1705		"%x_2      = OpLoad %u32 %xvar\n"
1706		"%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
1707		"%inval_2  = OpLoad %f32 %inloc_2\n"
1708		"%subf27   = OpFSub %f32 %inval_2 %constf27\n"
1709		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
1710		"            OpStore %outloc_2 %subf27\n"
1711		"            OpBranch %switch_merge\n"
1712
1713		// Default case for switch-statement: placed in the middle of normal cases.
1714		"%default = OpLabel\n"
1715		"           OpBranch %switch_merge\n"
1716
1717		// Case 0 for switch-statement: out of order.
1718		"%case0    = OpLabel\n"
1719		"%x_0      = OpLoad %u32 %xvar\n"
1720		"%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
1721		"%inval_0  = OpLoad %f32 %inloc_0\n"
1722		"%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
1723		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
1724		"            OpStore %outloc_0 %addf1p5\n"
1725		"            OpBranch %switch_merge\n"
1726
1727		"            OpFunctionEnd\n";
1728	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1729	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1730	spec.numWorkGroups = IVec3(numElements, 1, 1);
1731
1732	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
1733
1734	return group.release();
1735}
1736
1737tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
1738{
1739	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
1740	ComputeShaderSpec				spec1;
1741	ComputeShaderSpec				spec2;
1742	de::Random						rnd				(deStringHash(group->getName()));
1743	const int						numElements		= 100;
1744	vector<float>					inputFloats		(numElements, 0);
1745	vector<float>					outputFloats1	(numElements, 0);
1746	vector<float>					outputFloats2	(numElements, 0);
1747	fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
1748
1749	for (size_t ndx = 0; ndx < numElements; ++ndx)
1750	{
1751		outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
1752		outputFloats2[ndx] = -inputFloats[ndx];
1753	}
1754
1755	const string assembly(
1756		"OpCapability Shader\n"
1757		"OpCapability ClipDistance\n"
1758		"OpMemoryModel Logical GLSL450\n"
1759		"OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
1760		"OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
1761		// A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
1762		"OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
1763		"OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
1764		"OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
1765
1766		"OpName %comp_main1              \"entrypoint1\"\n"
1767		"OpName %comp_main2              \"entrypoint2\"\n"
1768		"OpName %vert_main               \"entrypoint2\"\n"
1769		"OpName %id                      \"gl_GlobalInvocationID\"\n"
1770		"OpName %vert_builtin_st         \"gl_PerVertex\"\n"
1771		"OpName %vertexIndex             \"gl_VertexIndex\"\n"
1772		"OpName %instanceIndex           \"gl_InstanceIndex\"\n"
1773		"OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
1774		"OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
1775		"OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
1776
1777		"OpDecorate %id                      BuiltIn GlobalInvocationId\n"
1778		"OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
1779		"OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
1780		"OpDecorate %vert_builtin_st         Block\n"
1781		"OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
1782		"OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
1783		"OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
1784
1785		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1786
1787		"%zero       = OpConstant %i32 0\n"
1788		"%one        = OpConstant %u32 1\n"
1789		"%c_f32_1    = OpConstant %f32 1\n"
1790
1791		"%i32ptr              = OpTypePointer Input %i32\n"
1792		"%vec4                = OpTypeVector %f32 4\n"
1793		"%vec4ptr             = OpTypePointer Output %vec4\n"
1794		"%f32arr1             = OpTypeArray %f32 %one\n"
1795		"%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
1796		"%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
1797		"%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
1798
1799		"%id         = OpVariable %uvec3ptr Input\n"
1800		"%vertexIndex = OpVariable %i32ptr Input\n"
1801		"%instanceIndex = OpVariable %i32ptr Input\n"
1802		"%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
1803
1804		// gl_Position = vec4(1.);
1805		"%vert_main  = OpFunction %void None %voidf\n"
1806		"%vert_entry = OpLabel\n"
1807		"%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
1808		"              OpStore %position %c_vec4_1\n"
1809		"              OpReturn\n"
1810		"              OpFunctionEnd\n"
1811
1812		// Double inputs.
1813		"%comp_main1  = OpFunction %void None %voidf\n"
1814		"%comp1_entry = OpLabel\n"
1815		"%idval1      = OpLoad %uvec3 %id\n"
1816		"%x1          = OpCompositeExtract %u32 %idval1 0\n"
1817		"%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
1818		"%inval1      = OpLoad %f32 %inloc1\n"
1819		"%add         = OpFAdd %f32 %inval1 %inval1\n"
1820		"%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
1821		"               OpStore %outloc1 %add\n"
1822		"               OpReturn\n"
1823		"               OpFunctionEnd\n"
1824
1825		// Negate inputs.
1826		"%comp_main2  = OpFunction %void None %voidf\n"
1827		"%comp2_entry = OpLabel\n"
1828		"%idval2      = OpLoad %uvec3 %id\n"
1829		"%x2          = OpCompositeExtract %u32 %idval2 0\n"
1830		"%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
1831		"%inval2      = OpLoad %f32 %inloc2\n"
1832		"%neg         = OpFNegate %f32 %inval2\n"
1833		"%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
1834		"               OpStore %outloc2 %neg\n"
1835		"               OpReturn\n"
1836		"               OpFunctionEnd\n");
1837
1838	spec1.assembly = assembly;
1839	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1840	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
1841	spec1.numWorkGroups = IVec3(numElements, 1, 1);
1842	spec1.entryPoint = "entrypoint1";
1843
1844	spec2.assembly = assembly;
1845	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1846	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
1847	spec2.numWorkGroups = IVec3(numElements, 1, 1);
1848	spec2.entryPoint = "entrypoint2";
1849
1850	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
1851	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
1852
1853	return group.release();
1854}
1855
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// Returns a string made of num4ByteChars copies of one 4-byte UTF-8 sequence
	// (F0 9F 8C 8D, i.e. code point U+1F30D), so the result is exactly
	// num4ByteChars * 4 bytes long.
	//
	// The bytes are held in a plain char array on purpose: Microsoft compilers
	// may otherwise treat such a literal as wide (16-bit) characters, and a C++11
	// UTF-8 literal is not an option while older Microsoft compilers are supported.
	const char			earthAfrica[]	= "\xF0\x9F\x8C\x8D";
	const size_t		bytesPerChar	= sizeof(earthAfrica) - 1; // exclude the terminating NUL
	std::string			result;

	result.reserve(num4ByteChars * 4);

	for (size_t remaining = num4ByteChars; remaining != 0; --remaining)
		result.append(earthAfrica, bytesPerChar);

	return result;
}
1872
1873tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
1874{
1875	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
1876	vector<CaseParameter>			cases;
1877	de::Random						rnd				(deStringHash(group->getName()));
1878	const int						numElements		= 100;
1879	vector<float>					positiveFloats	(numElements, 0);
1880	vector<float>					negativeFloats	(numElements, 0);
1881	const StringTemplate			shaderTemplate	(
1882		"OpCapability Shader\n"
1883		"OpMemoryModel Logical GLSL450\n"
1884
1885		"OpEntryPoint GLCompute %main \"main\" %id\n"
1886		"OpExecutionMode %main LocalSize 1 1 1\n"
1887
1888		"${SOURCE}\n"
1889
1890		"OpName %main           \"main\"\n"
1891		"OpName %id             \"gl_GlobalInvocationID\"\n"
1892
1893		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1894
1895		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1896
1897		"%id        = OpVariable %uvec3ptr Input\n"
1898		"%zero      = OpConstant %i32 0\n"
1899
1900		"%main      = OpFunction %void None %voidf\n"
1901		"%label     = OpLabel\n"
1902		"%idval     = OpLoad %uvec3 %id\n"
1903		"%x         = OpCompositeExtract %u32 %idval 0\n"
1904		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1905		"%inval     = OpLoad %f32 %inloc\n"
1906		"%neg       = OpFNegate %f32 %inval\n"
1907		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1908		"             OpStore %outloc %neg\n"
1909		"             OpReturn\n"
1910		"             OpFunctionEnd\n");
1911
1912	cases.push_back(CaseParameter("unknown_source",							"OpSource Unknown 0"));
1913	cases.push_back(CaseParameter("wrong_source",							"OpSource OpenCL_C 210"));
1914	cases.push_back(CaseParameter("normal_filename",						"%fname = OpString \"filename\"\n"
1915																			"OpSource GLSL 430 %fname"));
1916	cases.push_back(CaseParameter("empty_filename",							"%fname = OpString \"\"\n"
1917																			"OpSource GLSL 430 %fname"));
1918	cases.push_back(CaseParameter("normal_source_code",						"%fname = OpString \"filename\"\n"
1919																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
1920	cases.push_back(CaseParameter("empty_source_code",						"%fname = OpString \"filename\"\n"
1921																			"OpSource GLSL 430 %fname \"\""));
1922	cases.push_back(CaseParameter("long_source_code",						"%fname = OpString \"filename\"\n"
1923																			"OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
1924	cases.push_back(CaseParameter("utf8_source_code",						"%fname = OpString \"filename\"\n"
1925																			"OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
1926	cases.push_back(CaseParameter("normal_sourcecontinued",					"%fname = OpString \"filename\"\n"
1927																			"OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
1928																			"OpSourceContinued \"id main() {}\""));
1929	cases.push_back(CaseParameter("empty_sourcecontinued",					"%fname = OpString \"filename\"\n"
1930																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1931																			"OpSourceContinued \"\""));
1932	cases.push_back(CaseParameter("long_sourcecontinued",					"%fname = OpString \"filename\"\n"
1933																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1934																			"OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
1935	cases.push_back(CaseParameter("utf8_sourcecontinued",					"%fname = OpString \"filename\"\n"
1936																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1937																			"OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
1938	cases.push_back(CaseParameter("multi_sourcecontinued",					"%fname = OpString \"filename\"\n"
1939																			"OpSource GLSL 430 %fname \"#version 430\n\"\n"
1940																			"OpSourceContinued \"void\"\n"
1941																			"OpSourceContinued \"main()\"\n"
1942																			"OpSourceContinued \"{}\""));
1943	cases.push_back(CaseParameter("empty_source_before_sourcecontinued",	"%fname = OpString \"filename\"\n"
1944																			"OpSource GLSL 430 %fname \"\"\n"
1945																			"OpSourceContinued \"#version 430\nvoid main() {}\""));
1946
1947	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1948
1949	for (size_t ndx = 0; ndx < numElements; ++ndx)
1950		negativeFloats[ndx] = -positiveFloats[ndx];
1951
1952	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1953	{
1954		map<string, string>		specializations;
1955		ComputeShaderSpec		spec;
1956
1957		specializations["SOURCE"] = cases[caseNdx].param;
1958		spec.assembly = shaderTemplate.specialize(specializations);
1959		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1960		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1961		spec.numWorkGroups = IVec3(numElements, 1, 1);
1962
1963		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1964	}
1965
1966	return group.release();
1967}
1968
1969tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
1970{
1971	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
1972	vector<CaseParameter>			cases;
1973	de::Random						rnd				(deStringHash(group->getName()));
1974	const int						numElements		= 100;
1975	vector<float>					inputFloats		(numElements, 0);
1976	vector<float>					outputFloats	(numElements, 0);
1977	const StringTemplate			shaderTemplate	(
1978		string(s_ShaderPreamble) +
1979
1980		"OpSourceExtension \"${EXTENSION}\"\n"
1981
1982		"OpName %main           \"main\"\n"
1983		"OpName %id             \"gl_GlobalInvocationID\"\n"
1984
1985		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1986
1987		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1988
1989		"%id        = OpVariable %uvec3ptr Input\n"
1990		"%zero      = OpConstant %i32 0\n"
1991
1992		"%main      = OpFunction %void None %voidf\n"
1993		"%label     = OpLabel\n"
1994		"%idval     = OpLoad %uvec3 %id\n"
1995		"%x         = OpCompositeExtract %u32 %idval 0\n"
1996		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1997		"%inval     = OpLoad %f32 %inloc\n"
1998		"%neg       = OpFNegate %f32 %inval\n"
1999		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2000		"             OpStore %outloc %neg\n"
2001		"             OpReturn\n"
2002		"             OpFunctionEnd\n");
2003
2004	cases.push_back(CaseParameter("empty_extension",	""));
2005	cases.push_back(CaseParameter("real_extension",		"GL_ARB_texture_rectangle"));
2006	cases.push_back(CaseParameter("fake_extension",		"GL_ARB_im_the_ultimate_extension"));
2007	cases.push_back(CaseParameter("utf8_extension",		"GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
2008	cases.push_back(CaseParameter("long_extension",		makeLongUTF8String(65533) + "ccc")); // word count: 65535
2009
2010	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
2011
2012	for (size_t ndx = 0; ndx < numElements; ++ndx)
2013		outputFloats[ndx] = -inputFloats[ndx];
2014
2015	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2016	{
2017		map<string, string>		specializations;
2018		ComputeShaderSpec		spec;
2019
2020		specializations["EXTENSION"] = cases[caseNdx].param;
2021		spec.assembly = shaderTemplate.specialize(specializations);
2022		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2023		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2024		spec.numWorkGroups = IVec3(numElements, 1, 1);
2025
2026		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2027	}
2028
2029	return group.release();
2030}
2031
2032// Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
2033tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
2034{
2035	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
2036	vector<CaseParameter>			cases;
2037	de::Random						rnd				(deStringHash(group->getName()));
2038	const int						numElements		= 100;
2039	vector<float>					positiveFloats	(numElements, 0);
2040	vector<float>					negativeFloats	(numElements, 0);
2041	const StringTemplate			shaderTemplate	(
2042		string(s_ShaderPreamble) +
2043
2044		"OpSource GLSL 430\n"
2045		"OpName %main           \"main\"\n"
2046		"OpName %id             \"gl_GlobalInvocationID\"\n"
2047
2048		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2049
2050		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2051
2052		"${TYPE}\n"
2053		"%null      = OpConstantNull %type\n"
2054
2055		"%id        = OpVariable %uvec3ptr Input\n"
2056		"%zero      = OpConstant %i32 0\n"
2057
2058		"%main      = OpFunction %void None %voidf\n"
2059		"%label     = OpLabel\n"
2060		"%idval     = OpLoad %uvec3 %id\n"
2061		"%x         = OpCompositeExtract %u32 %idval 0\n"
2062		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2063		"%inval     = OpLoad %f32 %inloc\n"
2064		"%neg       = OpFNegate %f32 %inval\n"
2065		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2066		"             OpStore %outloc %neg\n"
2067		"             OpReturn\n"
2068		"             OpFunctionEnd\n");
2069
2070	cases.push_back(CaseParameter("bool",			"%type = OpTypeBool"));
2071	cases.push_back(CaseParameter("sint32",			"%type = OpTypeInt 32 1"));
2072	cases.push_back(CaseParameter("uint32",			"%type = OpTypeInt 32 0"));
2073	cases.push_back(CaseParameter("float32",		"%type = OpTypeFloat 32"));
2074	cases.push_back(CaseParameter("vec4float32",	"%type = OpTypeVector %f32 4"));
2075	cases.push_back(CaseParameter("vec3bool",		"%type = OpTypeVector %bool 3"));
2076	cases.push_back(CaseParameter("vec2uint32",		"%type = OpTypeVector %u32 2"));
2077	cases.push_back(CaseParameter("matrix",			"%type = OpTypeMatrix %fvec3 3"));
2078	cases.push_back(CaseParameter("array",			"%100 = OpConstant %u32 100\n"
2079													"%type = OpTypeArray %i32 %100"));
2080	cases.push_back(CaseParameter("struct",			"%type = OpTypeStruct %f32 %i32 %u32"));
2081	cases.push_back(CaseParameter("pointer",		"%type = OpTypePointer Function %i32"));
2082
2083	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2084
2085	for (size_t ndx = 0; ndx < numElements; ++ndx)
2086		negativeFloats[ndx] = -positiveFloats[ndx];
2087
2088	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2089	{
2090		map<string, string>		specializations;
2091		ComputeShaderSpec		spec;
2092
2093		specializations["TYPE"] = cases[caseNdx].param;
2094		spec.assembly = shaderTemplate.specialize(specializations);
2095		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2096		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2097		spec.numWorkGroups = IVec3(numElements, 1, 1);
2098
2099		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2100	}
2101
2102	return group.release();
2103}
2104
2105// Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
2106tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
2107{
2108	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
2109	vector<CaseParameter>			cases;
2110	de::Random						rnd				(deStringHash(group->getName()));
2111	const int						numElements		= 100;
2112	vector<float>					positiveFloats	(numElements, 0);
2113	vector<float>					negativeFloats	(numElements, 0);
2114	const StringTemplate			shaderTemplate	(
2115		string(s_ShaderPreamble) +
2116
2117		"OpSource GLSL 430\n"
2118		"OpName %main           \"main\"\n"
2119		"OpName %id             \"gl_GlobalInvocationID\"\n"
2120
2121		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2122
2123		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2124
2125		"%id        = OpVariable %uvec3ptr Input\n"
2126		"%zero      = OpConstant %i32 0\n"
2127
2128		"${CONSTANT}\n"
2129
2130		"%main      = OpFunction %void None %voidf\n"
2131		"%label     = OpLabel\n"
2132		"%idval     = OpLoad %uvec3 %id\n"
2133		"%x         = OpCompositeExtract %u32 %idval 0\n"
2134		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2135		"%inval     = OpLoad %f32 %inloc\n"
2136		"%neg       = OpFNegate %f32 %inval\n"
2137		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2138		"             OpStore %outloc %neg\n"
2139		"             OpReturn\n"
2140		"             OpFunctionEnd\n");
2141
2142	cases.push_back(CaseParameter("vector",			"%five = OpConstant %u32 5\n"
2143													"%const = OpConstantComposite %uvec3 %five %zero %five"));
2144	cases.push_back(CaseParameter("matrix",			"%m3fvec3 = OpTypeMatrix %fvec3 3\n"
2145													"%ten = OpConstant %f32 10.\n"
2146													"%fzero = OpConstant %f32 0.\n"
2147													"%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
2148													"%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
2149	cases.push_back(CaseParameter("struct",			"%m2vec3 = OpTypeMatrix %fvec3 2\n"
2150													"%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
2151													"%fzero = OpConstant %f32 0.\n"
2152													"%one = OpConstant %f32 1.\n"
2153													"%point5 = OpConstant %f32 0.5\n"
2154													"%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
2155													"%mat = OpConstantComposite %m2vec3 %vec %vec\n"
2156													"%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
2157	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %u32 %f32\n"
2158													"%st2 = OpTypeStruct %i32 %i32\n"
2159													"%struct = OpTypeStruct %st1 %st2\n"
2160													"%point5 = OpConstant %f32 0.5\n"
2161													"%one = OpConstant %u32 1\n"
2162													"%ten = OpConstant %i32 10\n"
2163													"%st1val = OpConstantComposite %st1 %one %point5\n"
2164													"%st2val = OpConstantComposite %st2 %ten %ten\n"
2165													"%const = OpConstantComposite %struct %st1val %st2val"));
2166
2167	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2168
2169	for (size_t ndx = 0; ndx < numElements; ++ndx)
2170		negativeFloats[ndx] = -positiveFloats[ndx];
2171
2172	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2173	{
2174		map<string, string>		specializations;
2175		ComputeShaderSpec		spec;
2176
2177		specializations["CONSTANT"] = cases[caseNdx].param;
2178		spec.assembly = shaderTemplate.specialize(specializations);
2179		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2180		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2181		spec.numWorkGroups = IVec3(numElements, 1, 1);
2182
2183		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2184	}
2185
2186	return group.release();
2187}
2188
2189// Creates a floating point number with the given exponent, and significand
2190// bits set. It can only create normalized numbers. Only the least significant
2191// 24 bits of the significand will be examined. The final bit of the
2192// significand will also be ignored. This allows alignment to be written
2193// similarly to C99 hex-floats.
2194// For example if you wanted to write 0x1.7f34p-12 you would call
2195// constructNormalizedFloat(-12, 0x7f3400)
2196float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
2197{
2198	float f = 1.0f;
2199
2200	for (deInt32 idx = 0; idx < 23; ++idx)
2201	{
2202		f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
2203		significand <<= 1;
2204	}
2205
2206	return std::ldexp(f, exponent);
2207}
2208
2209// Compare instruction for the OpQuantizeF16 compute exact case.
2210// Returns true if the output is what is expected from the test case.
2211bool compareOpQuantizeF16ComputeExactCase (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
2212{
2213	if (outputAllocs.size() != 1)
2214		return false;
2215
2216	// We really just need this for size because we cannot compare Nans.
2217	const BufferSp&	expectedOutput	= expectedOutputs[0];
2218	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
2219
2220	if (expectedOutput->getNumBytes() != 4*sizeof(float)) {
2221		return false;
2222	}
2223
2224	if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
2225		*outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
2226		return false;
2227	}
2228	outputAsFloat++;
2229
2230	if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
2231		*outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
2232		return false;
2233	}
2234	outputAsFloat++;
2235
2236	if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
2237		*outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
2238		return false;
2239	}
2240	outputAsFloat++;
2241
2242	if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
2243		*outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
2244		return false;
2245	}
2246
2247	return true;
2248}
2249
2250// Checks that every output from a test-case is a float NaN.
2251bool compareNan (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
2252{
2253	if (outputAllocs.size() != 1)
2254		return false;
2255
2256	// We really just need this for size because we cannot compare Nans.
2257	const BufferSp& expectedOutput		= expectedOutputs[0];
2258	const float* output_as_float		= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
2259
2260	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
2261	{
2262		if (!isnan(output_as_float[idx]))
2263		{
2264			return false;
2265		}
2266	}
2267
2268	return true;
2269}
2270
2271// Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
2272tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
2273{
2274	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
2275
2276	const std::string shader (
2277		string(s_ShaderPreamble) +
2278
2279		"OpSource GLSL 430\n"
2280		"OpName %main           \"main\"\n"
2281		"OpName %id             \"gl_GlobalInvocationID\"\n"
2282
2283		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2284
2285		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2286
2287		"%id        = OpVariable %uvec3ptr Input\n"
2288		"%zero      = OpConstant %i32 0\n"
2289
2290		"%main      = OpFunction %void None %voidf\n"
2291		"%label     = OpLabel\n"
2292		"%idval     = OpLoad %uvec3 %id\n"
2293		"%x         = OpCompositeExtract %u32 %idval 0\n"
2294		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2295		"%inval     = OpLoad %f32 %inloc\n"
2296		"%quant     = OpQuantizeToF16 %f32 %inval\n"
2297		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2298		"             OpStore %outloc %quant\n"
2299		"             OpReturn\n"
2300		"             OpFunctionEnd\n");
2301
2302	{
2303		ComputeShaderSpec	spec;
2304		const deUint32		numElements		= 100;
2305		vector<float>		infinities;
2306		vector<float>		results;
2307
2308		infinities.reserve(numElements);
2309		results.reserve(numElements);
2310
2311		for (size_t idx = 0; idx < numElements; ++idx)
2312		{
2313			switch(idx % 4)
2314			{
2315				case 0:
2316					infinities.push_back(std::numeric_limits<float>::infinity());
2317					results.push_back(std::numeric_limits<float>::infinity());
2318					break;
2319				case 1:
2320					infinities.push_back(-std::numeric_limits<float>::infinity());
2321					results.push_back(-std::numeric_limits<float>::infinity());
2322					break;
2323				case 2:
2324					infinities.push_back(std::ldexp(1.0f, 16));
2325					results.push_back(std::numeric_limits<float>::infinity());
2326					break;
2327				case 3:
2328					infinities.push_back(std::ldexp(-1.0f, 32));
2329					results.push_back(-std::numeric_limits<float>::infinity());
2330					break;
2331			}
2332		}
2333
2334		spec.assembly = shader;
2335		spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
2336		spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
2337		spec.numWorkGroups = IVec3(numElements, 1, 1);
2338
2339		group->addChild(new SpvAsmComputeShaderCase(
2340			testCtx, "infinities", "Check that infinities propagated and created", spec));
2341	}
2342
2343	{
2344		ComputeShaderSpec	spec;
2345		vector<float>		nans;
2346		const deUint32		numElements		= 100;
2347
2348		nans.reserve(numElements);
2349
2350		for (size_t idx = 0; idx < numElements; ++idx)
2351		{
2352			if (idx % 2 == 0)
2353			{
2354				nans.push_back(std::numeric_limits<float>::quiet_NaN());
2355			}
2356			else
2357			{
2358				nans.push_back(-std::numeric_limits<float>::quiet_NaN());
2359			}
2360		}
2361
2362		spec.assembly = shader;
2363		spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
2364		spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
2365		spec.numWorkGroups = IVec3(numElements, 1, 1);
2366		spec.verifyIO = &compareNan;
2367
2368		group->addChild(new SpvAsmComputeShaderCase(
2369			testCtx, "propagated_nans", "Check that nans are propagated", spec));
2370	}
2371
2372	{
2373		ComputeShaderSpec	spec;
2374		vector<float>		small;
2375		vector<float>		zeros;
2376		const deUint32		numElements		= 100;
2377
2378		small.reserve(numElements);
2379		zeros.reserve(numElements);
2380
2381		for (size_t idx = 0; idx < numElements; ++idx)
2382		{
2383			switch(idx % 6)
2384			{
2385				case 0:
2386					small.push_back(0.f);
2387					zeros.push_back(0.f);
2388					break;
2389				case 1:
2390					small.push_back(-0.f);
2391					zeros.push_back(-0.f);
2392					break;
2393				case 2:
2394					small.push_back(std::ldexp(1.0f, -16));
2395					zeros.push_back(0.f);
2396					break;
2397				case 3:
2398					small.push_back(std::ldexp(-1.0f, -32));
2399					zeros.push_back(-0.f);
2400					break;
2401				case 4:
2402					small.push_back(std::ldexp(1.0f, -127));
2403					zeros.push_back(0.f);
2404					break;
2405				case 5:
2406					small.push_back(-std::ldexp(1.0f, -128));
2407					zeros.push_back(-0.f);
2408					break;
2409			}
2410		}
2411
2412		spec.assembly = shader;
2413		spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
2414		spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
2415		spec.numWorkGroups = IVec3(numElements, 1, 1);
2416
2417		group->addChild(new SpvAsmComputeShaderCase(
2418			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
2419	}
2420
2421	{
2422		ComputeShaderSpec	spec;
2423		vector<float>		exact;
2424		const deUint32		numElements		= 200;
2425
2426		exact.reserve(numElements);
2427
2428		for (size_t idx = 0; idx < numElements; ++idx)
2429			exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
2430
2431		spec.assembly = shader;
2432		spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
2433		spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
2434		spec.numWorkGroups = IVec3(numElements, 1, 1);
2435
2436		group->addChild(new SpvAsmComputeShaderCase(
2437			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
2438	}
2439
2440	{
2441		ComputeShaderSpec	spec;
2442		vector<float>		inputs;
2443		const deUint32		numElements		= 4;
2444
2445		inputs.push_back(constructNormalizedFloat(8,	0x300300));
2446		inputs.push_back(-constructNormalizedFloat(-7,	0x600800));
2447		inputs.push_back(constructNormalizedFloat(2,	0x01E000));
2448		inputs.push_back(constructNormalizedFloat(1,	0xFFE000));
2449
2450		spec.assembly = shader;
2451		spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
2452		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2453		spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
2454		spec.numWorkGroups = IVec3(numElements, 1, 1);
2455
2456		group->addChild(new SpvAsmComputeShaderCase(
2457			testCtx, "rounded", "Check that are rounded when needed", spec));
2458	}
2459
2460	return group.release();
2461}
2462
2463// Performs a bitwise copy of source to the destination type Dest.
2464template <typename Dest, typename Src>
2465Dest bitwiseCast(Src source)
2466{
2467  Dest dest;
2468  DE_STATIC_ASSERT(sizeof(source) == sizeof(dest));
2469  deMemcpy(&dest, &source, sizeof(dest));
2470  return dest;
2471}
2472
2473tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
2474{
2475	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
2476
2477	const std::string shader (
2478		string(s_ShaderPreamble) +
2479
2480		"OpName %main           \"main\"\n"
2481		"OpName %id             \"gl_GlobalInvocationID\"\n"
2482
2483		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2484
2485		"OpDecorate %sc_0  SpecId 0\n"
2486		"OpDecorate %sc_1  SpecId 1\n"
2487		"OpDecorate %sc_2  SpecId 2\n"
2488		"OpDecorate %sc_3  SpecId 3\n"
2489		"OpDecorate %sc_4  SpecId 4\n"
2490		"OpDecorate %sc_5  SpecId 5\n"
2491
2492		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2493
2494		"%id        = OpVariable %uvec3ptr Input\n"
2495		"%zero      = OpConstant %i32 0\n"
2496		"%c_u32_6   = OpConstant %u32 6\n"
2497
2498		"%sc_0      = OpSpecConstant %f32 0.\n"
2499		"%sc_1      = OpSpecConstant %f32 0.\n"
2500		"%sc_2      = OpSpecConstant %f32 0.\n"
2501		"%sc_3      = OpSpecConstant %f32 0.\n"
2502		"%sc_4      = OpSpecConstant %f32 0.\n"
2503		"%sc_5      = OpSpecConstant %f32 0.\n"
2504
2505		"%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
2506		"%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
2507		"%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
2508		"%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
2509		"%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
2510		"%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
2511
2512		"%main      = OpFunction %void None %voidf\n"
2513		"%label     = OpLabel\n"
2514		"%idval     = OpLoad %uvec3 %id\n"
2515		"%x         = OpCompositeExtract %u32 %idval 0\n"
2516		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2517		"%selector  = OpUMod %u32 %x %c_u32_6\n"
2518		"            OpSelectionMerge %exit None\n"
2519		"            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
2520
2521		"%case0     = OpLabel\n"
2522		"             OpStore %outloc %sc_0_quant\n"
2523		"             OpBranch %exit\n"
2524
2525		"%case1     = OpLabel\n"
2526		"             OpStore %outloc %sc_1_quant\n"
2527		"             OpBranch %exit\n"
2528
2529		"%case2     = OpLabel\n"
2530		"             OpStore %outloc %sc_2_quant\n"
2531		"             OpBranch %exit\n"
2532
2533		"%case3     = OpLabel\n"
2534		"             OpStore %outloc %sc_3_quant\n"
2535		"             OpBranch %exit\n"
2536
2537		"%case4     = OpLabel\n"
2538		"             OpStore %outloc %sc_4_quant\n"
2539		"             OpBranch %exit\n"
2540
2541		"%case5     = OpLabel\n"
2542		"             OpStore %outloc %sc_5_quant\n"
2543		"             OpBranch %exit\n"
2544
2545		"%exit      = OpLabel\n"
2546		"             OpReturn\n"
2547
2548		"             OpFunctionEnd\n");
2549
2550	{
2551		ComputeShaderSpec	spec;
2552		const deUint8		numCases	= 4;
2553		vector<float>		inputs		(numCases, 0.f);
2554		vector<float>		outputs;
2555
2556		spec.assembly		= shader;
2557		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2558
2559		spec.specConstants.push_back(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
2560		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
2561		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
2562		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
2563
2564		outputs.push_back(std::numeric_limits<float>::infinity());
2565		outputs.push_back(-std::numeric_limits<float>::infinity());
2566		outputs.push_back(std::numeric_limits<float>::infinity());
2567		outputs.push_back(-std::numeric_limits<float>::infinity());
2568
2569		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2570		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2571
2572		group->addChild(new SpvAsmComputeShaderCase(
2573			testCtx, "infinities", "Check that infinities propagated and created", spec));
2574	}
2575
2576	{
2577		ComputeShaderSpec	spec;
2578		const deUint8		numCases	= 2;
2579		vector<float>		inputs		(numCases, 0.f);
2580		vector<float>		outputs;
2581
2582		spec.assembly		= shader;
2583		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2584		spec.verifyIO		= &compareNan;
2585
2586		outputs.push_back(std::numeric_limits<float>::quiet_NaN());
2587		outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
2588
2589		for (deUint8 idx = 0; idx < numCases; ++idx)
2590			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
2591
2592		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2593		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2594
2595		group->addChild(new SpvAsmComputeShaderCase(
2596			testCtx, "propagated_nans", "Check that nans are propagated", spec));
2597	}
2598
2599	{
2600		ComputeShaderSpec	spec;
2601		const deUint8		numCases	= 6;
2602		vector<float>		inputs		(numCases, 0.f);
2603		vector<float>		outputs;
2604
2605		spec.assembly		= shader;
2606		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2607
2608		spec.specConstants.push_back(bitwiseCast<deUint32>(0.f));
2609		spec.specConstants.push_back(bitwiseCast<deUint32>(-0.f));
2610		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
2611		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
2612		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
2613		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
2614
2615		outputs.push_back(0.f);
2616		outputs.push_back(-0.f);
2617		outputs.push_back(0.f);
2618		outputs.push_back(-0.f);
2619		outputs.push_back(0.f);
2620		outputs.push_back(-0.f);
2621
2622		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2623		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2624
2625		group->addChild(new SpvAsmComputeShaderCase(
2626			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
2627	}
2628
2629	{
2630		ComputeShaderSpec	spec;
2631		const deUint8		numCases	= 6;
2632		vector<float>		inputs		(numCases, 0.f);
2633		vector<float>		outputs;
2634
2635		spec.assembly		= shader;
2636		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2637
2638		for (deUint8 idx = 0; idx < 6; ++idx)
2639		{
2640			const float f = static_cast<float>(idx * 10 - 30) / 4.f;
2641			spec.specConstants.push_back(bitwiseCast<deUint32>(f));
2642			outputs.push_back(f);
2643		}
2644
2645		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2646		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2647
2648		group->addChild(new SpvAsmComputeShaderCase(
2649			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
2650	}
2651
2652	{
2653		ComputeShaderSpec	spec;
2654		const deUint8		numCases	= 4;
2655		vector<float>		inputs		(numCases, 0.f);
2656		vector<float>		outputs;
2657
2658		spec.assembly		= shader;
2659		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2660		spec.verifyIO		= &compareOpQuantizeF16ComputeExactCase;
2661
2662		outputs.push_back(constructNormalizedFloat(8, 0x300300));
2663		outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
2664		outputs.push_back(constructNormalizedFloat(2, 0x01E000));
2665		outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
2666
2667		for (deUint8 idx = 0; idx < numCases; ++idx)
2668			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
2669
2670		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2671		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2672
2673		group->addChild(new SpvAsmComputeShaderCase(
2674			testCtx, "rounded", "Check that are rounded when needed", spec));
2675	}
2676
2677	return group.release();
2678}
2679
2680// Checks that constant null/composite values can be used in computation.
2681tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
2682{
2683	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
2684	ComputeShaderSpec				spec;
2685	de::Random						rnd				(deStringHash(group->getName()));
2686	const int						numElements		= 100;
2687	vector<float>					positiveFloats	(numElements, 0);
2688	vector<float>					negativeFloats	(numElements, 0);
2689
2690	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2691
2692	for (size_t ndx = 0; ndx < numElements; ++ndx)
2693		negativeFloats[ndx] = -positiveFloats[ndx];
2694
2695	spec.assembly =
2696		"OpCapability Shader\n"
2697		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2698		"OpMemoryModel Logical GLSL450\n"
2699		"OpEntryPoint GLCompute %main \"main\" %id\n"
2700		"OpExecutionMode %main LocalSize 1 1 1\n"
2701
2702		"OpSource GLSL 430\n"
2703		"OpName %main           \"main\"\n"
2704		"OpName %id             \"gl_GlobalInvocationID\"\n"
2705
2706		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2707
2708		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
2709
2710		"%fmat      = OpTypeMatrix %fvec3 3\n"
2711		"%ten       = OpConstant %u32 10\n"
2712		"%f32arr10  = OpTypeArray %f32 %ten\n"
2713		"%fst       = OpTypeStruct %f32 %f32\n"
2714
2715		+ string(s_InputOutputBuffer) +
2716
2717		"%id        = OpVariable %uvec3ptr Input\n"
2718		"%zero      = OpConstant %i32 0\n"
2719
2720		// Create a bunch of null values
2721		"%unull     = OpConstantNull %u32\n"
2722		"%fnull     = OpConstantNull %f32\n"
2723		"%vnull     = OpConstantNull %fvec3\n"
2724		"%mnull     = OpConstantNull %fmat\n"
2725		"%anull     = OpConstantNull %f32arr10\n"
2726		"%snull     = OpConstantComposite %fst %fnull %fnull\n"
2727
2728		"%main      = OpFunction %void None %voidf\n"
2729		"%label     = OpLabel\n"
2730		"%idval     = OpLoad %uvec3 %id\n"
2731		"%x         = OpCompositeExtract %u32 %idval 0\n"
2732		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2733		"%inval     = OpLoad %f32 %inloc\n"
2734		"%neg       = OpFNegate %f32 %inval\n"
2735
2736		// Get the abs() of (a certain element of) those null values
2737		"%unull_cov = OpConvertUToF %f32 %unull\n"
2738		"%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
2739		"%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
2740		"%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
2741		"%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
2742		"%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
2743		"%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
2744		"%anull_3   = OpCompositeExtract %f32 %anull 3\n"
2745		"%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
2746		"%snull_1   = OpCompositeExtract %f32 %snull 1\n"
2747		"%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
2748
2749		// Add them all
2750		"%add1      = OpFAdd %f32 %neg  %unull_abs\n"
2751		"%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
2752		"%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
2753		"%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
2754		"%add5      = OpFAdd %f32 %add4 %anull_abs\n"
2755		"%final     = OpFAdd %f32 %add5 %snull_abs\n"
2756
2757		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2758		"             OpStore %outloc %final\n" // write to output
2759		"             OpReturn\n"
2760		"             OpFunctionEnd\n";
2761	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2762	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2763	spec.numWorkGroups = IVec3(numElements, 1, 1);
2764
2765	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
2766
2767	return group.release();
2768}
2769
2770// Assembly code used for testing loop control is based on GLSL source code:
2771// #version 430
2772//
2773// layout(std140, set = 0, binding = 0) readonly buffer Input {
2774//   float elements[];
2775// } input_data;
2776// layout(std140, set = 0, binding = 1) writeonly buffer Output {
2777//   float elements[];
2778// } output_data;
2779//
2780// void main() {
2781//   uint x = gl_GlobalInvocationID.x;
2782//   output_data.elements[x] = input_data.elements[x];
2783//   for (uint i = 0; i < 4; ++i)
2784//     output_data.elements[x] += 1.f;
2785// }
2786tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
2787{
2788	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
2789	vector<CaseParameter>			cases;
2790	de::Random						rnd				(deStringHash(group->getName()));
2791	const int						numElements		= 100;
2792	vector<float>					inputFloats		(numElements, 0);
2793	vector<float>					outputFloats	(numElements, 0);
2794	const StringTemplate			shaderTemplate	(
2795		string(s_ShaderPreamble) +
2796
2797		"OpSource GLSL 430\n"
2798		"OpName %main \"main\"\n"
2799		"OpName %id \"gl_GlobalInvocationID\"\n"
2800
2801		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2802
2803		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2804
2805		"%u32ptr      = OpTypePointer Function %u32\n"
2806
2807		"%id          = OpVariable %uvec3ptr Input\n"
2808		"%zero        = OpConstant %i32 0\n"
2809		"%uzero       = OpConstant %u32 0\n"
2810		"%one         = OpConstant %i32 1\n"
2811		"%constf1     = OpConstant %f32 1.0\n"
2812		"%four        = OpConstant %u32 4\n"
2813
2814		"%main        = OpFunction %void None %voidf\n"
2815		"%entry       = OpLabel\n"
2816		"%i           = OpVariable %u32ptr Function\n"
2817		"               OpStore %i %uzero\n"
2818
2819		"%idval       = OpLoad %uvec3 %id\n"
2820		"%x           = OpCompositeExtract %u32 %idval 0\n"
2821		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
2822		"%inval       = OpLoad %f32 %inloc\n"
2823		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
2824		"               OpStore %outloc %inval\n"
2825		"               OpBranch %loop_entry\n"
2826
2827		"%loop_entry  = OpLabel\n"
2828		"%i_val       = OpLoad %u32 %i\n"
2829		"%cmp_lt      = OpULessThan %bool %i_val %four\n"
2830		"               OpLoopMerge %loop_merge %loop_entry ${CONTROL}\n"
2831		"               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
2832		"%loop_body   = OpLabel\n"
2833		"%outval      = OpLoad %f32 %outloc\n"
2834		"%addf1       = OpFAdd %f32 %outval %constf1\n"
2835		"               OpStore %outloc %addf1\n"
2836		"%new_i       = OpIAdd %u32 %i_val %one\n"
2837		"               OpStore %i %new_i\n"
2838		"               OpBranch %loop_entry\n"
2839		"%loop_merge  = OpLabel\n"
2840		"               OpReturn\n"
2841		"               OpFunctionEnd\n");
2842
2843	cases.push_back(CaseParameter("none",				"None"));
2844	cases.push_back(CaseParameter("unroll",				"Unroll"));
2845	cases.push_back(CaseParameter("dont_unroll",		"DontUnroll"));
2846	cases.push_back(CaseParameter("unroll_dont_unroll",	"Unroll|DontUnroll"));
2847
2848	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
2849
2850	for (size_t ndx = 0; ndx < numElements; ++ndx)
2851		outputFloats[ndx] = inputFloats[ndx] + 4.f;
2852
2853	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2854	{
2855		map<string, string>		specializations;
2856		ComputeShaderSpec		spec;
2857
2858		specializations["CONTROL"] = cases[caseNdx].param;
2859		spec.assembly = shaderTemplate.specialize(specializations);
2860		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2861		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2862		spec.numWorkGroups = IVec3(numElements, 1, 1);
2863
2864		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2865	}
2866
2867	return group.release();
2868}
2869
2870// Assembly code used for testing selection control is based on GLSL source code:
2871// #version 430
2872//
2873// layout(std140, set = 0, binding = 0) readonly buffer Input {
2874//   float elements[];
2875// } input_data;
2876// layout(std140, set = 0, binding = 1) writeonly buffer Output {
2877//   float elements[];
2878// } output_data;
2879//
2880// void main() {
2881//   uint x = gl_GlobalInvocationID.x;
2882//   float val = input_data.elements[x];
2883//   if (val > 10.f)
2884//     output_data.elements[x] = val + 1.f;
2885//   else
2886//     output_data.elements[x] = val - 1.f;
2887// }
2888tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
2889{
2890	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
2891	vector<CaseParameter>			cases;
2892	de::Random						rnd				(deStringHash(group->getName()));
2893	const int						numElements		= 100;
2894	vector<float>					inputFloats		(numElements, 0);
2895	vector<float>					outputFloats	(numElements, 0);
2896	const StringTemplate			shaderTemplate	(
2897		string(s_ShaderPreamble) +
2898
2899		"OpSource GLSL 430\n"
2900		"OpName %main \"main\"\n"
2901		"OpName %id \"gl_GlobalInvocationID\"\n"
2902
2903		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2904
2905		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2906
2907		"%id       = OpVariable %uvec3ptr Input\n"
2908		"%zero     = OpConstant %i32 0\n"
2909		"%constf1  = OpConstant %f32 1.0\n"
2910		"%constf10 = OpConstant %f32 10.0\n"
2911
2912		"%main     = OpFunction %void None %voidf\n"
2913		"%entry    = OpLabel\n"
2914		"%idval    = OpLoad %uvec3 %id\n"
2915		"%x        = OpCompositeExtract %u32 %idval 0\n"
2916		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
2917		"%inval    = OpLoad %f32 %inloc\n"
2918		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
2919		"%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
2920
2921		"            OpSelectionMerge %if_end ${CONTROL}\n"
2922		"            OpBranchConditional %cmp_gt %if_true %if_false\n"
2923		"%if_true  = OpLabel\n"
2924		"%addf1    = OpFAdd %f32 %inval %constf1\n"
2925		"            OpStore %outloc %addf1\n"
2926		"            OpBranch %if_end\n"
2927		"%if_false = OpLabel\n"
2928		"%subf1    = OpFSub %f32 %inval %constf1\n"
2929		"            OpStore %outloc %subf1\n"
2930		"            OpBranch %if_end\n"
2931		"%if_end   = OpLabel\n"
2932		"            OpReturn\n"
2933		"            OpFunctionEnd\n");
2934
2935	cases.push_back(CaseParameter("none",					"None"));
2936	cases.push_back(CaseParameter("flatten",				"Flatten"));
2937	cases.push_back(CaseParameter("dont_flatten",			"DontFlatten"));
2938	cases.push_back(CaseParameter("flatten_dont_flatten",	"DontFlatten|Flatten"));
2939
2940	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
2941
2942	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2943	floorAll(inputFloats);
2944
2945	for (size_t ndx = 0; ndx < numElements; ++ndx)
2946		outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
2947
2948	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2949	{
2950		map<string, string>		specializations;
2951		ComputeShaderSpec		spec;
2952
2953		specializations["CONTROL"] = cases[caseNdx].param;
2954		spec.assembly = shaderTemplate.specialize(specializations);
2955		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2956		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2957		spec.numWorkGroups = IVec3(numElements, 1, 1);
2958
2959		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2960	}
2961
2962	return group.release();
2963}
2964
2965// Assembly code used for testing function control is based on GLSL source code:
2966//
2967// #version 430
2968//
2969// layout(std140, set = 0, binding = 0) readonly buffer Input {
2970//   float elements[];
2971// } input_data;
2972// layout(std140, set = 0, binding = 1) writeonly buffer Output {
2973//   float elements[];
2974// } output_data;
2975//
2976// float const10() { return 10.f; }
2977//
2978// void main() {
2979//   uint x = gl_GlobalInvocationID.x;
2980//   output_data.elements[x] = input_data.elements[x] + const10();
2981// }
2982tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
2983{
2984	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
2985	vector<CaseParameter>			cases;
2986	de::Random						rnd				(deStringHash(group->getName()));
2987	const int						numElements		= 100;
2988	vector<float>					inputFloats		(numElements, 0);
2989	vector<float>					outputFloats	(numElements, 0);
2990	const StringTemplate			shaderTemplate	(
2991		string(s_ShaderPreamble) +
2992
2993		"OpSource GLSL 430\n"
2994		"OpName %main \"main\"\n"
2995		"OpName %func_const10 \"const10(\"\n"
2996		"OpName %id \"gl_GlobalInvocationID\"\n"
2997
2998		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2999
3000		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3001
3002		"%f32f = OpTypeFunction %f32\n"
3003		"%id = OpVariable %uvec3ptr Input\n"
3004		"%zero = OpConstant %i32 0\n"
3005		"%constf10 = OpConstant %f32 10.0\n"
3006
3007		"%main         = OpFunction %void None %voidf\n"
3008		"%entry        = OpLabel\n"
3009		"%idval        = OpLoad %uvec3 %id\n"
3010		"%x            = OpCompositeExtract %u32 %idval 0\n"
3011		"%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
3012		"%inval        = OpLoad %f32 %inloc\n"
3013		"%ret_10       = OpFunctionCall %f32 %func_const10\n"
3014		"%fadd         = OpFAdd %f32 %inval %ret_10\n"
3015		"%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
3016		"                OpStore %outloc %fadd\n"
3017		"                OpReturn\n"
3018		"                OpFunctionEnd\n"
3019
3020		"%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
3021		"%label        = OpLabel\n"
3022		"                OpReturnValue %constf10\n"
3023		"                OpFunctionEnd\n");
3024
3025	cases.push_back(CaseParameter("none",						"None"));
3026	cases.push_back(CaseParameter("inline",						"Inline"));
3027	cases.push_back(CaseParameter("dont_inline",				"DontInline"));
3028	cases.push_back(CaseParameter("pure",						"Pure"));
3029	cases.push_back(CaseParameter("const",						"Const"));
3030	cases.push_back(CaseParameter("inline_pure",				"Inline|Pure"));
3031	cases.push_back(CaseParameter("const_dont_inline",			"Const|DontInline"));
3032	cases.push_back(CaseParameter("inline_dont_inline",			"Inline|DontInline"));
3033	cases.push_back(CaseParameter("pure_inline_dont_inline",	"Pure|Inline|DontInline"));
3034
3035	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
3036
3037	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3038	floorAll(inputFloats);
3039
3040	for (size_t ndx = 0; ndx < numElements; ++ndx)
3041		outputFloats[ndx] = inputFloats[ndx] + 10.f;
3042
3043	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3044	{
3045		map<string, string>		specializations;
3046		ComputeShaderSpec		spec;
3047
3048		specializations["CONTROL"] = cases[caseNdx].param;
3049		spec.assembly = shaderTemplate.specialize(specializations);
3050		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3051		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3052		spec.numWorkGroups = IVec3(numElements, 1, 1);
3053
3054		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3055	}
3056
3057	return group.release();
3058}
3059
3060tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
3061{
3062	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
3063	vector<CaseParameter>			cases;
3064	de::Random						rnd				(deStringHash(group->getName()));
3065	const int						numElements		= 100;
3066	vector<float>					inputFloats		(numElements, 0);
3067	vector<float>					outputFloats	(numElements, 0);
3068	const StringTemplate			shaderTemplate	(
3069		string(s_ShaderPreamble) +
3070
3071		"OpSource GLSL 430\n"
3072		"OpName %main           \"main\"\n"
3073		"OpName %id             \"gl_GlobalInvocationID\"\n"
3074
3075		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3076
3077		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3078
3079		"%f32ptr_f  = OpTypePointer Function %f32\n"
3080
3081		"%id        = OpVariable %uvec3ptr Input\n"
3082		"%zero      = OpConstant %i32 0\n"
3083		"%four      = OpConstant %i32 4\n"
3084
3085		"%main      = OpFunction %void None %voidf\n"
3086		"%label     = OpLabel\n"
3087		"%copy      = OpVariable %f32ptr_f Function\n"
3088		"%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
3089		"%x         = OpCompositeExtract %u32 %idval 0\n"
3090		"%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
3091		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3092		"             OpCopyMemory %copy %inloc ${ACCESS}\n"
3093		"%val1      = OpLoad %f32 %copy\n"
3094		"%val2      = OpLoad %f32 %inloc\n"
3095		"%add       = OpFAdd %f32 %val1 %val2\n"
3096		"             OpStore %outloc %add ${ACCESS}\n"
3097		"             OpReturn\n"
3098		"             OpFunctionEnd\n");
3099
3100	cases.push_back(CaseParameter("null",					""));
3101	cases.push_back(CaseParameter("none",					"None"));
3102	cases.push_back(CaseParameter("volatile",				"Volatile"));
3103	cases.push_back(CaseParameter("aligned",				"Aligned 4"));
3104	cases.push_back(CaseParameter("nontemporal",			"Nontemporal"));
3105	cases.push_back(CaseParameter("aligned_nontemporal",	"Aligned|Nontemporal 4"));
3106	cases.push_back(CaseParameter("aligned_volatile",		"Volatile|Aligned 4"));
3107
3108	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
3109
3110	for (size_t ndx = 0; ndx < numElements; ++ndx)
3111		outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
3112
3113	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3114	{
3115		map<string, string>		specializations;
3116		ComputeShaderSpec		spec;
3117
3118		specializations["ACCESS"] = cases[caseNdx].param;
3119		spec.assembly = shaderTemplate.specialize(specializations);
3120		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3121		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3122		spec.numWorkGroups = IVec3(numElements, 1, 1);
3123
3124		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3125	}
3126
3127	return group.release();
3128}
3129
3130// Checks that we can get undefined values for various types, without exercising a computation with it.
3131tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
3132{
3133	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
3134	vector<CaseParameter>			cases;
3135	de::Random						rnd				(deStringHash(group->getName()));
3136	const int						numElements		= 100;
3137	vector<float>					positiveFloats	(numElements, 0);
3138	vector<float>					negativeFloats	(numElements, 0);
3139	const StringTemplate			shaderTemplate	(
3140		string(s_ShaderPreamble) +
3141
3142		"OpSource GLSL 430\n"
3143		"OpName %main           \"main\"\n"
3144		"OpName %id             \"gl_GlobalInvocationID\"\n"
3145
3146		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3147
3148		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3149
3150		"${TYPE}\n"
3151
3152		"%id        = OpVariable %uvec3ptr Input\n"
3153		"%zero      = OpConstant %i32 0\n"
3154
3155		"%main      = OpFunction %void None %voidf\n"
3156		"%label     = OpLabel\n"
3157
3158		"%undef     = OpUndef %type\n"
3159
3160		"%idval     = OpLoad %uvec3 %id\n"
3161		"%x         = OpCompositeExtract %u32 %idval 0\n"
3162
3163		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3164		"%inval     = OpLoad %f32 %inloc\n"
3165		"%neg       = OpFNegate %f32 %inval\n"
3166		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3167		"             OpStore %outloc %neg\n"
3168		"             OpReturn\n"
3169		"             OpFunctionEnd\n");
3170
3171	cases.push_back(CaseParameter("bool",			"%type = OpTypeBool"));
3172	cases.push_back(CaseParameter("sint32",			"%type = OpTypeInt 32 1"));
3173	cases.push_back(CaseParameter("uint32",			"%type = OpTypeInt 32 0"));
3174	cases.push_back(CaseParameter("float32",		"%type = OpTypeFloat 32"));
3175	cases.push_back(CaseParameter("vec4float32",	"%type = OpTypeVector %f32 4"));
3176	cases.push_back(CaseParameter("vec2uint32",		"%type = OpTypeVector %u32 2"));
3177	cases.push_back(CaseParameter("matrix",			"%type = OpTypeMatrix %fvec3 3"));
3178	cases.push_back(CaseParameter("image",			"%type = OpTypeImage %f32 2D 0 0 0 1 Unknown"));
3179	cases.push_back(CaseParameter("sampler",		"%type = OpTypeSampler"));
3180	cases.push_back(CaseParameter("sampledimage",	"%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
3181													"%type = OpTypeSampledImage %img"));
3182	cases.push_back(CaseParameter("array",			"%100 = OpConstant %u32 100\n"
3183													"%type = OpTypeArray %i32 %100"));
3184	cases.push_back(CaseParameter("runtimearray",	"%type = OpTypeRuntimeArray %f32"));
3185	cases.push_back(CaseParameter("struct",			"%type = OpTypeStruct %f32 %i32 %u32"));
3186	cases.push_back(CaseParameter("pointer",		"%type = OpTypePointer Function %i32"));
3187
3188	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3189
3190	for (size_t ndx = 0; ndx < numElements; ++ndx)
3191		negativeFloats[ndx] = -positiveFloats[ndx];
3192
3193	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3194	{
3195		map<string, string>		specializations;
3196		ComputeShaderSpec		spec;
3197
3198		specializations["TYPE"] = cases[caseNdx].param;
3199		spec.assembly = shaderTemplate.specialize(specializations);
3200		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3201		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3202		spec.numWorkGroups = IVec3(numElements, 1, 1);
3203
3204		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3205	}
3206
3207		return group.release();
3208}
3209typedef std::pair<std::string, VkShaderStageFlagBits>	EntryToStage;
3210typedef map<string, vector<EntryToStage> >				ModuleMap;
3211typedef map<VkShaderStageFlagBits, vector<deInt32> >	StageToSpecConstantMap;
3212
3213// Context for a specific test instantiation. For example, an instantiation
3214// may test colors yellow/magenta/cyan/mauve in a tesselation shader
3215// with an entry point named 'main_to_the_main'
3216struct InstanceContext
3217{
3218	// Map of modules to what entry_points we care to use from those modules.
3219	ModuleMap				moduleMap;
3220	RGBA					inputColors[4];
3221	RGBA					outputColors[4];
3222	// Concrete SPIR-V code to test via boilerplate specialization.
3223	map<string, string>		testCodeFragments;
3224	StageToSpecConstantMap	specConstants;
3225	bool					hasTessellation;
3226	VkShaderStageFlagBits	requiredStages;
3227
3228	InstanceContext (const RGBA (&inputs)[4], const RGBA (&outputs)[4], const map<string, string>& testCodeFragments_, const StageToSpecConstantMap& specConstants_)
3229		: testCodeFragments		(testCodeFragments_)
3230		, specConstants			(specConstants_)
3231		, hasTessellation		(false)
3232		, requiredStages		(static_cast<VkShaderStageFlagBits>(0))
3233	{
3234		inputColors[0]		= inputs[0];
3235		inputColors[1]		= inputs[1];
3236		inputColors[2]		= inputs[2];
3237		inputColors[3]		= inputs[3];
3238
3239		outputColors[0]		= outputs[0];
3240		outputColors[1]		= outputs[1];
3241		outputColors[2]		= outputs[2];
3242		outputColors[3]		= outputs[3];
3243	}
3244
3245	InstanceContext (const InstanceContext& other)
3246		: moduleMap			(other.moduleMap)
3247		, testCodeFragments	(other.testCodeFragments)
3248		, specConstants		(other.specConstants)
3249		, hasTessellation	(other.hasTessellation)
3250		, requiredStages    (other.requiredStages)
3251	{
3252		inputColors[0]		= other.inputColors[0];
3253		inputColors[1]		= other.inputColors[1];
3254		inputColors[2]		= other.inputColors[2];
3255		inputColors[3]		= other.inputColors[3];
3256
3257		outputColors[0]		= other.outputColors[0];
3258		outputColors[1]		= other.outputColors[1];
3259		outputColors[2]		= other.outputColors[2];
3260		outputColors[3]		= other.outputColors[3];
3261	}
3262};
3263
3264// A description of a shader to be used for a single stage of the graphics pipeline.
3265struct ShaderElement
3266{
3267	// The module that contains this shader entrypoint.
3268	string					moduleName;
3269
3270	// The name of the entrypoint.
3271	string					entryName;
3272
3273	// Which shader stage this entry point represents.
3274	VkShaderStageFlagBits	stage;
3275
3276	ShaderElement (const string& moduleName_, const string& entryPoint_, VkShaderStageFlagBits shaderStage_)
3277		: moduleName(moduleName_)
3278		, entryName(entryPoint_)
3279		, stage(shaderStage_)
3280	{
3281	}
3282};
3283
3284void getDefaultColors (RGBA (&colors)[4])
3285{
3286	colors[0] = RGBA::white();
3287	colors[1] = RGBA::red();
3288	colors[2] = RGBA::green();
3289	colors[3] = RGBA::blue();
3290}
3291
3292void getHalfColorsFullAlpha (RGBA (&colors)[4])
3293{
3294	colors[0] = RGBA(127, 127, 127, 255);
3295	colors[1] = RGBA(127, 0,   0,	255);
3296	colors[2] = RGBA(0,	  127, 0,	255);
3297	colors[3] = RGBA(0,	  0,   127, 255);
3298}
3299
3300void getInvertedDefaultColors (RGBA (&colors)[4])
3301{
3302	colors[0] = RGBA(0,		0,		0,		255);
3303	colors[1] = RGBA(0,		255,	255,	255);
3304	colors[2] = RGBA(255,	0,		255,	255);
3305	colors[3] = RGBA(255,	255,	0,		255);
3306}
3307
3308// Turns a statically sized array of ShaderElements into an instance-context
3309// by setting up the mapping of modules to their contained shaders and stages.
3310// The inputs and expected outputs are given by inputColors and outputColors
3311template<size_t N>
3312InstanceContext createInstanceContext (const ShaderElement (&elements)[N], const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, const StageToSpecConstantMap& specConstants)
3313{
3314	InstanceContext ctx (inputColors, outputColors, testCodeFragments, specConstants);
3315	for (size_t i = 0; i < N; ++i)
3316	{
3317		ctx.moduleMap[elements[i].moduleName].push_back(std::make_pair(elements[i].entryName, elements[i].stage));
3318		ctx.requiredStages = static_cast<VkShaderStageFlagBits>(ctx.requiredStages | elements[i].stage);
3319	}
3320	return ctx;
3321}
3322
3323template<size_t N>
3324inline InstanceContext createInstanceContext (const ShaderElement (&elements)[N], RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments)
3325{
3326	return createInstanceContext(elements, inputColors, outputColors, testCodeFragments, StageToSpecConstantMap());
3327}
3328
3329// The same as createInstanceContext above, but with default colors.
3330template<size_t N>
3331InstanceContext createInstanceContext (const ShaderElement (&elements)[N], const map<string, string>& testCodeFragments)
3332{
3333	RGBA defaultColors[4];
3334	getDefaultColors(defaultColors);
3335	return createInstanceContext(elements, defaultColors, defaultColors, testCodeFragments);
3336}
3337
3338// For the current InstanceContext, constructs the required modules and shader stage create infos.
3339void createPipelineShaderStages (const DeviceInterface& vk, const VkDevice vkDevice, InstanceContext& instance, Context& context, vector<ModuleHandleSp>& modules, vector<VkPipelineShaderStageCreateInfo>& createInfos)
3340{
3341	for (ModuleMap::const_iterator moduleNdx = instance.moduleMap.begin(); moduleNdx != instance.moduleMap.end(); ++moduleNdx)
3342	{
3343		const ModuleHandleSp mod(new Unique<VkShaderModule>(createShaderModule(vk, vkDevice, context.getBinaryCollection().get(moduleNdx->first), 0)));
3344		modules.push_back(ModuleHandleSp(mod));
3345		for (vector<EntryToStage>::const_iterator shaderNdx = moduleNdx->second.begin(); shaderNdx != moduleNdx->second.end(); ++shaderNdx)
3346		{
3347			const EntryToStage&						stage			= *shaderNdx;
3348			const VkPipelineShaderStageCreateInfo	shaderParam		=
3349			{
3350				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType			sType;
3351				DE_NULL,												//	const void*				pNext;
3352				(VkPipelineShaderStageCreateFlags)0,
3353				stage.second,											//	VkShaderStageFlagBits	stage;
3354				**modules.back(),										//	VkShaderModule			module;
3355				stage.first.c_str(),									//	const char*				pName;
3356				(const VkSpecializationInfo*)DE_NULL,
3357			};
3358			createInfos.push_back(shaderParam);
3359		}
3360	}
3361}
3362
// SPIR-V assembly text declaring the basic types used by the boilerplate
// graphics shaders: void/bool, 32-bit int/uint/float, float and bool vectors,
// the function types %v4f32_function and %fun, and Input/Output/Function
// pointer types.  Expands to adjacent string literals.
#define SPIRV_ASSEMBLY_TYPES \
	"%void = OpTypeVoid\n" \
	"%bool = OpTypeBool\n" \
	\
	"%i32 = OpTypeInt 32 1\n" \
	"%u32 = OpTypeInt 32 0\n" \
	\
	"%f32 = OpTypeFloat 32\n" \
	"%v3f32 = OpTypeVector %f32 3\n" \
	"%v4f32 = OpTypeVector %f32 4\n" \
	"%v4bool = OpTypeVector %bool 4\n" \
	\
	"%v4f32_function = OpTypeFunction %v4f32 %v4f32\n" \
	"%fun = OpTypeFunction %void\n" \
	\
	"%ip_f32 = OpTypePointer Input %f32\n" \
	"%ip_i32 = OpTypePointer Input %i32\n" \
	"%ip_v3f32 = OpTypePointer Input %v3f32\n" \
	"%ip_v4f32 = OpTypePointer Input %v4f32\n" \
	\
	"%op_f32 = OpTypePointer Output %f32\n" \
	"%op_v4f32 = OpTypePointer Output %v4f32\n" \
	\
	"%fp_f32   = OpTypePointer Function %f32\n" \
	"%fp_i32   = OpTypePointer Function %i32\n" \
	"%fp_v4f32 = OpTypePointer Function %v4f32\n"
3389
// SPIR-V assembly text declaring the scalar float/int/uint constants and the
// three %v4f32 composite constants used by the boilerplate graphics shaders.
// It refers to %f32, %i32, %u32 and %v4f32, so those types must be declared
// before it.  Expands to adjacent string literals.
#define SPIRV_ASSEMBLY_CONSTANTS \
	"%c_f32_1 = OpConstant %f32 1.0\n" \
	"%c_f32_0 = OpConstant %f32 0.0\n" \
	"%c_f32_0_5 = OpConstant %f32 0.5\n" \
	"%c_f32_n1  = OpConstant %f32 -1.\n" \
	"%c_f32_7 = OpConstant %f32 7.0\n" \
	"%c_f32_8 = OpConstant %f32 8.0\n" \
	"%c_i32_0 = OpConstant %i32 0\n" \
	"%c_i32_1 = OpConstant %i32 1\n" \
	"%c_i32_2 = OpConstant %i32 2\n" \
	"%c_i32_3 = OpConstant %i32 3\n" \
	"%c_i32_4 = OpConstant %i32 4\n" \
	"%c_u32_0 = OpConstant %u32 0\n" \
	"%c_u32_1 = OpConstant %u32 1\n" \
	"%c_u32_2 = OpConstant %u32 2\n" \
	"%c_u32_3 = OpConstant %u32 3\n" \
	"%c_u32_32 = OpConstant %u32 32\n" \
	"%c_u32_4 = OpConstant %u32 4\n" \
	"%c_u32_31_bits = OpConstant %u32 0x7FFFFFFF\n" \
	"%c_v4f32_1_1_1_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n" \
	"%c_v4f32_1_0_0_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_1\n" \
	"%c_v4f32_0_5_0_5_0_5_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5\n"
3412
// SPIR-V assembly text declaring the fixed-size array types (and pointers to
// them) used by the boilerplate graphics shaders.  The array lengths refer to
// the %c_u32_* constants and the element types to %f32 / %v4f32, so those must
// be declared before it.  Expands to adjacent string literals.
#define SPIRV_ASSEMBLY_ARRAYS \
	"%a1f32 = OpTypeArray %f32 %c_u32_1\n" \
	"%a2f32 = OpTypeArray %f32 %c_u32_2\n" \
	"%a3v4f32 = OpTypeArray %v4f32 %c_u32_3\n" \
	"%a4f32 = OpTypeArray %f32 %c_u32_4\n" \
	"%a32v4f32 = OpTypeArray %v4f32 %c_u32_32\n" \
	"%ip_a3v4f32 = OpTypePointer Input %a3v4f32\n" \
	"%ip_a32v4f32 = OpTypePointer Input %a32v4f32\n" \
	"%op_a2f32 = OpTypePointer Output %a2f32\n" \
	"%op_a3v4f32 = OpTypePointer Output %a3v4f32\n" \
	"%op_a4f32 = OpTypePointer Output %a4f32\n"
3424
3425// Creates vertex-shader assembly by specializing a boilerplate StringTemplate
3426// on fragments, which must (at least) map "testfun" to an OpFunction definition
3427// for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3428// with "BP_" to avoid collisions with fragments.
3429//
3430// It corresponds roughly to this GLSL:
3431//;
3432// layout(location = 0) in vec4 position;
3433// layout(location = 1) in vec4 color;
3434// layout(location = 1) out highp vec4 vtxColor;
3435// void main (void) { gl_Position = position; vtxColor = test_func(color); }
3436string makeVertexShaderAssembly(const map<string, string>& fragments)
3437{
3438// \todo [2015-11-23 awoloszyn] Remove OpName once these have stabalized
3439	static const char vertexShaderBoilerplate[] =
3440		"OpCapability Shader\n"
3441		"OpCapability ClipDistance\n"
3442		"OpCapability CullDistance\n"
3443		"OpMemoryModel Logical GLSL450\n"
3444		"OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_vtx_color %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex\n"
3445		"${debug:opt}\n"
3446		"OpName %main \"main\"\n"
3447		"OpName %BP_gl_PerVertex \"gl_PerVertex\"\n"
3448		"OpMemberName %BP_gl_PerVertex 0 \"gl_Position\"\n"
3449		"OpMemberName %BP_gl_PerVertex 1 \"gl_PointSize\"\n"
3450		"OpMemberName %BP_gl_PerVertex 2 \"gl_ClipDistance\"\n"
3451		"OpMemberName %BP_gl_PerVertex 3 \"gl_CullDistance\"\n"
3452		"OpName %test_code \"testfun(vf4;\"\n"
3453		"OpName %BP_stream \"\"\n"
3454		"OpName %BP_position \"position\"\n"
3455		"OpName %BP_vtx_color \"vtxColor\"\n"
3456		"OpName %BP_color \"color\"\n"
3457		"OpName %BP_gl_VertexIndex \"gl_VertexIndex\"\n"
3458		"OpName %BP_gl_InstanceIndex \"gl_InstanceIndex\"\n"
3459		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3460		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3461		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3462		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3463		"OpDecorate %BP_gl_PerVertex Block\n"
3464		"OpDecorate %BP_position Location 0\n"
3465		"OpDecorate %BP_vtx_color Location 1\n"
3466		"OpDecorate %BP_color Location 1\n"
3467		"OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3468		"OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3469		"${decoration:opt}\n"
3470		SPIRV_ASSEMBLY_TYPES
3471		SPIRV_ASSEMBLY_CONSTANTS
3472		SPIRV_ASSEMBLY_ARRAYS
3473		"%BP_gl_PerVertex = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3474		"%BP_op_gl_PerVertex = OpTypePointer Output %BP_gl_PerVertex\n"
3475		"%BP_stream = OpVariable %BP_op_gl_PerVertex Output\n"
3476		"%BP_position = OpVariable %ip_v4f32 Input\n"
3477		"%BP_vtx_color = OpVariable %op_v4f32 Output\n"
3478		"%BP_color = OpVariable %ip_v4f32 Input\n"
3479		"%BP_gl_VertexIndex = OpVariable %ip_i32 Input\n"
3480		"%BP_gl_InstanceIndex = OpVariable %ip_i32 Input\n"
3481		"${pre_main:opt}\n"
3482		"%main = OpFunction %void None %fun\n"
3483		"%BP_label = OpLabel\n"
3484		"%BP_pos = OpLoad %v4f32 %BP_position\n"
3485		"%BP_gl_pos = OpAccessChain %op_v4f32 %BP_stream %c_i32_0\n"
3486		"OpStore %BP_gl_pos %BP_pos\n"
3487		"%BP_col = OpLoad %v4f32 %BP_color\n"
3488		"%BP_col_transformed = OpFunctionCall %v4f32 %test_code %BP_col\n"
3489		"OpStore %BP_vtx_color %BP_col_transformed\n"
3490		"OpReturn\n"
3491		"OpFunctionEnd\n"
3492		"${testfun}\n";
3493	return tcu::StringTemplate(vertexShaderBoilerplate).specialize(fragments);
3494}
3495
3496// Creates tess-control-shader assembly by specializing a boilerplate
3497// StringTemplate on fragments, which must (at least) map "testfun" to an
3498// OpFunction definition for %test_code that takes and returns a %v4f32.
3499// Boilerplate IDs are prefixed with "BP_" to avoid collisions with fragments.
3500//
3501// It roughly corresponds to the following GLSL.
3502//
3503// #version 450
3504// layout(vertices = 3) out;
3505// layout(location = 1) in vec4 in_color[];
3506// layout(location = 1) out vec4 out_color[];
3507//
3508// void main() {
3509//   out_color[gl_InvocationID] = testfun(in_color[gl_InvocationID]);
3510//   gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
3511//   if (gl_InvocationID == 0) {
3512//     gl_TessLevelOuter[0] = 1.0;
3513//     gl_TessLevelOuter[1] = 1.0;
3514//     gl_TessLevelOuter[2] = 1.0;
3515//     gl_TessLevelInner[0] = 1.0;
3516//   }
3517// }
3518string makeTessControlShaderAssembly (const map<string, string>& fragments)
3519{
3520	static const char tessControlShaderBoilerplate[] =
3521		"OpCapability Tessellation\n"
3522		"OpCapability ClipDistance\n"
3523		"OpCapability CullDistance\n"
3524		"OpMemoryModel Logical GLSL450\n"
3525		"OpEntryPoint TessellationControl %BP_main \"main\" %BP_out_color %BP_gl_InvocationID %BP_in_color %BP_gl_out %BP_gl_in %BP_gl_TessLevelOuter %BP_gl_TessLevelInner\n"
3526		"OpExecutionMode %BP_main OutputVertices 3\n"
3527		"${debug:opt}\n"
3528		"OpName %BP_main \"main\"\n"
3529		"OpName %test_code \"testfun(vf4;\"\n"
3530		"OpName %BP_out_color \"out_color\"\n"
3531		"OpName %BP_gl_InvocationID \"gl_InvocationID\"\n"
3532		"OpName %BP_in_color \"in_color\"\n"
3533		"OpName %BP_gl_PerVertex \"gl_PerVertex\"\n"
3534		"OpMemberName %BP_gl_PerVertex 0 \"gl_Position\"\n"
3535		"OpMemberName %BP_gl_PerVertex 1 \"gl_PointSize\"\n"
3536		"OpMemberName %BP_gl_PerVertex 2 \"gl_ClipDistance\"\n"
3537		"OpMemberName %BP_gl_PerVertex 3 \"gl_CullDistance\"\n"
3538		"OpName %BP_gl_out \"gl_out\"\n"
3539		"OpName %BP_gl_PVOut \"gl_PerVertex\"\n"
3540		"OpMemberName %BP_gl_PVOut 0 \"gl_Position\"\n"
3541		"OpMemberName %BP_gl_PVOut 1 \"gl_PointSize\"\n"
3542		"OpMemberName %BP_gl_PVOut 2 \"gl_ClipDistance\"\n"
3543		"OpMemberName %BP_gl_PVOut 3 \"gl_CullDistance\"\n"
3544		"OpName %BP_gl_in \"gl_in\"\n"
3545		"OpName %BP_gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
3546		"OpName %BP_gl_TessLevelInner \"gl_TessLevelInner\"\n"
3547		"OpDecorate %BP_out_color Location 1\n"
3548		"OpDecorate %BP_gl_InvocationID BuiltIn InvocationId\n"
3549		"OpDecorate %BP_in_color Location 1\n"
3550		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3551		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3552		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3553		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3554		"OpDecorate %BP_gl_PerVertex Block\n"
3555		"OpMemberDecorate %BP_gl_PVOut 0 BuiltIn Position\n"
3556		"OpMemberDecorate %BP_gl_PVOut 1 BuiltIn PointSize\n"
3557		"OpMemberDecorate %BP_gl_PVOut 2 BuiltIn ClipDistance\n"
3558		"OpMemberDecorate %BP_gl_PVOut 3 BuiltIn CullDistance\n"
3559		"OpDecorate %BP_gl_PVOut Block\n"
3560		"OpDecorate %BP_gl_TessLevelOuter Patch\n"
3561		"OpDecorate %BP_gl_TessLevelOuter BuiltIn TessLevelOuter\n"
3562		"OpDecorate %BP_gl_TessLevelInner Patch\n"
3563		"OpDecorate %BP_gl_TessLevelInner BuiltIn TessLevelInner\n"
3564		"${decoration:opt}\n"
3565		SPIRV_ASSEMBLY_TYPES
3566		SPIRV_ASSEMBLY_CONSTANTS
3567		SPIRV_ASSEMBLY_ARRAYS
3568		"%BP_out_color = OpVariable %op_a3v4f32 Output\n"
3569		"%BP_gl_InvocationID = OpVariable %ip_i32 Input\n"
3570		"%BP_in_color = OpVariable %ip_a32v4f32 Input\n"
3571		"%BP_gl_PerVertex = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3572		"%BP_a3_gl_PerVertex = OpTypeArray %BP_gl_PerVertex %c_u32_3\n"
3573		"%BP_op_a3_gl_PerVertex = OpTypePointer Output %BP_a3_gl_PerVertex\n"
3574		"%BP_gl_out = OpVariable %BP_op_a3_gl_PerVertex Output\n"
3575		"%BP_gl_PVOut = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3576		"%BP_a32_gl_PVOut = OpTypeArray %BP_gl_PVOut %c_u32_32\n"
3577		"%BP_ip_a32_gl_PVOut = OpTypePointer Input %BP_a32_gl_PVOut\n"
3578		"%BP_gl_in = OpVariable %BP_ip_a32_gl_PVOut Input\n"
3579		"%BP_gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
3580		"%BP_gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
3581		"${pre_main:opt}\n"
3582
3583		"%BP_main = OpFunction %void None %fun\n"
3584		"%BP_label = OpLabel\n"
3585
3586		"%BP_gl_Invoc = OpLoad %i32 %BP_gl_InvocationID\n"
3587
3588		"%BP_in_col_loc = OpAccessChain %ip_v4f32 %BP_in_color %BP_gl_Invoc\n"
3589		"%BP_out_col_loc = OpAccessChain %op_v4f32 %BP_out_color %BP_gl_Invoc\n"
3590		"%BP_in_col_val = OpLoad %v4f32 %BP_in_col_loc\n"
3591		"%BP_clr_transformed = OpFunctionCall %v4f32 %test_code %BP_in_col_val\n"
3592		"OpStore %BP_out_col_loc %BP_clr_transformed\n"
3593
3594		"%BP_in_pos_loc = OpAccessChain %ip_v4f32 %BP_gl_in %BP_gl_Invoc %c_i32_0\n"
3595		"%BP_out_pos_loc = OpAccessChain %op_v4f32 %BP_gl_out %BP_gl_Invoc %c_i32_0\n"
3596		"%BP_in_pos_val = OpLoad %v4f32 %BP_in_pos_loc\n"
3597		"OpStore %BP_out_pos_loc %BP_in_pos_val\n"
3598
3599		"%BP_cmp = OpIEqual %bool %BP_gl_Invoc %c_i32_0\n"
3600		"OpSelectionMerge %BP_merge_label None\n"
3601		"OpBranchConditional %BP_cmp %BP_if_label %BP_merge_label\n"
3602		"%BP_if_label = OpLabel\n"
3603		"%BP_gl_TessLevelOuterPos_0 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_0\n"
3604		"%BP_gl_TessLevelOuterPos_1 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_1\n"
3605		"%BP_gl_TessLevelOuterPos_2 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_2\n"
3606		"%BP_gl_TessLevelInnerPos_0 = OpAccessChain %op_f32 %BP_gl_TessLevelInner %c_i32_0\n"
3607		"OpStore %BP_gl_TessLevelOuterPos_0 %c_f32_1\n"
3608		"OpStore %BP_gl_TessLevelOuterPos_1 %c_f32_1\n"
3609		"OpStore %BP_gl_TessLevelOuterPos_2 %c_f32_1\n"
3610		"OpStore %BP_gl_TessLevelInnerPos_0 %c_f32_1\n"
3611		"OpBranch %BP_merge_label\n"
3612		"%BP_merge_label = OpLabel\n"
3613		"OpReturn\n"
3614		"OpFunctionEnd\n"
3615		"${testfun}\n";
3616	return tcu::StringTemplate(tessControlShaderBoilerplate).specialize(fragments);
3617}
3618
3619// Creates tess-evaluation-shader assembly by specializing a boilerplate
3620// StringTemplate on fragments, which must (at least) map "testfun" to an
3621// OpFunction definition for %test_code that takes and returns a %v4f32.
3622// Boilerplate IDs are prefixed with "BP_" to avoid collisions with fragments.
3623//
3624// It roughly corresponds to the following glsl.
3625//
3626// #version 450
3627//
3628// layout(triangles, equal_spacing, ccw) in;
3629// layout(location = 1) in vec4 in_color[];
3630// layout(location = 1) out vec4 out_color;
3631//
3632// #define interpolate(val)
3633//   vec4(gl_TessCoord.x) * val[0] + vec4(gl_TessCoord.y) * val[1] +
3634//          vec4(gl_TessCoord.z) * val[2]
3635//
3636// void main() {
3637//   gl_Position = vec4(gl_TessCoord.x) * gl_in[0].gl_Position +
3638//                  vec4(gl_TessCoord.y) * gl_in[1].gl_Position +
3639//                  vec4(gl_TessCoord.z) * gl_in[2].gl_Position;
3640//   out_color = testfun(interpolate(in_color));
3641// }
3642string makeTessEvalShaderAssembly(const map<string, string>& fragments)
3643{
3644	static const char tessEvalBoilerplate[] =
3645		"OpCapability Tessellation\n"
3646		"OpCapability ClipDistance\n"
3647		"OpCapability CullDistance\n"
3648		"OpMemoryModel Logical GLSL450\n"
3649		"OpEntryPoint TessellationEvaluation %BP_main \"main\" %BP_stream %BP_gl_TessCoord %BP_gl_in %BP_out_color %BP_in_color\n"
3650		"OpExecutionMode %BP_main Triangles\n"
3651		"OpExecutionMode %BP_main SpacingEqual\n"
3652		"OpExecutionMode %BP_main VertexOrderCcw\n"
3653		"${debug:opt}\n"
3654		"OpName %BP_main \"main\"\n"
3655		"OpName %test_code \"testfun(vf4;\"\n"
3656		"OpName %BP_gl_PerVertexOut \"gl_PerVertex\"\n"
3657		"OpMemberName %BP_gl_PerVertexOut 0 \"gl_Position\"\n"
3658		"OpMemberName %BP_gl_PerVertexOut 1 \"gl_PointSize\"\n"
3659		"OpMemberName %BP_gl_PerVertexOut 2 \"gl_ClipDistance\"\n"
3660		"OpMemberName %BP_gl_PerVertexOut 3 \"gl_CullDistance\"\n"
3661		"OpName %BP_stream \"\"\n"
3662		"OpName %BP_gl_TessCoord \"gl_TessCoord\"\n"
3663		"OpName %BP_gl_PerVertexIn \"gl_PerVertex\"\n"
3664		"OpMemberName %BP_gl_PerVertexIn 0 \"gl_Position\"\n"
3665		"OpMemberName %BP_gl_PerVertexIn 1 \"gl_PointSize\"\n"
3666		"OpMemberName %BP_gl_PerVertexIn 2 \"gl_ClipDistance\"\n"
3667		"OpMemberName %BP_gl_PerVertexIn 3 \"gl_CullDistance\"\n"
3668		"OpName %BP_gl_in \"gl_in\"\n"
3669		"OpName %BP_out_color \"out_color\"\n"
3670		"OpName %BP_in_color \"in_color\"\n"
3671		"OpMemberDecorate %BP_gl_PerVertexOut 0 BuiltIn Position\n"
3672		"OpMemberDecorate %BP_gl_PerVertexOut 1 BuiltIn PointSize\n"
3673		"OpMemberDecorate %BP_gl_PerVertexOut 2 BuiltIn ClipDistance\n"
3674		"OpMemberDecorate %BP_gl_PerVertexOut 3 BuiltIn CullDistance\n"
3675		"OpDecorate %BP_gl_PerVertexOut Block\n"
3676		"OpDecorate %BP_gl_TessCoord BuiltIn TessCoord\n"
3677		"OpMemberDecorate %BP_gl_PerVertexIn 0 BuiltIn Position\n"
3678		"OpMemberDecorate %BP_gl_PerVertexIn 1 BuiltIn PointSize\n"
3679		"OpMemberDecorate %BP_gl_PerVertexIn 2 BuiltIn ClipDistance\n"
3680		"OpMemberDecorate %BP_gl_PerVertexIn 3 BuiltIn CullDistance\n"
3681		"OpDecorate %BP_gl_PerVertexIn Block\n"
3682		"OpDecorate %BP_out_color Location 1\n"
3683		"OpDecorate %BP_in_color Location 1\n"
3684		"${decoration:opt}\n"
3685		SPIRV_ASSEMBLY_TYPES
3686		SPIRV_ASSEMBLY_CONSTANTS
3687		SPIRV_ASSEMBLY_ARRAYS
3688		"%BP_gl_PerVertexOut = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3689		"%BP_op_gl_PerVertexOut = OpTypePointer Output %BP_gl_PerVertexOut\n"
3690		"%BP_stream = OpVariable %BP_op_gl_PerVertexOut Output\n"
3691		"%BP_gl_TessCoord = OpVariable %ip_v3f32 Input\n"
3692		"%BP_gl_PerVertexIn = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3693		"%BP_a32_gl_PerVertexIn = OpTypeArray %BP_gl_PerVertexIn %c_u32_32\n"
3694		"%BP_ip_a32_gl_PerVertexIn = OpTypePointer Input %BP_a32_gl_PerVertexIn\n"
3695		"%BP_gl_in = OpVariable %BP_ip_a32_gl_PerVertexIn Input\n"
3696		"%BP_out_color = OpVariable %op_v4f32 Output\n"
3697		"%BP_in_color = OpVariable %ip_a32v4f32 Input\n"
3698		"${pre_main:opt}\n"
3699		"%BP_main = OpFunction %void None %fun\n"
3700		"%BP_label = OpLabel\n"
3701		"%BP_gl_TC_0 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_0\n"
3702		"%BP_gl_TC_1 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_1\n"
3703		"%BP_gl_TC_2 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_2\n"
3704		"%BP_gl_in_gl_Pos_0 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_0 %c_i32_0\n"
3705		"%BP_gl_in_gl_Pos_1 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_1 %c_i32_0\n"
3706		"%BP_gl_in_gl_Pos_2 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_2 %c_i32_0\n"
3707
3708		"%BP_gl_OPos = OpAccessChain %op_v4f32 %BP_stream %c_i32_0\n"
3709		"%BP_in_color_0 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_0\n"
3710		"%BP_in_color_1 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_1\n"
3711		"%BP_in_color_2 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_2\n"
3712
3713		"%BP_TC_W_0 = OpLoad %f32 %BP_gl_TC_0\n"
3714		"%BP_TC_W_1 = OpLoad %f32 %BP_gl_TC_1\n"
3715		"%BP_TC_W_2 = OpLoad %f32 %BP_gl_TC_2\n"
3716		"%BP_v4f32_TC_0 = OpCompositeConstruct %v4f32 %BP_TC_W_0 %BP_TC_W_0 %BP_TC_W_0 %BP_TC_W_0\n"
3717		"%BP_v4f32_TC_1 = OpCompositeConstruct %v4f32 %BP_TC_W_1 %BP_TC_W_1 %BP_TC_W_1 %BP_TC_W_1\n"
3718		"%BP_v4f32_TC_2 = OpCompositeConstruct %v4f32 %BP_TC_W_2 %BP_TC_W_2 %BP_TC_W_2 %BP_TC_W_2\n"
3719
3720		"%BP_gl_IP_0 = OpLoad %v4f32 %BP_gl_in_gl_Pos_0\n"
3721		"%BP_gl_IP_1 = OpLoad %v4f32 %BP_gl_in_gl_Pos_1\n"
3722		"%BP_gl_IP_2 = OpLoad %v4f32 %BP_gl_in_gl_Pos_2\n"
3723
3724		"%BP_IP_W_0 = OpFMul %v4f32 %BP_v4f32_TC_0 %BP_gl_IP_0\n"
3725		"%BP_IP_W_1 = OpFMul %v4f32 %BP_v4f32_TC_1 %BP_gl_IP_1\n"
3726		"%BP_IP_W_2 = OpFMul %v4f32 %BP_v4f32_TC_2 %BP_gl_IP_2\n"
3727
3728		"%BP_pos_sum_0 = OpFAdd %v4f32 %BP_IP_W_0 %BP_IP_W_1\n"
3729		"%BP_pos_sum_1 = OpFAdd %v4f32 %BP_pos_sum_0 %BP_IP_W_2\n"
3730
3731		"OpStore %BP_gl_OPos %BP_pos_sum_1\n"
3732
3733		"%BP_IC_0 = OpLoad %v4f32 %BP_in_color_0\n"
3734		"%BP_IC_1 = OpLoad %v4f32 %BP_in_color_1\n"
3735		"%BP_IC_2 = OpLoad %v4f32 %BP_in_color_2\n"
3736
3737		"%BP_IC_W_0 = OpFMul %v4f32 %BP_v4f32_TC_0 %BP_IC_0\n"
3738		"%BP_IC_W_1 = OpFMul %v4f32 %BP_v4f32_TC_1 %BP_IC_1\n"
3739		"%BP_IC_W_2 = OpFMul %v4f32 %BP_v4f32_TC_2 %BP_IC_2\n"
3740
3741		"%BP_col_sum_0 = OpFAdd %v4f32 %BP_IC_W_0 %BP_IC_W_1\n"
3742		"%BP_col_sum_1 = OpFAdd %v4f32 %BP_col_sum_0 %BP_IC_W_2\n"
3743
3744		"%BP_clr_transformed = OpFunctionCall %v4f32 %test_code %BP_col_sum_1\n"
3745
3746		"OpStore %BP_out_color %BP_clr_transformed\n"
3747		"OpReturn\n"
3748		"OpFunctionEnd\n"
3749		"${testfun}\n";
3750	return tcu::StringTemplate(tessEvalBoilerplate).specialize(fragments);
3751}
3752
3753// Creates geometry-shader assembly by specializing a boilerplate StringTemplate
3754// on fragments, which must (at least) map "testfun" to an OpFunction definition
3755// for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3756// with "BP_" to avoid collisions with fragments.
3757//
3758// Derived from this GLSL:
3759//
3760// #version 450
3761// layout(triangles) in;
3762// layout(triangle_strip, max_vertices = 3) out;
3763//
3764// layout(location = 1) in vec4 in_color[];
3765// layout(location = 1) out vec4 out_color;
3766//
3767// void main() {
3768//   gl_Position = gl_in[0].gl_Position;
3769//   out_color = test_fun(in_color[0]);
3770//   EmitVertex();
3771//   gl_Position = gl_in[1].gl_Position;
3772//   out_color = test_fun(in_color[1]);
3773//   EmitVertex();
3774//   gl_Position = gl_in[2].gl_Position;
3775//   out_color = test_fun(in_color[2]);
3776//   EmitVertex();
3777//   EndPrimitive();
3778// }
3779string makeGeometryShaderAssembly(const map<string, string>& fragments)
3780{
3781	static const char geometryShaderBoilerplate[] =
3782		"OpCapability Geometry\n"
3783		"OpCapability ClipDistance\n"
3784		"OpCapability CullDistance\n"
3785		"OpMemoryModel Logical GLSL450\n"
3786		"OpEntryPoint Geometry %BP_main \"main\" %BP_out_gl_position %BP_gl_in %BP_out_color %BP_in_color\n"
3787		"OpExecutionMode %BP_main Triangles\n"
3788		"OpExecutionMode %BP_main OutputTriangleStrip\n"
3789		"OpExecutionMode %BP_main OutputVertices 3\n"
3790		"${debug:opt}\n"
3791		"OpName %BP_main \"main\"\n"
3792		"OpName %BP_per_vertex_in \"gl_PerVertex\"\n"
3793		"OpMemberName %BP_per_vertex_in 0 \"gl_Position\"\n"
3794		"OpMemberName %BP_per_vertex_in 1 \"gl_PointSize\"\n"
3795		"OpMemberName %BP_per_vertex_in 2 \"gl_ClipDistance\"\n"
3796		"OpMemberName %BP_per_vertex_in 3 \"gl_CullDistance\"\n"
3797		"OpName %BP_gl_in \"gl_in\"\n"
3798		"OpName %BP_out_color \"out_color\"\n"
3799		"OpName %BP_in_color \"in_color\"\n"
3800		"OpName %test_code \"testfun(vf4;\"\n"
3801		"OpDecorate %BP_out_gl_position BuiltIn Position\n"
3802		"OpMemberDecorate %BP_per_vertex_in 0 BuiltIn Position\n"
3803		"OpMemberDecorate %BP_per_vertex_in 1 BuiltIn PointSize\n"
3804		"OpMemberDecorate %BP_per_vertex_in 2 BuiltIn ClipDistance\n"
3805		"OpMemberDecorate %BP_per_vertex_in 3 BuiltIn CullDistance\n"
3806		"OpDecorate %BP_per_vertex_in Block\n"
3807		"OpDecorate %BP_out_color Location 1\n"
3808		"OpDecorate %BP_in_color Location 1\n"
3809		"${decoration:opt}\n"
3810		SPIRV_ASSEMBLY_TYPES
3811		SPIRV_ASSEMBLY_CONSTANTS
3812		SPIRV_ASSEMBLY_ARRAYS
3813		"%BP_per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3814		"%BP_a3_per_vertex_in = OpTypeArray %BP_per_vertex_in %c_u32_3\n"
3815		"%BP_ip_a3_per_vertex_in = OpTypePointer Input %BP_a3_per_vertex_in\n"
3816
3817		"%BP_gl_in = OpVariable %BP_ip_a3_per_vertex_in Input\n"
3818		"%BP_out_color = OpVariable %op_v4f32 Output\n"
3819		"%BP_in_color = OpVariable %ip_a3v4f32 Input\n"
3820		"%BP_out_gl_position = OpVariable %op_v4f32 Output\n"
3821		"${pre_main:opt}\n"
3822
3823		"%BP_main = OpFunction %void None %fun\n"
3824		"%BP_label = OpLabel\n"
3825		"%BP_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_0 %c_i32_0\n"
3826		"%BP_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_1 %c_i32_0\n"
3827		"%BP_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_2 %c_i32_0\n"
3828
3829		"%BP_in_position_0 = OpLoad %v4f32 %BP_gl_in_0_gl_position\n"
3830		"%BP_in_position_1 = OpLoad %v4f32 %BP_gl_in_1_gl_position\n"
3831		"%BP_in_position_2 = OpLoad %v4f32 %BP_gl_in_2_gl_position \n"
3832
3833		"%BP_in_color_0_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_0\n"
3834		"%BP_in_color_1_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_1\n"
3835		"%BP_in_color_2_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_2\n"
3836
3837		"%BP_in_color_0 = OpLoad %v4f32 %BP_in_color_0_ptr\n"
3838		"%BP_in_color_1 = OpLoad %v4f32 %BP_in_color_1_ptr\n"
3839		"%BP_in_color_2 = OpLoad %v4f32 %BP_in_color_2_ptr\n"
3840
3841		"%BP_transformed_in_color_0 = OpFunctionCall %v4f32 %test_code %BP_in_color_0\n"
3842		"%BP_transformed_in_color_1 = OpFunctionCall %v4f32 %test_code %BP_in_color_1\n"
3843		"%BP_transformed_in_color_2 = OpFunctionCall %v4f32 %test_code %BP_in_color_2\n"
3844
3845
3846		"OpStore %BP_out_gl_position %BP_in_position_0\n"
3847		"OpStore %BP_out_color %BP_transformed_in_color_0\n"
3848		"OpEmitVertex\n"
3849
3850		"OpStore %BP_out_gl_position %BP_in_position_1\n"
3851		"OpStore %BP_out_color %BP_transformed_in_color_1\n"
3852		"OpEmitVertex\n"
3853
3854		"OpStore %BP_out_gl_position %BP_in_position_2\n"
3855		"OpStore %BP_out_color %BP_transformed_in_color_2\n"
3856		"OpEmitVertex\n"
3857
3858		"OpEndPrimitive\n"
3859		"OpReturn\n"
3860		"OpFunctionEnd\n"
3861		"${testfun}\n";
3862	return tcu::StringTemplate(geometryShaderBoilerplate).specialize(fragments);
3863}
3864
3865// Creates fragment-shader assembly by specializing a boilerplate StringTemplate
3866// on fragments, which must (at least) map "testfun" to an OpFunction definition
3867// for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3868// with "BP_" to avoid collisions with fragments.
3869//
3870// Derived from this GLSL:
3871//
3872// layout(location = 1) in highp vec4 vtxColor;
3873// layout(location = 0) out highp vec4 fragColor;
3874// highp vec4 testfun(highp vec4 x) { return x; }
3875// void main(void) { fragColor = testfun(vtxColor); }
3876//
3877// with modifications including passing vtxColor by value and ripping out
3878// testfun() definition.
3879string makeFragmentShaderAssembly(const map<string, string>& fragments)
3880{
3881	static const char fragmentShaderBoilerplate[] =
3882		"OpCapability Shader\n"
3883		"OpMemoryModel Logical GLSL450\n"
3884		"OpEntryPoint Fragment %BP_main \"main\" %BP_vtxColor %BP_fragColor\n"
3885		"OpExecutionMode %BP_main OriginUpperLeft\n"
3886		"${debug:opt}\n"
3887		"OpName %BP_main \"main\"\n"
3888		"OpName %BP_fragColor \"fragColor\"\n"
3889		"OpName %BP_vtxColor \"vtxColor\"\n"
3890		"OpName %test_code \"testfun(vf4;\"\n"
3891		"OpDecorate %BP_fragColor Location 0\n"
3892		"OpDecorate %BP_vtxColor Location 1\n"
3893		"${decoration:opt}\n"
3894		SPIRV_ASSEMBLY_TYPES
3895		SPIRV_ASSEMBLY_CONSTANTS
3896		SPIRV_ASSEMBLY_ARRAYS
3897		"%BP_fragColor = OpVariable %op_v4f32 Output\n"
3898		"%BP_vtxColor = OpVariable %ip_v4f32 Input\n"
3899		"${pre_main:opt}\n"
3900		"%BP_main = OpFunction %void None %fun\n"
3901		"%BP_label_main = OpLabel\n"
3902		"%BP_tmp1 = OpLoad %v4f32 %BP_vtxColor\n"
3903		"%BP_tmp2 = OpFunctionCall %v4f32 %test_code %BP_tmp1\n"
3904		"OpStore %BP_fragColor %BP_tmp2\n"
3905		"OpReturn\n"
3906		"OpFunctionEnd\n"
3907		"${testfun}\n";
3908	return tcu::StringTemplate(fragmentShaderBoilerplate).specialize(fragments);
3909}
3910
// Returns the fragment map of a simple pass-through shader (usable for any
// shader kind).  Its only entry, "testfun", defines %test_code as a function
// that returns its %v4f32 argument unchanged.
map<string, string> passthruFragments(void)
{
	// %test_code hands its single parameter straight back to the caller.
	static const char* const identityTestFunction =
		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"
		"%label_testfun = OpLabel\n"
		"OpReturnValue %param1\n"
		"OpFunctionEnd\n";

	map<string, string> result;
	result["testfun"] = identityTestFunction;
	return result;
}
3924
3925// Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3926// Vertex shader gets custom code from context, the rest are pass-through.
3927void addShaderCodeCustomVertex(vk::SourceCollections& dst, InstanceContext context)
3928{
3929	map<string, string> passthru = passthruFragments();
3930	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(context.testCodeFragments);
3931	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3932}
3933
3934// Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3935// Tessellation control shader gets custom code from context, the rest are
3936// pass-through.
3937void addShaderCodeCustomTessControl(vk::SourceCollections& dst, InstanceContext context)
3938{
3939	map<string, string> passthru = passthruFragments();
3940	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3941	dst.spirvAsmSources.add("tessc") << makeTessControlShaderAssembly(context.testCodeFragments);
3942	dst.spirvAsmSources.add("tesse") << makeTessEvalShaderAssembly(passthru);
3943	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3944}
3945
3946// Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3947// Tessellation evaluation shader gets custom code from context, the rest are
3948// pass-through.
3949void addShaderCodeCustomTessEval(vk::SourceCollections& dst, InstanceContext context)
3950{
3951	map<string, string> passthru = passthruFragments();
3952	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3953	dst.spirvAsmSources.add("tessc") << makeTessControlShaderAssembly(passthru);
3954	dst.spirvAsmSources.add("tesse") << makeTessEvalShaderAssembly(context.testCodeFragments);
3955	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3956}
3957
3958// Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3959// Geometry shader gets custom code from context, the rest are pass-through.
3960void addShaderCodeCustomGeometry(vk::SourceCollections& dst, InstanceContext context)
3961{
3962	map<string, string> passthru = passthruFragments();
3963	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3964	dst.spirvAsmSources.add("geom") << makeGeometryShaderAssembly(context.testCodeFragments);
3965	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3966}
3967
3968// Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3969// Fragment shader gets custom code from context, the rest are pass-through.
3970void addShaderCodeCustomFragment(vk::SourceCollections& dst, InstanceContext context)
3971{
3972	map<string, string> passthru = passthruFragments();
3973	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3974	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(context.testCodeFragments);
3975}
3976
3977void createCombinedModule(vk::SourceCollections& dst, InstanceContext)
3978{
3979	// \todo [2015-12-07 awoloszyn] Make tessellation / geometry conditional
3980	// \todo [2015-12-07 awoloszyn] Remove OpName and OpMemberName at some point
3981	dst.spirvAsmSources.add("module") <<
3982		"OpCapability Shader\n"
3983		"OpCapability ClipDistance\n"
3984		"OpCapability CullDistance\n"
3985		"OpCapability Geometry\n"
3986		"OpCapability Tessellation\n"
3987		"OpMemoryModel Logical GLSL450\n"
3988
3989		"OpEntryPoint Vertex %vert_main \"main\" %vert_Position %vert_vtxColor %vert_color %vert_vtxPosition %vert_vertex_id %vert_instance_id\n"
3990		"OpEntryPoint Geometry %geom_main \"main\" %geom_out_gl_position %geom_gl_in %geom_out_color %geom_in_color\n"
3991		"OpEntryPoint TessellationControl %tessc_main \"main\" %tessc_out_color %tessc_gl_InvocationID %tessc_in_color %tessc_out_position %tessc_in_position %tessc_gl_TessLevelOuter %tessc_gl_TessLevelInner\n"
3992		"OpEntryPoint TessellationEvaluation %tesse_main \"main\" %tesse_stream %tesse_gl_tessCoord %tesse_in_position %tesse_out_color %tesse_in_color \n"
3993		"OpEntryPoint Fragment %frag_main \"main\" %frag_vtxColor %frag_fragColor\n"
3994
3995		"OpExecutionMode %geom_main Triangles\n"
3996		"OpExecutionMode %geom_main OutputTriangleStrip\n"
3997		"OpExecutionMode %geom_main OutputVertices 3\n"
3998
3999		"OpExecutionMode %tessc_main OutputVertices 3\n"
4000
4001		"OpExecutionMode %tesse_main Triangles\n"
4002
4003		"OpExecutionMode %frag_main OriginUpperLeft\n"
4004
4005		"OpName %vert_main \"main\"\n"
4006		"OpName %vert_vtxPosition \"vtxPosition\"\n"
4007		"OpName %vert_Position \"position\"\n"
4008		"OpName %vert_vtxColor \"vtxColor\"\n"
4009		"OpName %vert_color \"color\"\n"
4010		"OpName %vert_vertex_id \"gl_VertexIndex\"\n"
4011		"OpName %vert_instance_id \"gl_InstanceIndex\"\n"
4012		"OpName %geom_main \"main\"\n"
4013		"OpName %geom_per_vertex_in \"gl_PerVertex\"\n"
4014		"OpMemberName %geom_per_vertex_in 0 \"gl_Position\"\n"
4015		"OpMemberName %geom_per_vertex_in 1 \"gl_PointSize\"\n"
4016		"OpMemberName %geom_per_vertex_in 2 \"gl_ClipDistance\"\n"
4017		"OpMemberName %geom_per_vertex_in 3 \"gl_CullDistance\"\n"
4018		"OpName %geom_gl_in \"gl_in\"\n"
4019		"OpName %geom_out_color \"out_color\"\n"
4020		"OpName %geom_in_color \"in_color\"\n"
4021		"OpName %tessc_main \"main\"\n"
4022		"OpName %tessc_out_color \"out_color\"\n"
4023		"OpName %tessc_gl_InvocationID \"gl_InvocationID\"\n"
4024		"OpName %tessc_in_color \"in_color\"\n"
4025		"OpName %tessc_out_position \"out_position\"\n"
4026		"OpName %tessc_in_position \"in_position\"\n"
4027		"OpName %tessc_gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
4028		"OpName %tessc_gl_TessLevelInner \"gl_TessLevelInner\"\n"
4029		"OpName %tesse_main \"main\"\n"
4030		"OpName %tesse_per_vertex_out \"gl_PerVertex\"\n"
4031		"OpMemberName %tesse_per_vertex_out 0 \"gl_Position\"\n"
4032		"OpMemberName %tesse_per_vertex_out 1 \"gl_PointSize\"\n"
4033		"OpMemberName %tesse_per_vertex_out 2 \"gl_ClipDistance\"\n"
4034		"OpMemberName %tesse_per_vertex_out 3 \"gl_CullDistance\"\n"
4035		"OpName %tesse_stream \"\"\n"
4036		"OpName %tesse_gl_tessCoord \"gl_TessCoord\"\n"
4037		"OpName %tesse_in_position \"in_position\"\n"
4038		"OpName %tesse_out_color \"out_color\"\n"
4039		"OpName %tesse_in_color \"in_color\"\n"
4040		"OpName %frag_main \"main\"\n"
4041		"OpName %frag_fragColor \"fragColor\"\n"
4042		"OpName %frag_vtxColor \"vtxColor\"\n"
4043
4044		"; Vertex decorations\n"
4045		"OpDecorate %vert_vtxPosition Location 2\n"
4046		"OpDecorate %vert_Position Location 0\n"
4047		"OpDecorate %vert_vtxColor Location 1\n"
4048		"OpDecorate %vert_color Location 1\n"
4049		"OpDecorate %vert_vertex_id BuiltIn VertexIndex\n"
4050		"OpDecorate %vert_instance_id BuiltIn InstanceIndex\n"
4051
4052		"; Geometry decorations\n"
4053		"OpDecorate %geom_out_gl_position BuiltIn Position\n"
4054		"OpMemberDecorate %geom_per_vertex_in 0 BuiltIn Position\n"
4055		"OpMemberDecorate %geom_per_vertex_in 1 BuiltIn PointSize\n"
4056		"OpMemberDecorate %geom_per_vertex_in 2 BuiltIn ClipDistance\n"
4057		"OpMemberDecorate %geom_per_vertex_in 3 BuiltIn CullDistance\n"
4058		"OpDecorate %geom_per_vertex_in Block\n"
4059		"OpDecorate %geom_out_color Location 1\n"
4060		"OpDecorate %geom_in_color Location 1\n"
4061
4062		"; Tessellation Control decorations\n"
4063		"OpDecorate %tessc_out_color Location 1\n"
4064		"OpDecorate %tessc_gl_InvocationID BuiltIn InvocationId\n"
4065		"OpDecorate %tessc_in_color Location 1\n"
4066		"OpDecorate %tessc_out_position Location 2\n"
4067		"OpDecorate %tessc_in_position Location 2\n"
4068		"OpDecorate %tessc_gl_TessLevelOuter Patch\n"
4069		"OpDecorate %tessc_gl_TessLevelOuter BuiltIn TessLevelOuter\n"
4070		"OpDecorate %tessc_gl_TessLevelInner Patch\n"
4071		"OpDecorate %tessc_gl_TessLevelInner BuiltIn TessLevelInner\n"
4072
4073		"; Tessellation Evaluation decorations\n"
4074		"OpMemberDecorate %tesse_per_vertex_out 0 BuiltIn Position\n"
4075		"OpMemberDecorate %tesse_per_vertex_out 1 BuiltIn PointSize\n"
4076		"OpMemberDecorate %tesse_per_vertex_out 2 BuiltIn ClipDistance\n"
4077		"OpMemberDecorate %tesse_per_vertex_out 3 BuiltIn CullDistance\n"
4078		"OpDecorate %tesse_per_vertex_out Block\n"
4079		"OpDecorate %tesse_gl_tessCoord BuiltIn TessCoord\n"
4080		"OpDecorate %tesse_in_position Location 2\n"
4081		"OpDecorate %tesse_out_color Location 1\n"
4082		"OpDecorate %tesse_in_color Location 1\n"
4083
4084		"; Fragment decorations\n"
4085		"OpDecorate %frag_fragColor Location 0\n"
4086		"OpDecorate %frag_vtxColor Location 1\n"
4087
4088		SPIRV_ASSEMBLY_TYPES
4089		SPIRV_ASSEMBLY_CONSTANTS
4090		SPIRV_ASSEMBLY_ARRAYS
4091
4092		"; Vertex Variables\n"
4093		"%vert_vtxPosition = OpVariable %op_v4f32 Output\n"
4094		"%vert_Position = OpVariable %ip_v4f32 Input\n"
4095		"%vert_vtxColor = OpVariable %op_v4f32 Output\n"
4096		"%vert_color = OpVariable %ip_v4f32 Input\n"
4097		"%vert_vertex_id = OpVariable %ip_i32 Input\n"
4098		"%vert_instance_id = OpVariable %ip_i32 Input\n"
4099
4100		"; Geometry Variables\n"
4101		"%geom_per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4102		"%geom_a3_per_vertex_in = OpTypeArray %geom_per_vertex_in %c_u32_3\n"
4103		"%geom_ip_a3_per_vertex_in = OpTypePointer Input %geom_a3_per_vertex_in\n"
4104		"%geom_gl_in = OpVariable %geom_ip_a3_per_vertex_in Input\n"
4105		"%geom_out_color = OpVariable %op_v4f32 Output\n"
4106		"%geom_in_color = OpVariable %ip_a3v4f32 Input\n"
4107		"%geom_out_gl_position = OpVariable %op_v4f32 Output\n"
4108
4109		"; Tessellation Control Variables\n"
4110		"%tessc_out_color = OpVariable %op_a3v4f32 Output\n"
4111		"%tessc_gl_InvocationID = OpVariable %ip_i32 Input\n"
4112		"%tessc_in_color = OpVariable %ip_a32v4f32 Input\n"
4113		"%tessc_out_position = OpVariable %op_a3v4f32 Output\n"
4114		"%tessc_in_position = OpVariable %ip_a32v4f32 Input\n"
4115		"%tessc_gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
4116		"%tessc_gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
4117
4118		"; Tessellation Evaluation Decorations\n"
4119		"%tesse_per_vertex_out = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4120		"%tesse_op_per_vertex_out = OpTypePointer Output %tesse_per_vertex_out\n"
4121		"%tesse_stream = OpVariable %tesse_op_per_vertex_out Output\n"
4122		"%tesse_gl_tessCoord = OpVariable %ip_v3f32 Input\n"
4123		"%tesse_in_position = OpVariable %ip_a32v4f32 Input\n"
4124		"%tesse_out_color = OpVariable %op_v4f32 Output\n"
4125		"%tesse_in_color = OpVariable %ip_a32v4f32 Input\n"
4126
4127		"; Fragment Variables\n"
4128		"%frag_fragColor = OpVariable %op_v4f32 Output\n"
4129		"%frag_vtxColor = OpVariable %ip_v4f32 Input\n"
4130
4131		"; Vertex Entry\n"
4132		"%vert_main = OpFunction %void None %fun\n"
4133		"%vert_label = OpLabel\n"
4134		"%vert_tmp_position = OpLoad %v4f32 %vert_Position\n"
4135		"OpStore %vert_vtxPosition %vert_tmp_position\n"
4136		"%vert_tmp_color = OpLoad %v4f32 %vert_color\n"
4137		"OpStore %vert_vtxColor %vert_tmp_color\n"
4138		"OpReturn\n"
4139		"OpFunctionEnd\n"
4140
4141		"; Geometry Entry\n"
4142		"%geom_main = OpFunction %void None %fun\n"
4143		"%geom_label = OpLabel\n"
4144		"%geom_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_0 %c_i32_0\n"
4145		"%geom_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_1 %c_i32_0\n"
4146		"%geom_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_2 %c_i32_0\n"
4147		"%geom_in_position_0 = OpLoad %v4f32 %geom_gl_in_0_gl_position\n"
4148		"%geom_in_position_1 = OpLoad %v4f32 %geom_gl_in_1_gl_position\n"
4149		"%geom_in_position_2 = OpLoad %v4f32 %geom_gl_in_2_gl_position \n"
4150		"%geom_in_color_0_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_0\n"
4151		"%geom_in_color_1_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_1\n"
4152		"%geom_in_color_2_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_2\n"
4153		"%geom_in_color_0 = OpLoad %v4f32 %geom_in_color_0_ptr\n"
4154		"%geom_in_color_1 = OpLoad %v4f32 %geom_in_color_1_ptr\n"
4155		"%geom_in_color_2 = OpLoad %v4f32 %geom_in_color_2_ptr\n"
4156		"OpStore %geom_out_gl_position %geom_in_position_0\n"
4157		"OpStore %geom_out_color %geom_in_color_0\n"
4158		"OpEmitVertex\n"
4159		"OpStore %geom_out_gl_position %geom_in_position_1\n"
4160		"OpStore %geom_out_color %geom_in_color_1\n"
4161		"OpEmitVertex\n"
4162		"OpStore %geom_out_gl_position %geom_in_position_2\n"
4163		"OpStore %geom_out_color %geom_in_color_2\n"
4164		"OpEmitVertex\n"
4165		"OpEndPrimitive\n"
4166		"OpReturn\n"
4167		"OpFunctionEnd\n"
4168
4169		"; Tessellation Control Entry\n"
4170		"%tessc_main = OpFunction %void None %fun\n"
4171		"%tessc_label = OpLabel\n"
4172		"%tessc_invocation_id = OpLoad %i32 %tessc_gl_InvocationID\n"
4173		"%tessc_in_color_ptr = OpAccessChain %ip_v4f32 %tessc_in_color %tessc_invocation_id\n"
4174		"%tessc_in_position_ptr = OpAccessChain %ip_v4f32 %tessc_in_position %tessc_invocation_id\n"
4175		"%tessc_in_color_val = OpLoad %v4f32 %tessc_in_color_ptr\n"
4176		"%tessc_in_position_val = OpLoad %v4f32 %tessc_in_position_ptr\n"
4177		"%tessc_out_color_ptr = OpAccessChain %op_v4f32 %tessc_out_color %tessc_invocation_id\n"
4178		"%tessc_out_position_ptr = OpAccessChain %op_v4f32 %tessc_out_position %tessc_invocation_id\n"
4179		"OpStore %tessc_out_color_ptr %tessc_in_color_val\n"
4180		"OpStore %tessc_out_position_ptr %tessc_in_position_val\n"
4181		"%tessc_is_first_invocation = OpIEqual %bool %tessc_invocation_id %c_i32_0\n"
4182		"OpSelectionMerge %tessc_merge_label None\n"
4183		"OpBranchConditional %tessc_is_first_invocation %tessc_first_invocation %tessc_merge_label\n"
4184		"%tessc_first_invocation = OpLabel\n"
4185		"%tessc_tess_outer_0 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_0\n"
4186		"%tessc_tess_outer_1 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_1\n"
4187		"%tessc_tess_outer_2 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_2\n"
4188		"%tessc_tess_inner = OpAccessChain %op_f32 %tessc_gl_TessLevelInner %c_i32_0\n"
4189		"OpStore %tessc_tess_outer_0 %c_f32_1\n"
4190		"OpStore %tessc_tess_outer_1 %c_f32_1\n"
4191		"OpStore %tessc_tess_outer_2 %c_f32_1\n"
4192		"OpStore %tessc_tess_inner %c_f32_1\n"
4193		"OpBranch %tessc_merge_label\n"
4194		"%tessc_merge_label = OpLabel\n"
4195		"OpReturn\n"
4196		"OpFunctionEnd\n"
4197
4198		"; Tessellation Evaluation Entry\n"
4199		"%tesse_main = OpFunction %void None %fun\n"
4200		"%tesse_label = OpLabel\n"
4201		"%tesse_tc_0_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_0\n"
4202		"%tesse_tc_1_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_1\n"
4203		"%tesse_tc_2_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_2\n"
4204		"%tesse_tc_0 = OpLoad %f32 %tesse_tc_0_ptr\n"
4205		"%tesse_tc_1 = OpLoad %f32 %tesse_tc_1_ptr\n"
4206		"%tesse_tc_2 = OpLoad %f32 %tesse_tc_2_ptr\n"
4207		"%tesse_in_pos_0_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_0\n"
4208		"%tesse_in_pos_1_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_1\n"
4209		"%tesse_in_pos_2_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_2\n"
4210		"%tesse_in_pos_0 = OpLoad %v4f32 %tesse_in_pos_0_ptr\n"
4211		"%tesse_in_pos_1 = OpLoad %v4f32 %tesse_in_pos_1_ptr\n"
4212		"%tesse_in_pos_2 = OpLoad %v4f32 %tesse_in_pos_2_ptr\n"
4213		"%tesse_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_0 %tesse_in_pos_0\n"
4214		"%tesse_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_1 %tesse_in_pos_1\n"
4215		"%tesse_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_2 %tesse_in_pos_2\n"
4216		"%tesse_out_pos_ptr = OpAccessChain %op_v4f32 %tesse_stream %c_i32_0\n"
4217		"%tesse_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse_in_pos_0_weighted %tesse_in_pos_1_weighted\n"
4218		"%tesse_computed_out = OpFAdd %v4f32 %tesse_in_pos_0_plus_pos_1 %tesse_in_pos_2_weighted\n"
4219		"OpStore %tesse_out_pos_ptr %tesse_computed_out\n"
4220		"%tesse_in_clr_0_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_0\n"
4221		"%tesse_in_clr_1_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_1\n"
4222		"%tesse_in_clr_2_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_2\n"
4223		"%tesse_in_clr_0 = OpLoad %v4f32 %tesse_in_clr_0_ptr\n"
4224		"%tesse_in_clr_1 = OpLoad %v4f32 %tesse_in_clr_1_ptr\n"
4225		"%tesse_in_clr_2 = OpLoad %v4f32 %tesse_in_clr_2_ptr\n"
4226		"%tesse_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_0 %tesse_in_clr_0\n"
4227		"%tesse_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_1 %tesse_in_clr_1\n"
4228		"%tesse_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_2 %tesse_in_clr_2\n"
4229		"%tesse_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse_in_clr_0_weighted %tesse_in_clr_1_weighted\n"
4230		"%tesse_computed_clr = OpFAdd %v4f32 %tesse_in_clr_0_plus_col_1 %tesse_in_clr_2_weighted\n"
4231		"OpStore %tesse_out_color %tesse_computed_clr\n"
4232		"OpReturn\n"
4233		"OpFunctionEnd\n"
4234
4235		"; Fragment Entry\n"
4236		"%frag_main = OpFunction %void None %fun\n"
4237		"%frag_label_main = OpLabel\n"
4238		"%frag_tmp1 = OpLoad %v4f32 %frag_vtxColor\n"
4239		"OpStore %frag_fragColor %frag_tmp1\n"
4240		"OpReturn\n"
4241		"OpFunctionEnd\n";
4242}
4243
4244// This has two shaders of each stage. The first
4245// is a passthrough, the second inverts the color.
4246void createMultipleEntries(vk::SourceCollections& dst, InstanceContext)
4247{
4248	dst.spirvAsmSources.add("vert") <<
4249	// This module contains 2 vertex shaders. One that is a passthrough
4250	// and a second that inverts the color of the output (1.0 - color).
4251		"OpCapability Shader\n"
4252		"OpMemoryModel Logical GLSL450\n"
4253		"OpEntryPoint Vertex %main \"vert1\" %Position %vtxColor %color %vtxPosition %vertex_id %instance_id\n"
4254		"OpEntryPoint Vertex %main2 \"vert2\" %Position %vtxColor %color %vtxPosition %vertex_id %instance_id\n"
4255
4256		"OpName %main \"vert1\"\n"
4257		"OpName %main2 \"vert2\"\n"
4258		"OpName %vtxPosition \"vtxPosition\"\n"
4259		"OpName %Position \"position\"\n"
4260		"OpName %vtxColor \"vtxColor\"\n"
4261		"OpName %color \"color\"\n"
4262		"OpName %vertex_id \"gl_VertexIndex\"\n"
4263		"OpName %instance_id \"gl_InstanceIndex\"\n"
4264
4265		"OpDecorate %vtxPosition Location 2\n"
4266		"OpDecorate %Position Location 0\n"
4267		"OpDecorate %vtxColor Location 1\n"
4268		"OpDecorate %color Location 1\n"
4269		"OpDecorate %vertex_id BuiltIn VertexIndex\n"
4270		"OpDecorate %instance_id BuiltIn InstanceIndex\n"
4271		SPIRV_ASSEMBLY_TYPES
4272		SPIRV_ASSEMBLY_CONSTANTS
4273		SPIRV_ASSEMBLY_ARRAYS
4274		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4275		"%vtxPosition = OpVariable %op_v4f32 Output\n"
4276		"%Position = OpVariable %ip_v4f32 Input\n"
4277		"%vtxColor = OpVariable %op_v4f32 Output\n"
4278		"%color = OpVariable %ip_v4f32 Input\n"
4279		"%vertex_id = OpVariable %ip_i32 Input\n"
4280		"%instance_id = OpVariable %ip_i32 Input\n"
4281
4282		"%main = OpFunction %void None %fun\n"
4283		"%label = OpLabel\n"
4284		"%tmp_position = OpLoad %v4f32 %Position\n"
4285		"OpStore %vtxPosition %tmp_position\n"
4286		"%tmp_color = OpLoad %v4f32 %color\n"
4287		"OpStore %vtxColor %tmp_color\n"
4288		"OpReturn\n"
4289		"OpFunctionEnd\n"
4290
4291		"%main2 = OpFunction %void None %fun\n"
4292		"%label2 = OpLabel\n"
4293		"%tmp_position2 = OpLoad %v4f32 %Position\n"
4294		"OpStore %vtxPosition %tmp_position2\n"
4295		"%tmp_color2 = OpLoad %v4f32 %color\n"
4296		"%tmp_color3 = OpFSub %v4f32 %cval %tmp_color2\n"
4297		"%tmp_color4 = OpVectorInsertDynamic %v4f32 %tmp_color3 %c_f32_1 %c_i32_3\n"
4298		"OpStore %vtxColor %tmp_color4\n"
4299		"OpReturn\n"
4300		"OpFunctionEnd\n";
4301
4302	dst.spirvAsmSources.add("frag") <<
4303		// This is a single module that contains 2 fragment shaders.
4304		// One that passes color through and the other that inverts the output
4305		// color (1.0 - color).
4306		"OpCapability Shader\n"
4307		"OpMemoryModel Logical GLSL450\n"
4308		"OpEntryPoint Fragment %main \"frag1\" %vtxColor %fragColor\n"
4309		"OpEntryPoint Fragment %main2 \"frag2\" %vtxColor %fragColor\n"
4310		"OpExecutionMode %main OriginUpperLeft\n"
4311		"OpExecutionMode %main2 OriginUpperLeft\n"
4312
4313		"OpName %main \"frag1\"\n"
4314		"OpName %main2 \"frag2\"\n"
4315		"OpName %fragColor \"fragColor\"\n"
4316		"OpName %vtxColor \"vtxColor\"\n"
4317		"OpDecorate %fragColor Location 0\n"
4318		"OpDecorate %vtxColor Location 1\n"
4319		SPIRV_ASSEMBLY_TYPES
4320		SPIRV_ASSEMBLY_CONSTANTS
4321		SPIRV_ASSEMBLY_ARRAYS
4322		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4323		"%fragColor = OpVariable %op_v4f32 Output\n"
4324		"%vtxColor = OpVariable %ip_v4f32 Input\n"
4325
4326		"%main = OpFunction %void None %fun\n"
4327		"%label_main = OpLabel\n"
4328		"%tmp1 = OpLoad %v4f32 %vtxColor\n"
4329		"OpStore %fragColor %tmp1\n"
4330		"OpReturn\n"
4331		"OpFunctionEnd\n"
4332
4333		"%main2 = OpFunction %void None %fun\n"
4334		"%label_main2 = OpLabel\n"
4335		"%tmp2 = OpLoad %v4f32 %vtxColor\n"
4336		"%tmp3 = OpFSub %v4f32 %cval %tmp2\n"
4337		"%tmp4 = OpVectorInsertDynamic %v4f32 %tmp3 %c_f32_1 %c_i32_3\n"
4338		"OpStore %fragColor %tmp4\n"
4339		"OpReturn\n"
4340		"OpFunctionEnd\n";
4341
4342	dst.spirvAsmSources.add("geom") <<
4343		"OpCapability Geometry\n"
4344		"OpCapability ClipDistance\n"
4345		"OpCapability CullDistance\n"
4346		"OpMemoryModel Logical GLSL450\n"
4347		"OpEntryPoint Geometry %geom1_main \"geom1\" %out_gl_position %gl_in %out_color %in_color\n"
4348		"OpEntryPoint Geometry %geom2_main \"geom2\" %out_gl_position %gl_in %out_color %in_color\n"
4349		"OpExecutionMode %geom1_main Triangles\n"
4350		"OpExecutionMode %geom2_main Triangles\n"
4351		"OpExecutionMode %geom1_main OutputTriangleStrip\n"
4352		"OpExecutionMode %geom2_main OutputTriangleStrip\n"
4353		"OpExecutionMode %geom1_main OutputVertices 3\n"
4354		"OpExecutionMode %geom2_main OutputVertices 3\n"
4355		"OpName %geom1_main \"geom1\"\n"
4356		"OpName %geom2_main \"geom2\"\n"
4357		"OpName %per_vertex_in \"gl_PerVertex\"\n"
4358		"OpMemberName %per_vertex_in 0 \"gl_Position\"\n"
4359		"OpMemberName %per_vertex_in 1 \"gl_PointSize\"\n"
4360		"OpMemberName %per_vertex_in 2 \"gl_ClipDistance\"\n"
4361		"OpMemberName %per_vertex_in 3 \"gl_CullDistance\"\n"
4362		"OpName %gl_in \"gl_in\"\n"
4363		"OpName %out_color \"out_color\"\n"
4364		"OpName %in_color \"in_color\"\n"
4365		"OpDecorate %out_gl_position BuiltIn Position\n"
4366		"OpMemberDecorate %per_vertex_in 0 BuiltIn Position\n"
4367		"OpMemberDecorate %per_vertex_in 1 BuiltIn PointSize\n"
4368		"OpMemberDecorate %per_vertex_in 2 BuiltIn ClipDistance\n"
4369		"OpMemberDecorate %per_vertex_in 3 BuiltIn CullDistance\n"
4370		"OpDecorate %per_vertex_in Block\n"
4371		"OpDecorate %out_color Location 1\n"
4372		"OpDecorate %in_color Location 1\n"
4373		SPIRV_ASSEMBLY_TYPES
4374		SPIRV_ASSEMBLY_CONSTANTS
4375		SPIRV_ASSEMBLY_ARRAYS
4376		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4377		"%per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4378		"%a3_per_vertex_in = OpTypeArray %per_vertex_in %c_u32_3\n"
4379		"%ip_a3_per_vertex_in = OpTypePointer Input %a3_per_vertex_in\n"
4380		"%gl_in = OpVariable %ip_a3_per_vertex_in Input\n"
4381		"%out_color = OpVariable %op_v4f32 Output\n"
4382		"%in_color = OpVariable %ip_a3v4f32 Input\n"
4383		"%out_gl_position = OpVariable %op_v4f32 Output\n"
4384
4385		"%geom1_main = OpFunction %void None %fun\n"
4386		"%geom1_label = OpLabel\n"
4387		"%geom1_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_0 %c_i32_0\n"
4388		"%geom1_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_1 %c_i32_0\n"
4389		"%geom1_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_2 %c_i32_0\n"
4390		"%geom1_in_position_0 = OpLoad %v4f32 %geom1_gl_in_0_gl_position\n"
4391		"%geom1_in_position_1 = OpLoad %v4f32 %geom1_gl_in_1_gl_position\n"
4392		"%geom1_in_position_2 = OpLoad %v4f32 %geom1_gl_in_2_gl_position \n"
4393		"%geom1_in_color_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4394		"%geom1_in_color_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4395		"%geom1_in_color_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4396		"%geom1_in_color_0 = OpLoad %v4f32 %geom1_in_color_0_ptr\n"
4397		"%geom1_in_color_1 = OpLoad %v4f32 %geom1_in_color_1_ptr\n"
4398		"%geom1_in_color_2 = OpLoad %v4f32 %geom1_in_color_2_ptr\n"
4399		"OpStore %out_gl_position %geom1_in_position_0\n"
4400		"OpStore %out_color %geom1_in_color_0\n"
4401		"OpEmitVertex\n"
4402		"OpStore %out_gl_position %geom1_in_position_1\n"
4403		"OpStore %out_color %geom1_in_color_1\n"
4404		"OpEmitVertex\n"
4405		"OpStore %out_gl_position %geom1_in_position_2\n"
4406		"OpStore %out_color %geom1_in_color_2\n"
4407		"OpEmitVertex\n"
4408		"OpEndPrimitive\n"
4409		"OpReturn\n"
4410		"OpFunctionEnd\n"
4411
4412		"%geom2_main = OpFunction %void None %fun\n"
4413		"%geom2_label = OpLabel\n"
4414		"%geom2_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_0 %c_i32_0\n"
4415		"%geom2_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_1 %c_i32_0\n"
4416		"%geom2_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_2 %c_i32_0\n"
4417		"%geom2_in_position_0 = OpLoad %v4f32 %geom2_gl_in_0_gl_position\n"
4418		"%geom2_in_position_1 = OpLoad %v4f32 %geom2_gl_in_1_gl_position\n"
4419		"%geom2_in_position_2 = OpLoad %v4f32 %geom2_gl_in_2_gl_position \n"
4420		"%geom2_in_color_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4421		"%geom2_in_color_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4422		"%geom2_in_color_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4423		"%geom2_in_color_0 = OpLoad %v4f32 %geom2_in_color_0_ptr\n"
4424		"%geom2_in_color_1 = OpLoad %v4f32 %geom2_in_color_1_ptr\n"
4425		"%geom2_in_color_2 = OpLoad %v4f32 %geom2_in_color_2_ptr\n"
4426		"%geom2_transformed_in_color_0 = OpFSub %v4f32 %cval %geom2_in_color_0\n"
4427		"%geom2_transformed_in_color_1 = OpFSub %v4f32 %cval %geom2_in_color_1\n"
4428		"%geom2_transformed_in_color_2 = OpFSub %v4f32 %cval %geom2_in_color_2\n"
4429		"%geom2_transformed_in_color_0_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_0 %c_f32_1 %c_i32_3\n"
4430		"%geom2_transformed_in_color_1_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_1 %c_f32_1 %c_i32_3\n"
4431		"%geom2_transformed_in_color_2_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_2 %c_f32_1 %c_i32_3\n"
4432		"OpStore %out_gl_position %geom2_in_position_0\n"
4433		"OpStore %out_color %geom2_transformed_in_color_0_a\n"
4434		"OpEmitVertex\n"
4435		"OpStore %out_gl_position %geom2_in_position_1\n"
4436		"OpStore %out_color %geom2_transformed_in_color_1_a\n"
4437		"OpEmitVertex\n"
4438		"OpStore %out_gl_position %geom2_in_position_2\n"
4439		"OpStore %out_color %geom2_transformed_in_color_2_a\n"
4440		"OpEmitVertex\n"
4441		"OpEndPrimitive\n"
4442		"OpReturn\n"
4443		"OpFunctionEnd\n";
4444
4445	dst.spirvAsmSources.add("tessc") <<
4446		"OpCapability Tessellation\n"
4447		"OpMemoryModel Logical GLSL450\n"
4448		"OpEntryPoint TessellationControl %tessc1_main \"tessc1\" %out_color %gl_InvocationID %in_color %out_position %in_position %gl_TessLevelOuter %gl_TessLevelInner\n"
4449		"OpEntryPoint TessellationControl %tessc2_main \"tessc2\" %out_color %gl_InvocationID %in_color %out_position %in_position %gl_TessLevelOuter %gl_TessLevelInner\n"
4450		"OpExecutionMode %tessc1_main OutputVertices 3\n"
4451		"OpExecutionMode %tessc2_main OutputVertices 3\n"
4452		"OpName %tessc1_main \"tessc1\"\n"
4453		"OpName %tessc2_main \"tessc2\"\n"
4454		"OpName %out_color \"out_color\"\n"
4455		"OpName %gl_InvocationID \"gl_InvocationID\"\n"
4456		"OpName %in_color \"in_color\"\n"
4457		"OpName %out_position \"out_position\"\n"
4458		"OpName %in_position \"in_position\"\n"
4459		"OpName %gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
4460		"OpName %gl_TessLevelInner \"gl_TessLevelInner\"\n"
4461		"OpDecorate %out_color Location 1\n"
4462		"OpDecorate %gl_InvocationID BuiltIn InvocationId\n"
4463		"OpDecorate %in_color Location 1\n"
4464		"OpDecorate %out_position Location 2\n"
4465		"OpDecorate %in_position Location 2\n"
4466		"OpDecorate %gl_TessLevelOuter Patch\n"
4467		"OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter\n"
4468		"OpDecorate %gl_TessLevelInner Patch\n"
4469		"OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner\n"
4470		SPIRV_ASSEMBLY_TYPES
4471		SPIRV_ASSEMBLY_CONSTANTS
4472		SPIRV_ASSEMBLY_ARRAYS
4473		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4474		"%out_color = OpVariable %op_a3v4f32 Output\n"
4475		"%gl_InvocationID = OpVariable %ip_i32 Input\n"
4476		"%in_color = OpVariable %ip_a32v4f32 Input\n"
4477		"%out_position = OpVariable %op_a3v4f32 Output\n"
4478		"%in_position = OpVariable %ip_a32v4f32 Input\n"
4479		"%gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
4480		"%gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
4481
4482		"%tessc1_main = OpFunction %void None %fun\n"
4483		"%tessc1_label = OpLabel\n"
4484		"%tessc1_invocation_id = OpLoad %i32 %gl_InvocationID\n"
4485		"%tessc1_in_color_ptr = OpAccessChain %ip_v4f32 %in_color %tessc1_invocation_id\n"
4486		"%tessc1_in_position_ptr = OpAccessChain %ip_v4f32 %in_position %tessc1_invocation_id\n"
4487		"%tessc1_in_color_val = OpLoad %v4f32 %tessc1_in_color_ptr\n"
4488		"%tessc1_in_position_val = OpLoad %v4f32 %tessc1_in_position_ptr\n"
4489		"%tessc1_out_color_ptr = OpAccessChain %op_v4f32 %out_color %tessc1_invocation_id\n"
4490		"%tessc1_out_position_ptr = OpAccessChain %op_v4f32 %out_position %tessc1_invocation_id\n"
4491		"OpStore %tessc1_out_color_ptr %tessc1_in_color_val\n"
4492		"OpStore %tessc1_out_position_ptr %tessc1_in_position_val\n"
4493		"%tessc1_is_first_invocation = OpIEqual %bool %tessc1_invocation_id %c_i32_0\n"
4494		"OpSelectionMerge %tessc1_merge_label None\n"
4495		"OpBranchConditional %tessc1_is_first_invocation %tessc1_first_invocation %tessc1_merge_label\n"
4496		"%tessc1_first_invocation = OpLabel\n"
4497		"%tessc1_tess_outer_0 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_0\n"
4498		"%tessc1_tess_outer_1 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_1\n"
4499		"%tessc1_tess_outer_2 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_2\n"
4500		"%tessc1_tess_inner = OpAccessChain %op_f32 %gl_TessLevelInner %c_i32_0\n"
4501		"OpStore %tessc1_tess_outer_0 %c_f32_1\n"
4502		"OpStore %tessc1_tess_outer_1 %c_f32_1\n"
4503		"OpStore %tessc1_tess_outer_2 %c_f32_1\n"
4504		"OpStore %tessc1_tess_inner %c_f32_1\n"
4505		"OpBranch %tessc1_merge_label\n"
4506		"%tessc1_merge_label = OpLabel\n"
4507		"OpReturn\n"
4508		"OpFunctionEnd\n"
4509
4510		"%tessc2_main = OpFunction %void None %fun\n"
4511		"%tessc2_label = OpLabel\n"
4512		"%tessc2_invocation_id = OpLoad %i32 %gl_InvocationID\n"
4513		"%tessc2_in_color_ptr = OpAccessChain %ip_v4f32 %in_color %tessc2_invocation_id\n"
4514		"%tessc2_in_position_ptr = OpAccessChain %ip_v4f32 %in_position %tessc2_invocation_id\n"
4515		"%tessc2_in_color_val = OpLoad %v4f32 %tessc2_in_color_ptr\n"
4516		"%tessc2_in_position_val = OpLoad %v4f32 %tessc2_in_position_ptr\n"
4517		"%tessc2_out_color_ptr = OpAccessChain %op_v4f32 %out_color %tessc2_invocation_id\n"
4518		"%tessc2_out_position_ptr = OpAccessChain %op_v4f32 %out_position %tessc2_invocation_id\n"
4519		"%tessc2_transformed_color = OpFSub %v4f32 %cval %tessc2_in_color_val\n"
4520		"%tessc2_transformed_color_a = OpVectorInsertDynamic %v4f32 %tessc2_transformed_color %c_f32_1 %c_i32_3\n"
4521		"OpStore %tessc2_out_color_ptr %tessc2_transformed_color_a\n"
4522		"OpStore %tessc2_out_position_ptr %tessc2_in_position_val\n"
4523		"%tessc2_is_first_invocation = OpIEqual %bool %tessc2_invocation_id %c_i32_0\n"
4524		"OpSelectionMerge %tessc2_merge_label None\n"
4525		"OpBranchConditional %tessc2_is_first_invocation %tessc2_first_invocation %tessc2_merge_label\n"
4526		"%tessc2_first_invocation = OpLabel\n"
4527		"%tessc2_tess_outer_0 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_0\n"
4528		"%tessc2_tess_outer_1 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_1\n"
4529		"%tessc2_tess_outer_2 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_2\n"
4530		"%tessc2_tess_inner = OpAccessChain %op_f32 %gl_TessLevelInner %c_i32_0\n"
4531		"OpStore %tessc2_tess_outer_0 %c_f32_1\n"
4532		"OpStore %tessc2_tess_outer_1 %c_f32_1\n"
4533		"OpStore %tessc2_tess_outer_2 %c_f32_1\n"
4534		"OpStore %tessc2_tess_inner %c_f32_1\n"
4535		"OpBranch %tessc2_merge_label\n"
4536		"%tessc2_merge_label = OpLabel\n"
4537		"OpReturn\n"
4538		"OpFunctionEnd\n";
4539
4540	dst.spirvAsmSources.add("tesse") <<
4541		"OpCapability Tessellation\n"
4542		"OpCapability ClipDistance\n"
4543		"OpCapability CullDistance\n"
4544		"OpMemoryModel Logical GLSL450\n"
4545		"OpEntryPoint TessellationEvaluation %tesse1_main \"tesse1\" %stream %gl_tessCoord %in_position %out_color %in_color \n"
4546		"OpEntryPoint TessellationEvaluation %tesse2_main \"tesse2\" %stream %gl_tessCoord %in_position %out_color %in_color \n"
4547		"OpExecutionMode %tesse1_main Triangles\n"
4548		"OpExecutionMode %tesse2_main Triangles\n"
4549		"OpName %tesse1_main \"tesse1\"\n"
4550		"OpName %tesse2_main \"tesse2\"\n"
4551		"OpName %per_vertex_out \"gl_PerVertex\"\n"
4552		"OpMemberName %per_vertex_out 0 \"gl_Position\"\n"
4553		"OpMemberName %per_vertex_out 1 \"gl_PointSize\"\n"
4554		"OpMemberName %per_vertex_out 2 \"gl_ClipDistance\"\n"
4555		"OpMemberName %per_vertex_out 3 \"gl_CullDistance\"\n"
4556		"OpName %stream \"\"\n"
4557		"OpName %gl_tessCoord \"gl_TessCoord\"\n"
4558		"OpName %in_position \"in_position\"\n"
4559		"OpName %out_color \"out_color\"\n"
4560		"OpName %in_color \"in_color\"\n"
4561		"OpMemberDecorate %per_vertex_out 0 BuiltIn Position\n"
4562		"OpMemberDecorate %per_vertex_out 1 BuiltIn PointSize\n"
4563		"OpMemberDecorate %per_vertex_out 2 BuiltIn ClipDistance\n"
4564		"OpMemberDecorate %per_vertex_out 3 BuiltIn CullDistance\n"
4565		"OpDecorate %per_vertex_out Block\n"
4566		"OpDecorate %gl_tessCoord BuiltIn TessCoord\n"
4567		"OpDecorate %in_position Location 2\n"
4568		"OpDecorate %out_color Location 1\n"
4569		"OpDecorate %in_color Location 1\n"
4570		SPIRV_ASSEMBLY_TYPES
4571		SPIRV_ASSEMBLY_CONSTANTS
4572		SPIRV_ASSEMBLY_ARRAYS
4573		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4574		"%per_vertex_out = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4575		"%op_per_vertex_out = OpTypePointer Output %per_vertex_out\n"
4576		"%stream = OpVariable %op_per_vertex_out Output\n"
4577		"%gl_tessCoord = OpVariable %ip_v3f32 Input\n"
4578		"%in_position = OpVariable %ip_a32v4f32 Input\n"
4579		"%out_color = OpVariable %op_v4f32 Output\n"
4580		"%in_color = OpVariable %ip_a32v4f32 Input\n"
4581
4582		"%tesse1_main = OpFunction %void None %fun\n"
4583		"%tesse1_label = OpLabel\n"
4584		"%tesse1_tc_0_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_0\n"
4585		"%tesse1_tc_1_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_1\n"
4586		"%tesse1_tc_2_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_2\n"
4587		"%tesse1_tc_0 = OpLoad %f32 %tesse1_tc_0_ptr\n"
4588		"%tesse1_tc_1 = OpLoad %f32 %tesse1_tc_1_ptr\n"
4589		"%tesse1_tc_2 = OpLoad %f32 %tesse1_tc_2_ptr\n"
4590		"%tesse1_in_pos_0_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_0\n"
4591		"%tesse1_in_pos_1_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_1\n"
4592		"%tesse1_in_pos_2_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_2\n"
4593		"%tesse1_in_pos_0 = OpLoad %v4f32 %tesse1_in_pos_0_ptr\n"
4594		"%tesse1_in_pos_1 = OpLoad %v4f32 %tesse1_in_pos_1_ptr\n"
4595		"%tesse1_in_pos_2 = OpLoad %v4f32 %tesse1_in_pos_2_ptr\n"
4596		"%tesse1_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_0 %tesse1_in_pos_0\n"
4597		"%tesse1_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_1 %tesse1_in_pos_1\n"
4598		"%tesse1_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_2 %tesse1_in_pos_2\n"
4599		"%tesse1_out_pos_ptr = OpAccessChain %op_v4f32 %stream %c_i32_0\n"
4600		"%tesse1_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse1_in_pos_0_weighted %tesse1_in_pos_1_weighted\n"
4601		"%tesse1_computed_out = OpFAdd %v4f32 %tesse1_in_pos_0_plus_pos_1 %tesse1_in_pos_2_weighted\n"
4602		"OpStore %tesse1_out_pos_ptr %tesse1_computed_out\n"
4603		"%tesse1_in_clr_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4604		"%tesse1_in_clr_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4605		"%tesse1_in_clr_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4606		"%tesse1_in_clr_0 = OpLoad %v4f32 %tesse1_in_clr_0_ptr\n"
4607		"%tesse1_in_clr_1 = OpLoad %v4f32 %tesse1_in_clr_1_ptr\n"
4608		"%tesse1_in_clr_2 = OpLoad %v4f32 %tesse1_in_clr_2_ptr\n"
4609		"%tesse1_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_0 %tesse1_in_clr_0\n"
4610		"%tesse1_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_1 %tesse1_in_clr_1\n"
4611		"%tesse1_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_2 %tesse1_in_clr_2\n"
4612		"%tesse1_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse1_in_clr_0_weighted %tesse1_in_clr_1_weighted\n"
4613		"%tesse1_computed_clr = OpFAdd %v4f32 %tesse1_in_clr_0_plus_col_1 %tesse1_in_clr_2_weighted\n"
4614		"OpStore %out_color %tesse1_computed_clr\n"
4615		"OpReturn\n"
4616		"OpFunctionEnd\n"
4617
4618		"%tesse2_main = OpFunction %void None %fun\n"
4619		"%tesse2_label = OpLabel\n"
4620		"%tesse2_tc_0_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_0\n"
4621		"%tesse2_tc_1_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_1\n"
4622		"%tesse2_tc_2_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_2\n"
4623		"%tesse2_tc_0 = OpLoad %f32 %tesse2_tc_0_ptr\n"
4624		"%tesse2_tc_1 = OpLoad %f32 %tesse2_tc_1_ptr\n"
4625		"%tesse2_tc_2 = OpLoad %f32 %tesse2_tc_2_ptr\n"
4626		"%tesse2_in_pos_0_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_0\n"
4627		"%tesse2_in_pos_1_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_1\n"
4628		"%tesse2_in_pos_2_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_2\n"
4629		"%tesse2_in_pos_0 = OpLoad %v4f32 %tesse2_in_pos_0_ptr\n"
4630		"%tesse2_in_pos_1 = OpLoad %v4f32 %tesse2_in_pos_1_ptr\n"
4631		"%tesse2_in_pos_2 = OpLoad %v4f32 %tesse2_in_pos_2_ptr\n"
4632		"%tesse2_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_0 %tesse2_in_pos_0\n"
4633		"%tesse2_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_1 %tesse2_in_pos_1\n"
4634		"%tesse2_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_2 %tesse2_in_pos_2\n"
4635		"%tesse2_out_pos_ptr = OpAccessChain %op_v4f32 %stream %c_i32_0\n"
4636		"%tesse2_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse2_in_pos_0_weighted %tesse2_in_pos_1_weighted\n"
4637		"%tesse2_computed_out = OpFAdd %v4f32 %tesse2_in_pos_0_plus_pos_1 %tesse2_in_pos_2_weighted\n"
4638		"OpStore %tesse2_out_pos_ptr %tesse2_computed_out\n"
4639		"%tesse2_in_clr_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4640		"%tesse2_in_clr_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4641		"%tesse2_in_clr_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4642		"%tesse2_in_clr_0 = OpLoad %v4f32 %tesse2_in_clr_0_ptr\n"
4643		"%tesse2_in_clr_1 = OpLoad %v4f32 %tesse2_in_clr_1_ptr\n"
4644		"%tesse2_in_clr_2 = OpLoad %v4f32 %tesse2_in_clr_2_ptr\n"
4645		"%tesse2_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_0 %tesse2_in_clr_0\n"
4646		"%tesse2_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_1 %tesse2_in_clr_1\n"
4647		"%tesse2_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_2 %tesse2_in_clr_2\n"
4648		"%tesse2_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse2_in_clr_0_weighted %tesse2_in_clr_1_weighted\n"
4649		"%tesse2_computed_clr = OpFAdd %v4f32 %tesse2_in_clr_0_plus_col_1 %tesse2_in_clr_2_weighted\n"
4650		"%tesse2_clr_transformed = OpFSub %v4f32 %cval %tesse2_computed_clr\n"
4651		"%tesse2_clr_transformed_a = OpVectorInsertDynamic %v4f32 %tesse2_clr_transformed %c_f32_1 %c_i32_3\n"
4652		"OpStore %out_color %tesse2_clr_transformed_a\n"
4653		"OpReturn\n"
4654		"OpFunctionEnd\n";
4655}
4656
4657// Sets up and runs a Vulkan pipeline, then spot-checks the resulting image.
4658// Feeds the pipeline a set of colored triangles, whose colors must then appear
4659// in the rendered image.  The surface is cleared before executing the pipeline,
4660// so whatever the shaders draw can be directly spot-checked.
4661TestStatus runAndVerifyDefaultPipeline (Context& context, InstanceContext instance)
4662{
4663	const VkDevice								vkDevice				= context.getDevice();
4664	const DeviceInterface&						vk						= context.getDeviceInterface();
4665	const VkQueue								queue					= context.getUniversalQueue();
4666	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
4667	const tcu::UVec2							renderSize				(256, 256);
4668	vector<ModuleHandleSp>						modules;
4669	map<VkShaderStageFlagBits, VkShaderModule>	moduleByStage;
4670	const int									testSpecificSeed		= 31354125;
4671	const int									seed					= context.getTestContext().getCommandLine().getBaseSeed() ^ testSpecificSeed;
4672	bool										supportsGeometry		= false;
4673	bool										supportsTessellation	= false;
4674	bool										hasTessellation         = false;
4675
4676	const VkPhysicalDeviceFeatures&				features				= context.getDeviceFeatures();
4677	supportsGeometry		= features.geometryShader == VK_TRUE;
4678	supportsTessellation	= features.tessellationShader == VK_TRUE;
4679	hasTessellation			= (instance.requiredStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ||
4680								(instance.requiredStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
4681
4682	if (hasTessellation && !supportsTessellation)
4683	{
4684		throw tcu::NotSupportedError(std::string("Tessellation not supported"));
4685	}
4686
4687	if ((instance.requiredStages & VK_SHADER_STAGE_GEOMETRY_BIT) &&
4688		!supportsGeometry)
4689	{
4690		throw tcu::NotSupportedError(std::string("Geometry not supported"));
4691	}
4692
4693	de::Random(seed).shuffle(instance.inputColors, instance.inputColors+4);
4694	de::Random(seed).shuffle(instance.outputColors, instance.outputColors+4);
4695	const Vec4								vertexData[]			=
4696	{
4697		// Upper left corner:
4698		Vec4(-1.0f, -1.0f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4699		Vec4(-0.5f, -1.0f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4700		Vec4(-1.0f, -0.5f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4701
4702		// Upper right corner:
4703		Vec4(+0.5f, -1.0f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4704		Vec4(+1.0f, -1.0f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4705		Vec4(+1.0f, -0.5f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4706
4707		// Lower left corner:
4708		Vec4(-1.0f, +0.5f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4709		Vec4(-0.5f, +1.0f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4710		Vec4(-1.0f, +1.0f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4711
4712		// Lower right corner:
4713		Vec4(+1.0f, +0.5f, 0.0f, 1.0f), instance.inputColors[3].toVec(),
4714		Vec4(+1.0f, +1.0f, 0.0f, 1.0f), instance.inputColors[3].toVec(),
4715		Vec4(+0.5f, +1.0f, 0.0f, 1.0f), instance.inputColors[3].toVec()
4716	};
4717	const size_t							singleVertexDataSize	= 2 * sizeof(Vec4);
4718	const size_t							vertexCount				= sizeof(vertexData) / singleVertexDataSize;
4719
4720	const VkBufferCreateInfo				vertexBufferParams		=
4721	{
4722		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	//	VkStructureType		sType;
4723		DE_NULL,								//	const void*			pNext;
4724		0u,										//	VkBufferCreateFlags	flags;
4725		(VkDeviceSize)sizeof(vertexData),		//	VkDeviceSize		size;
4726		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,		//	VkBufferUsageFlags	usage;
4727		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode		sharingMode;
4728		1u,										//	deUint32			queueFamilyCount;
4729		&queueFamilyIndex,						//	const deUint32*		pQueueFamilyIndices;
4730	};
4731	const Unique<VkBuffer>					vertexBuffer			(createBuffer(vk, vkDevice, &vertexBufferParams));
4732	const UniquePtr<Allocation>				vertexBufferMemory		(context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *vertexBuffer), MemoryRequirement::HostVisible));
4733
4734	VK_CHECK(vk.bindBufferMemory(vkDevice, *vertexBuffer, vertexBufferMemory->getMemory(), vertexBufferMemory->getOffset()));
4735
4736	const VkDeviceSize						imageSizeBytes			= (VkDeviceSize)(sizeof(deUint32)*renderSize.x()*renderSize.y());
4737	const VkBufferCreateInfo				readImageBufferParams	=
4738	{
4739		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		//	VkStructureType		sType;
4740		DE_NULL,									//	const void*			pNext;
4741		0u,											//	VkBufferCreateFlags	flags;
4742		imageSizeBytes,								//	VkDeviceSize		size;
4743		VK_BUFFER_USAGE_TRANSFER_DST_BIT,			//	VkBufferUsageFlags	usage;
4744		VK_SHARING_MODE_EXCLUSIVE,					//	VkSharingMode		sharingMode;
4745		1u,											//	deUint32			queueFamilyCount;
4746		&queueFamilyIndex,							//	const deUint32*		pQueueFamilyIndices;
4747	};
4748	const Unique<VkBuffer>					readImageBuffer			(createBuffer(vk, vkDevice, &readImageBufferParams));
4749	const UniquePtr<Allocation>				readImageBufferMemory	(context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
4750
4751	VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
4752
4753	const VkImageCreateInfo					imageParams				=
4754	{
4755		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,									//	VkStructureType		sType;
4756		DE_NULL,																//	const void*			pNext;
4757		0u,																		//	VkImageCreateFlags	flags;
4758		VK_IMAGE_TYPE_2D,														//	VkImageType			imageType;
4759		VK_FORMAT_R8G8B8A8_UNORM,												//	VkFormat			format;
4760		{ renderSize.x(), renderSize.y(), 1 },									//	VkExtent3D			extent;
4761		1u,																		//	deUint32			mipLevels;
4762		1u,																		//	deUint32			arraySize;
4763		VK_SAMPLE_COUNT_1_BIT,													//	deUint32			samples;
4764		VK_IMAGE_TILING_OPTIMAL,												//	VkImageTiling		tiling;
4765		VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT,	//	VkImageUsageFlags	usage;
4766		VK_SHARING_MODE_EXCLUSIVE,												//	VkSharingMode		sharingMode;
4767		1u,																		//	deUint32			queueFamilyCount;
4768		&queueFamilyIndex,														//	const deUint32*		pQueueFamilyIndices;
4769		VK_IMAGE_LAYOUT_UNDEFINED,												//	VkImageLayout		initialLayout;
4770	};
4771
4772	const Unique<VkImage>					image					(createImage(vk, vkDevice, &imageParams));
4773	const UniquePtr<Allocation>				imageMemory				(context.getDefaultAllocator().allocate(getImageMemoryRequirements(vk, vkDevice, *image), MemoryRequirement::Any));
4774
4775	VK_CHECK(vk.bindImageMemory(vkDevice, *image, imageMemory->getMemory(), imageMemory->getOffset()));
4776
4777	const VkAttachmentDescription			colorAttDesc			=
4778	{
4779		0u,												//	VkAttachmentDescriptionFlags	flags;
4780		VK_FORMAT_R8G8B8A8_UNORM,						//	VkFormat						format;
4781		VK_SAMPLE_COUNT_1_BIT,							//	deUint32						samples;
4782		VK_ATTACHMENT_LOAD_OP_CLEAR,					//	VkAttachmentLoadOp				loadOp;
4783		VK_ATTACHMENT_STORE_OP_STORE,					//	VkAttachmentStoreOp				storeOp;
4784		VK_ATTACHMENT_LOAD_OP_DONT_CARE,				//	VkAttachmentLoadOp				stencilLoadOp;
4785		VK_ATTACHMENT_STORE_OP_DONT_CARE,				//	VkAttachmentStoreOp				stencilStoreOp;
4786		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout					initialLayout;
4787		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout					finalLayout;
4788	};
4789	const VkAttachmentReference				colorAttRef				=
4790	{
4791		0u,												//	deUint32		attachment;
4792		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout	layout;
4793	};
4794	const VkSubpassDescription				subpassDesc				=
4795	{
4796		0u,												//	VkSubpassDescriptionFlags		flags;
4797		VK_PIPELINE_BIND_POINT_GRAPHICS,				//	VkPipelineBindPoint				pipelineBindPoint;
4798		0u,												//	deUint32						inputCount;
4799		DE_NULL,										//	const VkAttachmentReference*	pInputAttachments;
4800		1u,												//	deUint32						colorCount;
4801		&colorAttRef,									//	const VkAttachmentReference*	pColorAttachments;
4802		DE_NULL,										//	const VkAttachmentReference*	pResolveAttachments;
4803		DE_NULL,										//	const VkAttachmentReference*	pDepthStencilAttachment;
4804		0u,												//	deUint32						preserveCount;
4805		DE_NULL,										//	const VkAttachmentReference*	pPreserveAttachments;
4806
4807	};
4808	const VkRenderPassCreateInfo			renderPassParams		=
4809	{
4810		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		//	VkStructureType					sType;
4811		DE_NULL,										//	const void*						pNext;
4812		(VkRenderPassCreateFlags)0,
4813		1u,												//	deUint32						attachmentCount;
4814		&colorAttDesc,									//	const VkAttachmentDescription*	pAttachments;
4815		1u,												//	deUint32						subpassCount;
4816		&subpassDesc,									//	const VkSubpassDescription*		pSubpasses;
4817		0u,												//	deUint32						dependencyCount;
4818		DE_NULL,										//	const VkSubpassDependency*		pDependencies;
4819	};
4820	const Unique<VkRenderPass>				renderPass				(createRenderPass(vk, vkDevice, &renderPassParams));
4821
4822	const VkImageViewCreateInfo				colorAttViewParams		=
4823	{
4824		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,		//	VkStructureType				sType;
4825		DE_NULL,										//	const void*					pNext;
4826		0u,												//	VkImageViewCreateFlags		flags;
4827		*image,											//	VkImage						image;
4828		VK_IMAGE_VIEW_TYPE_2D,							//	VkImageViewType				viewType;
4829		VK_FORMAT_R8G8B8A8_UNORM,						//	VkFormat					format;
4830		{
4831			VK_COMPONENT_SWIZZLE_R,
4832			VK_COMPONENT_SWIZZLE_G,
4833			VK_COMPONENT_SWIZZLE_B,
4834			VK_COMPONENT_SWIZZLE_A
4835		},												//	VkChannelMapping			channels;
4836		{
4837			VK_IMAGE_ASPECT_COLOR_BIT,						//	VkImageAspectFlags	aspectMask;
4838			0u,												//	deUint32			baseMipLevel;
4839			1u,												//	deUint32			mipLevels;
4840			0u,												//	deUint32			baseArrayLayer;
4841			1u,												//	deUint32			arraySize;
4842		},												//	VkImageSubresourceRange		subresourceRange;
4843	};
4844	const Unique<VkImageView>				colorAttView			(createImageView(vk, vkDevice, &colorAttViewParams));
4845
4846
4847	// Pipeline layout
4848	const VkPipelineLayoutCreateInfo		pipelineLayoutParams	=
4849	{
4850		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,			//	VkStructureType					sType;
4851		DE_NULL,												//	const void*						pNext;
4852		(VkPipelineLayoutCreateFlags)0,
4853		0u,														//	deUint32						descriptorSetCount;
4854		DE_NULL,												//	const VkDescriptorSetLayout*	pSetLayouts;
4855		0u,														//	deUint32						pushConstantRangeCount;
4856		DE_NULL,												//	const VkPushConstantRange*		pPushConstantRanges;
4857	};
4858	const Unique<VkPipelineLayout>			pipelineLayout			(createPipelineLayout(vk, vkDevice, &pipelineLayoutParams));
4859
4860	// Pipeline
4861	vector<VkPipelineShaderStageCreateInfo>		shaderStageParams;
4862	// We need these vectors to make sure that information about specialization constants for each stage can outlive createGraphicsPipeline().
4863	vector<vector<VkSpecializationMapEntry> >	specConstantEntries;
4864	vector<VkSpecializationInfo>				specializationInfos;
4865	createPipelineShaderStages(vk, vkDevice, instance, context, modules, shaderStageParams);
4866
4867	// And we don't want the reallocation of these vectors to invalidate pointers pointing to their contents.
4868	specConstantEntries.reserve(shaderStageParams.size());
4869	specializationInfos.reserve(shaderStageParams.size());
4870
4871	// Patch the specialization info field in PipelineShaderStageCreateInfos.
4872	for (vector<VkPipelineShaderStageCreateInfo>::iterator stageInfo = shaderStageParams.begin(); stageInfo != shaderStageParams.end(); ++stageInfo)
4873	{
4874		const StageToSpecConstantMap::const_iterator stageIt = instance.specConstants.find(stageInfo->stage);
4875
4876		if (stageIt != instance.specConstants.end())
4877		{
4878			const size_t						numSpecConstants	= stageIt->second.size();
4879			vector<VkSpecializationMapEntry>	entries;
4880			VkSpecializationInfo				specInfo;
4881
4882			entries.resize(numSpecConstants);
4883
4884			// Only support 32-bit integers as spec constants now. And their constant IDs are numbered sequentially starting from 0.
4885			for (size_t ndx = 0; ndx < numSpecConstants; ++ndx)
4886			{
4887				entries[ndx].constantID	= (deUint32)ndx;
4888				entries[ndx].offset		= deUint32(ndx * sizeof(deInt32));
4889				entries[ndx].size		= sizeof(deInt32);
4890			}
4891
4892			specConstantEntries.push_back(entries);
4893
4894			specInfo.mapEntryCount	= (deUint32)numSpecConstants;
4895			specInfo.pMapEntries	= specConstantEntries.back().data();
4896			specInfo.dataSize		= numSpecConstants * sizeof(deInt32);
4897			specInfo.pData			= stageIt->second.data();
4898			specializationInfos.push_back(specInfo);
4899
4900			stageInfo->pSpecializationInfo = &specializationInfos.back();
4901		}
4902	}
4903	const VkPipelineDepthStencilStateCreateInfo	depthStencilParams		=
4904	{
4905		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	//	VkStructureType		sType;
4906		DE_NULL,													//	const void*			pNext;
4907		(VkPipelineDepthStencilStateCreateFlags)0,
4908		DE_FALSE,													//	deUint32			depthTestEnable;
4909		DE_FALSE,													//	deUint32			depthWriteEnable;
4910		VK_COMPARE_OP_ALWAYS,										//	VkCompareOp			depthCompareOp;
4911		DE_FALSE,													//	deUint32			depthBoundsTestEnable;
4912		DE_FALSE,													//	deUint32			stencilTestEnable;
4913		{
4914			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilFailOp;
4915			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilPassOp;
4916			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilDepthFailOp;
4917			VK_COMPARE_OP_ALWAYS,										//	VkCompareOp	stencilCompareOp;
4918			0u,															//	deUint32	stencilCompareMask;
4919			0u,															//	deUint32	stencilWriteMask;
4920			0u,															//	deUint32	stencilReference;
4921		},															//	VkStencilOpState	front;
4922		{
4923			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilFailOp;
4924			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilPassOp;
4925			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilDepthFailOp;
4926			VK_COMPARE_OP_ALWAYS,										//	VkCompareOp	stencilCompareOp;
4927			0u,															//	deUint32	stencilCompareMask;
4928			0u,															//	deUint32	stencilWriteMask;
4929			0u,															//	deUint32	stencilReference;
4930		},															//	VkStencilOpState	back;
4931		-1.0f,														//	float				minDepthBounds;
4932		+1.0f,														//	float				maxDepthBounds;
4933	};
4934	const VkViewport						viewport0				=
4935	{
4936		0.0f,														//	float	originX;
4937		0.0f,														//	float	originY;
4938		(float)renderSize.x(),										//	float	width;
4939		(float)renderSize.y(),										//	float	height;
4940		0.0f,														//	float	minDepth;
4941		1.0f,														//	float	maxDepth;
4942	};
4943	const VkRect2D							scissor0				=
4944	{
4945		{
4946			0u,															//	deInt32	x;
4947			0u,															//	deInt32	y;
4948		},															//	VkOffset2D	offset;
4949		{
4950			renderSize.x(),												//	deInt32	width;
4951			renderSize.y(),												//	deInt32	height;
4952		},															//	VkExtent2D	extent;
4953	};
4954	const VkPipelineViewportStateCreateInfo		viewportParams			=
4955	{
4956		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,		//	VkStructureType		sType;
4957		DE_NULL,													//	const void*			pNext;
4958		(VkPipelineViewportStateCreateFlags)0,
4959		1u,															//	deUint32			viewportCount;
4960		&viewport0,
4961		1u,
4962		&scissor0
4963	};
4964	const VkSampleMask							sampleMask				= ~0u;
4965	const VkPipelineMultisampleStateCreateInfo	multisampleParams		=
4966	{
4967		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	//	VkStructureType			sType;
4968		DE_NULL,													//	const void*				pNext;
4969		(VkPipelineMultisampleStateCreateFlags)0,
4970		VK_SAMPLE_COUNT_1_BIT,										//	VkSampleCountFlagBits	rasterSamples;
4971		DE_FALSE,													//	deUint32				sampleShadingEnable;
4972		0.0f,														//	float					minSampleShading;
4973		&sampleMask,												//	const VkSampleMask*		pSampleMask;
4974		DE_FALSE,													//	VkBool32				alphaToCoverageEnable;
4975		DE_FALSE,													//	VkBool32				alphaToOneEnable;
4976	};
4977	const VkPipelineRasterizationStateCreateInfo	rasterParams		=
4978	{
4979		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	//	VkStructureType	sType;
4980		DE_NULL,													//	const void*		pNext;
4981		(VkPipelineRasterizationStateCreateFlags)0,
4982		DE_TRUE,													//	deUint32		depthClipEnable;
4983		DE_FALSE,													//	deUint32		rasterizerDiscardEnable;
4984		VK_POLYGON_MODE_FILL,										//	VkFillMode		fillMode;
4985		VK_CULL_MODE_NONE,											//	VkCullMode		cullMode;
4986		VK_FRONT_FACE_COUNTER_CLOCKWISE,							//	VkFrontFace		frontFace;
4987		VK_FALSE,													//	VkBool32		depthBiasEnable;
4988		0.0f,														//	float			depthBias;
4989		0.0f,														//	float			depthBiasClamp;
4990		0.0f,														//	float			slopeScaledDepthBias;
4991		1.0f,														//	float			lineWidth;
4992	};
4993	const VkPrimitiveTopology topology = hasTessellation? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
4994	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyParams	=
4995	{
4996		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	//	VkStructureType		sType;
4997		DE_NULL,														//	const void*			pNext;
4998		(VkPipelineInputAssemblyStateCreateFlags)0,
4999		topology,														//	VkPrimitiveTopology	topology;
5000		DE_FALSE,														//	deUint32			primitiveRestartEnable;
5001	};
5002	const VkVertexInputBindingDescription		vertexBinding0 =
5003	{
5004		0u,									// deUint32					binding;
5005		deUint32(singleVertexDataSize),		// deUint32					strideInBytes;
5006		VK_VERTEX_INPUT_RATE_VERTEX			// VkVertexInputStepRate	stepRate;
5007	};
5008	const VkVertexInputAttributeDescription		vertexAttrib0[2] =
5009	{
5010		{
5011			0u,									// deUint32	location;
5012			0u,									// deUint32	binding;
5013			VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat	format;
5014			0u									// deUint32	offsetInBytes;
5015		},
5016		{
5017			1u,									// deUint32	location;
5018			0u,									// deUint32	binding;
5019			VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat	format;
5020			sizeof(Vec4),						// deUint32	offsetInBytes;
5021		}
5022	};
5023
5024	const VkPipelineVertexInputStateCreateInfo	vertexInputStateParams	=
5025	{
5026		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	//	VkStructureType								sType;
5027		DE_NULL,													//	const void*									pNext;
5028		(VkPipelineVertexInputStateCreateFlags)0,
5029		1u,															//	deUint32									bindingCount;
5030		&vertexBinding0,											//	const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
5031		2u,															//	deUint32									attributeCount;
5032		vertexAttrib0,												//	const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
5033	};
5034	const VkPipelineColorBlendAttachmentState	attBlendParams			=
5035	{
5036		DE_FALSE,													//	deUint32		blendEnable;
5037		VK_BLEND_FACTOR_ONE,										//	VkBlend			srcBlendColor;
5038		VK_BLEND_FACTOR_ZERO,										//	VkBlend			destBlendColor;
5039		VK_BLEND_OP_ADD,											//	VkBlendOp		blendOpColor;
5040		VK_BLEND_FACTOR_ONE,										//	VkBlend			srcBlendAlpha;
5041		VK_BLEND_FACTOR_ZERO,										//	VkBlend			destBlendAlpha;
5042		VK_BLEND_OP_ADD,											//	VkBlendOp		blendOpAlpha;
5043		(VK_COLOR_COMPONENT_R_BIT|
5044		 VK_COLOR_COMPONENT_G_BIT|
5045		 VK_COLOR_COMPONENT_B_BIT|
5046		 VK_COLOR_COMPONENT_A_BIT),									//	VkChannelFlags	channelWriteMask;
5047	};
5048	const VkPipelineColorBlendStateCreateInfo	blendParams				=
5049	{
5050		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
5051		DE_NULL,													//	const void*									pNext;
5052		(VkPipelineColorBlendStateCreateFlags)0,
5053		DE_FALSE,													//	VkBool32									logicOpEnable;
5054		VK_LOGIC_OP_COPY,											//	VkLogicOp									logicOp;
5055		1u,															//	deUint32									attachmentCount;
5056		&attBlendParams,											//	const VkPipelineColorBlendAttachmentState*	pAttachments;
5057		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConst[4];
5058	};
5059	const VkPipelineDynamicStateCreateInfo	dynamicStateInfo		=
5060	{
5061		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	//	VkStructureType			sType;
5062		DE_NULL,												//	const void*				pNext;
5063		(VkPipelineDynamicStateCreateFlags)0,
5064		0u,														//	deUint32				dynamicStateCount;
5065		DE_NULL													//	const VkDynamicState*	pDynamicStates;
5066	};
5067
5068	const VkPipelineTessellationStateCreateInfo	tessellationState	=
5069	{
5070		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
5071		DE_NULL,
5072		(VkPipelineTessellationStateCreateFlags)0,
5073		3u
5074	};
5075
5076	const VkPipelineTessellationStateCreateInfo* tessellationInfo	=	hasTessellation ? &tessellationState: DE_NULL;
5077	const VkGraphicsPipelineCreateInfo		pipelineParams			=
5078	{
5079		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,		//	VkStructureType									sType;
5080		DE_NULL,												//	const void*										pNext;
5081		0u,														//	VkPipelineCreateFlags							flags;
5082		(deUint32)shaderStageParams.size(),						//	deUint32										stageCount;
5083		&shaderStageParams[0],									//	const VkPipelineShaderStageCreateInfo*			pStages;
5084		&vertexInputStateParams,								//	const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
5085		&inputAssemblyParams,									//	const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
5086		tessellationInfo,										//	const VkPipelineTessellationStateCreateInfo*	pTessellationState;
5087		&viewportParams,										//	const VkPipelineViewportStateCreateInfo*		pViewportState;
5088		&rasterParams,											//	const VkPipelineRasterStateCreateInfo*			pRasterState;
5089		&multisampleParams,										//	const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
5090		&depthStencilParams,									//	const VkPipelineDepthStencilStateCreateInfo*	pDepthStencilState;
5091		&blendParams,											//	const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
5092		&dynamicStateInfo,										//	const VkPipelineDynamicStateCreateInfo*			pDynamicState;
5093		*pipelineLayout,										//	VkPipelineLayout								layout;
5094		*renderPass,											//	VkRenderPass									renderPass;
5095		0u,														//	deUint32										subpass;
5096		DE_NULL,												//	VkPipeline										basePipelineHandle;
5097		0u,														//	deInt32											basePipelineIndex;
5098	};
5099
5100	const Unique<VkPipeline>				pipeline				(createGraphicsPipeline(vk, vkDevice, DE_NULL, &pipelineParams));
5101
5102	// Framebuffer
5103	const VkFramebufferCreateInfo			framebufferParams		=
5104	{
5105		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,				//	VkStructureType		sType;
5106		DE_NULL,												//	const void*			pNext;
5107		(VkFramebufferCreateFlags)0,
5108		*renderPass,											//	VkRenderPass		renderPass;
5109		1u,														//	deUint32			attachmentCount;
5110		&*colorAttView,											//	const VkImageView*	pAttachments;
5111		(deUint32)renderSize.x(),								//	deUint32			width;
5112		(deUint32)renderSize.y(),								//	deUint32			height;
5113		1u,														//	deUint32			layers;
5114	};
5115	const Unique<VkFramebuffer>				framebuffer				(createFramebuffer(vk, vkDevice, &framebufferParams));
5116
5117	const VkCommandPoolCreateInfo			cmdPoolParams			=
5118	{
5119		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,					//	VkStructureType			sType;
5120		DE_NULL,													//	const void*				pNext;
5121		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,				//	VkCmdPoolCreateFlags	flags;
5122		queueFamilyIndex,											//	deUint32				queueFamilyIndex;
5123	};
5124	const Unique<VkCommandPool>				cmdPool					(createCommandPool(vk, vkDevice, &cmdPoolParams));
5125
5126	// Command buffer
5127	const VkCommandBufferAllocateInfo		cmdBufParams			=
5128	{
5129		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,			//	VkStructureType			sType;
5130		DE_NULL,												//	const void*				pNext;
5131		*cmdPool,												//	VkCmdPool				pool;
5132		VK_COMMAND_BUFFER_LEVEL_PRIMARY,						//	VkCmdBufferLevel		level;
5133		1u,														//	deUint32				count;
5134	};
5135	const Unique<VkCommandBuffer>			cmdBuf					(allocateCommandBuffer(vk, vkDevice, &cmdBufParams));
5136
5137	const VkCommandBufferBeginInfo			cmdBufBeginParams		=
5138	{
5139		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,			//	VkStructureType				sType;
5140		DE_NULL,												//	const void*					pNext;
5141		(VkCommandBufferUsageFlags)0,
5142		(const VkCommandBufferInheritanceInfo*)DE_NULL,
5143	};
5144
5145	// Record commands
5146	VK_CHECK(vk.beginCommandBuffer(*cmdBuf, &cmdBufBeginParams));
5147
5148	{
5149		const VkMemoryBarrier		vertFlushBarrier	=
5150		{
5151			VK_STRUCTURE_TYPE_MEMORY_BARRIER,			//	VkStructureType		sType;
5152			DE_NULL,									//	const void*			pNext;
5153			VK_ACCESS_HOST_WRITE_BIT,					//	VkMemoryOutputFlags	outputMask;
5154			VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,		//	VkMemoryInputFlags	inputMask;
5155		};
5156		const VkImageMemoryBarrier	colorAttBarrier		=
5157		{
5158			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		//	VkStructureType			sType;
5159			DE_NULL,									//	const void*				pNext;
5160			0u,											//	VkMemoryOutputFlags		outputMask;
5161			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//	VkMemoryInputFlags		inputMask;
5162			VK_IMAGE_LAYOUT_UNDEFINED,					//	VkImageLayout			oldLayout;
5163			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout			newLayout;
5164			queueFamilyIndex,							//	deUint32				srcQueueFamilyIndex;
5165			queueFamilyIndex,							//	deUint32				destQueueFamilyIndex;
5166			*image,										//	VkImage					image;
5167			{
5168				VK_IMAGE_ASPECT_COLOR_BIT,					//	VkImageAspect	aspect;
5169				0u,											//	deUint32		baseMipLevel;
5170				1u,											//	deUint32		mipLevels;
5171				0u,											//	deUint32		baseArraySlice;
5172				1u,											//	deUint32		arraySize;
5173			}											//	VkImageSubresourceRange	subresourceRange;
5174		};
5175		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, (VkDependencyFlags)0, 1, &vertFlushBarrier, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &colorAttBarrier);
5176	}
5177
5178	{
5179		const VkClearValue			clearValue		= makeClearValueColorF32(0.125f, 0.25f, 0.75f, 1.0f);
5180		const VkRenderPassBeginInfo	passBeginParams	=
5181		{
5182			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,			//	VkStructureType		sType;
5183			DE_NULL,											//	const void*			pNext;
5184			*renderPass,										//	VkRenderPass		renderPass;
5185			*framebuffer,										//	VkFramebuffer		framebuffer;
5186			{ { 0, 0 }, { renderSize.x(), renderSize.y() } },	//	VkRect2D			renderArea;
5187			1u,													//	deUint32			clearValueCount;
5188			&clearValue,										//	const VkClearValue*	pClearValues;
5189		};
5190		vk.cmdBeginRenderPass(*cmdBuf, &passBeginParams, VK_SUBPASS_CONTENTS_INLINE);
5191	}
5192
5193	vk.cmdBindPipeline(*cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
5194	{
5195		const VkDeviceSize bindingOffset = 0;
5196		vk.cmdBindVertexBuffers(*cmdBuf, 0u, 1u, &vertexBuffer.get(), &bindingOffset);
5197	}
5198	vk.cmdDraw(*cmdBuf, deUint32(vertexCount), 1u /*run pipeline once*/, 0u /*first vertex*/, 0u /*first instanceIndex*/);
5199	vk.cmdEndRenderPass(*cmdBuf);
5200
5201	{
5202		const VkImageMemoryBarrier	renderFinishBarrier	=
5203		{
5204			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		//	VkStructureType			sType;
5205			DE_NULL,									//	const void*				pNext;
5206			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//	VkMemoryOutputFlags		outputMask;
5207			VK_ACCESS_TRANSFER_READ_BIT,				//	VkMemoryInputFlags		inputMask;
5208			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout			oldLayout;
5209			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,		//	VkImageLayout			newLayout;
5210			queueFamilyIndex,							//	deUint32				srcQueueFamilyIndex;
5211			queueFamilyIndex,							//	deUint32				destQueueFamilyIndex;
5212			*image,										//	VkImage					image;
5213			{
5214				VK_IMAGE_ASPECT_COLOR_BIT,					//	VkImageAspectFlags	aspectMask;
5215				0u,											//	deUint32			baseMipLevel;
5216				1u,											//	deUint32			mipLevels;
5217				0u,											//	deUint32			baseArraySlice;
5218				1u,											//	deUint32			arraySize;
5219			}											//	VkImageSubresourceRange	subresourceRange;
5220		};
5221		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &renderFinishBarrier);
5222	}
5223
5224	{
5225		const VkBufferImageCopy	copyParams	=
5226		{
5227			(VkDeviceSize)0u,						//	VkDeviceSize			bufferOffset;
5228			(deUint32)renderSize.x(),				//	deUint32				bufferRowLength;
5229			(deUint32)renderSize.y(),				//	deUint32				bufferImageHeight;
5230			{
5231				VK_IMAGE_ASPECT_COLOR_BIT,				//	VkImageAspect		aspect;
5232				0u,										//	deUint32			mipLevel;
5233				0u,										//	deUint32			arrayLayer;
5234				1u,										//	deUint32			arraySize;
5235			},										//	VkImageSubresourceCopy	imageSubresource;
5236			{ 0u, 0u, 0u },							//	VkOffset3D				imageOffset;
5237			{ renderSize.x(), renderSize.y(), 1u }	//	VkExtent3D				imageExtent;
5238		};
5239		vk.cmdCopyImageToBuffer(*cmdBuf, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
5240	}
5241
5242	{
5243		const VkBufferMemoryBarrier	copyFinishBarrier	=
5244		{
5245			VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	//	VkStructureType		sType;
5246			DE_NULL,									//	const void*			pNext;
5247			VK_ACCESS_TRANSFER_WRITE_BIT,				//	VkMemoryOutputFlags	outputMask;
5248			VK_ACCESS_HOST_READ_BIT,					//	VkMemoryInputFlags	inputMask;
5249			queueFamilyIndex,							//	deUint32			srcQueueFamilyIndex;
5250			queueFamilyIndex,							//	deUint32			destQueueFamilyIndex;
5251			*readImageBuffer,							//	VkBuffer			buffer;
5252			0u,											//	VkDeviceSize		offset;
5253			imageSizeBytes								//	VkDeviceSize		size;
5254		};
5255		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &copyFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
5256	}
5257
5258	VK_CHECK(vk.endCommandBuffer(*cmdBuf));
5259
5260	// Upload vertex data
5261	{
5262		const VkMappedMemoryRange	range			=
5263		{
5264			VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,	//	VkStructureType	sType;
5265			DE_NULL,								//	const void*		pNext;
5266			vertexBufferMemory->getMemory(),		//	VkDeviceMemory	mem;
5267			0,										//	VkDeviceSize	offset;
5268			(VkDeviceSize)sizeof(vertexData),		//	VkDeviceSize	size;
5269		};
5270		void*						vertexBufPtr	= vertexBufferMemory->getHostPtr();
5271
5272		deMemcpy(vertexBufPtr, &vertexData[0], sizeof(vertexData));
5273		VK_CHECK(vk.flushMappedMemoryRanges(vkDevice, 1u, &range));
5274	}
5275
5276	// Submit & wait for completion
5277	{
5278		const VkFenceCreateInfo	fenceParams	=
5279		{
5280			VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,	//	VkStructureType		sType;
5281			DE_NULL,								//	const void*			pNext;
5282			0u,										//	VkFenceCreateFlags	flags;
5283		};
5284		const Unique<VkFence>	fence		(createFence(vk, vkDevice, &fenceParams));
5285		const VkSubmitInfo		submitInfo	=
5286		{
5287			VK_STRUCTURE_TYPE_SUBMIT_INFO,
5288			DE_NULL,
5289			0u,
5290			(const VkSemaphore*)DE_NULL,
5291			(const VkPipelineStageFlags*)DE_NULL,
5292			1u,
5293			&cmdBuf.get(),
5294			0u,
5295			(const VkSemaphore*)DE_NULL,
5296		};
5297
5298		VK_CHECK(vk.queueSubmit(queue, 1u, &submitInfo, *fence));
5299		VK_CHECK(vk.waitForFences(vkDevice, 1u, &fence.get(), DE_TRUE, ~0ull));
5300	}
5301
5302	const void* imagePtr	= readImageBufferMemory->getHostPtr();
5303	const tcu::ConstPixelBufferAccess pixelBuffer(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8),
5304												  renderSize.x(), renderSize.y(), 1, imagePtr);
5305	// Log image
5306	{
5307		const VkMappedMemoryRange	range		=
5308		{
5309			VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,	//	VkStructureType	sType;
5310			DE_NULL,								//	const void*		pNext;
5311			readImageBufferMemory->getMemory(),		//	VkDeviceMemory	mem;
5312			0,										//	VkDeviceSize	offset;
5313			imageSizeBytes,							//	VkDeviceSize	size;
5314		};
5315
5316		VK_CHECK(vk.invalidateMappedMemoryRanges(vkDevice, 1u, &range));
5317		context.getTestContext().getLog() << TestLog::Image("Result", "Result", pixelBuffer);
5318	}
5319
5320	const RGBA threshold(1, 1, 1, 1);
5321	const RGBA upperLeft(pixelBuffer.getPixel(1, 1));
5322	if (!tcu::compareThreshold(upperLeft, instance.outputColors[0], threshold))
5323		return TestStatus::fail("Upper left corner mismatch");
5324
5325	const RGBA upperRight(pixelBuffer.getPixel(pixelBuffer.getWidth() - 1, 1));
5326	if (!tcu::compareThreshold(upperRight, instance.outputColors[1], threshold))
5327		return TestStatus::fail("Upper right corner mismatch");
5328
5329	const RGBA lowerLeft(pixelBuffer.getPixel(1, pixelBuffer.getHeight() - 1));
5330	if (!tcu::compareThreshold(lowerLeft, instance.outputColors[2], threshold))
5331		return TestStatus::fail("Lower left corner mismatch");
5332
5333	const RGBA lowerRight(pixelBuffer.getPixel(pixelBuffer.getWidth() - 1, pixelBuffer.getHeight() - 1));
5334	if (!tcu::compareThreshold(lowerRight, instance.outputColors[3], threshold))
5335		return TestStatus::fail("Lower right corner mismatch");
5336
5337	return TestStatus::pass("Rendered output matches input");
5338}
5339
5340void createTestsForAllStages (const std::string& name, const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, const vector<deInt32>& specConstants, tcu::TestCaseGroup* tests)
5341{
5342	const ShaderElement		vertFragPipelineStages[]		=
5343	{
5344		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5345		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5346	};
5347
5348	const ShaderElement		tessPipelineStages[]			=
5349	{
5350		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5351		ShaderElement("tessc", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
5352		ShaderElement("tesse", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
5353		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5354	};
5355
5356	const ShaderElement		geomPipelineStages[]				=
5357	{
5358		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5359		ShaderElement("geom", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
5360		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5361	};
5362
5363	StageToSpecConstantMap	specConstantMap;
5364
5365	specConstantMap[VK_SHADER_STAGE_VERTEX_BIT] = specConstants;
5366	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_vert", "", addShaderCodeCustomVertex, runAndVerifyDefaultPipeline,
5367												 createInstanceContext(vertFragPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5368
5369	specConstantMap.clear();
5370	specConstantMap[VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT] = specConstants;
5371	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_tessc", "", addShaderCodeCustomTessControl, runAndVerifyDefaultPipeline,
5372												 createInstanceContext(tessPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5373
5374	specConstantMap.clear();
5375	specConstantMap[VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = specConstants;
5376	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_tesse", "", addShaderCodeCustomTessEval, runAndVerifyDefaultPipeline,
5377												 createInstanceContext(tessPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5378
5379	specConstantMap.clear();
5380	specConstantMap[VK_SHADER_STAGE_GEOMETRY_BIT] = specConstants;
5381	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_geom", "", addShaderCodeCustomGeometry, runAndVerifyDefaultPipeline,
5382												 createInstanceContext(geomPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5383
5384	specConstantMap.clear();
5385	specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT] = specConstants;
5386	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_frag", "", addShaderCodeCustomFragment, runAndVerifyDefaultPipeline,
5387												 createInstanceContext(vertFragPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5388}
5389
5390inline void createTestsForAllStages (const std::string& name, const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, tcu::TestCaseGroup* tests)
5391{
5392	vector<deInt32> noSpecConstants;
5393	createTestsForAllStages(name, inputColors, outputColors, testCodeFragments, noSpecConstants, tests);
5394}
5395
5396} // anonymous
5397
5398tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
5399{
5400	struct NameCodePair { string name, code; };
5401	RGBA							defaultColors[4];
5402	de::MovePtr<tcu::TestCaseGroup> opSourceTests			(new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
5403	const std::string				opsourceGLSLWithFile	= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
5404	map<string, string>				fragments				= passthruFragments();
5405	const NameCodePair				tests[]					=
5406	{
5407		{"unknown", "OpSource Unknown 321"},
5408		{"essl", "OpSource ESSL 310"},
5409		{"glsl", "OpSource GLSL 450"},
5410		{"opencl_cpp", "OpSource OpenCL_CPP 120"},
5411		{"opencl_c", "OpSource OpenCL_C 120"},
5412		{"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
5413		{"file", opsourceGLSLWithFile},
5414		{"source", opsourceGLSLWithFile + "\"void main(){}\""},
5415		// Longest possible source string: SPIR-V limits instructions to 65535
5416		// words, of which the first 4 are opsourceGLSLWithFile; the rest will
5417		// contain 65530 UTF8 characters (one word each) plus one last word
5418		// containing 3 ASCII characters and \0.
5419		{"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
5420	};
5421
5422	getDefaultColors(defaultColors);
5423	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
5424	{
5425		fragments["debug"] = tests[testNdx].code;
5426		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
5427	}
5428
5429	return opSourceTests.release();
5430}
5431
5432tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
5433{
5434	struct NameCodePair { string name, code; };
5435	RGBA								defaultColors[4];
5436	de::MovePtr<tcu::TestCaseGroup>		opSourceTests		(new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
5437	map<string, string>					fragments			= passthruFragments();
5438	const std::string					opsource			= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
5439	const NameCodePair					tests[]				=
5440	{
5441		{"empty", opsource + "OpSourceContinued \"\""},
5442		{"short", opsource + "OpSourceContinued \"abcde\""},
5443		{"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
5444		// Longest possible source string: SPIR-V limits instructions to 65535
5445		// words, of which the first one is OpSourceContinued/length; the rest
5446		// will contain 65533 UTF8 characters (one word each) plus one last word
5447		// containing 3 ASCII characters and \0.
5448		{"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
5449	};
5450
5451	getDefaultColors(defaultColors);
5452	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
5453	{
5454		fragments["debug"] = tests[testNdx].code;
5455		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
5456	}
5457
5458	return opSourceTests.release();
5459}
5460
5461tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
5462{
5463	RGBA								 defaultColors[4];
5464	de::MovePtr<tcu::TestCaseGroup>		 opLineTests		 (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
5465	map<string, string>					 fragments;
5466	getDefaultColors(defaultColors);
5467	fragments["debug"]			=
5468		"%name = OpString \"name\"\n";
5469
5470	fragments["pre_main"]	=
5471		"OpNoLine\n"
5472		"OpNoLine\n"
5473		"OpLine %name 1 1\n"
5474		"OpNoLine\n"
5475		"OpLine %name 1 1\n"
5476		"OpLine %name 1 1\n"
5477		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
5478		"OpNoLine\n"
5479		"OpLine %name 1 1\n"
5480		"OpNoLine\n"
5481		"OpLine %name 1 1\n"
5482		"OpLine %name 1 1\n"
5483		"%second_param1 = OpFunctionParameter %v4f32\n"
5484		"OpNoLine\n"
5485		"OpNoLine\n"
5486		"%label_secondfunction = OpLabel\n"
5487		"OpNoLine\n"
5488		"OpReturnValue %second_param1\n"
5489		"OpFunctionEnd\n"
5490		"OpNoLine\n"
5491		"OpNoLine\n";
5492
5493	fragments["testfun"]		=
5494		// A %test_code function that returns its argument unchanged.
5495		"OpNoLine\n"
5496		"OpNoLine\n"
5497		"OpLine %name 1 1\n"
5498		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5499		"OpNoLine\n"
5500		"%param1 = OpFunctionParameter %v4f32\n"
5501		"OpNoLine\n"
5502		"OpNoLine\n"
5503		"%label_testfun = OpLabel\n"
5504		"OpNoLine\n"
5505		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
5506		"OpReturnValue %val1\n"
5507		"OpFunctionEnd\n"
5508		"OpLine %name 1 1\n"
5509		"OpNoLine\n";
5510
5511	createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
5512
5513	return opLineTests.release();
5514}
5515
5516
5517tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
5518{
5519	RGBA													defaultColors[4];
5520	de::MovePtr<tcu::TestCaseGroup>							opLineTests			(new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
5521	map<string, string>										fragments;
5522	std::vector<std::pair<std::string, std::string> >		problemStrings;
5523
5524	problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
5525	problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
5526	problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
5527	getDefaultColors(defaultColors);
5528
5529	fragments["debug"]			=
5530		"%other_name = OpString \"other_name\"\n";
5531
5532	fragments["pre_main"]	=
5533		"OpLine %file_name 32 0\n"
5534		"OpLine %file_name 32 32\n"
5535		"OpLine %file_name 32 40\n"
5536		"OpLine %other_name 32 40\n"
5537		"OpLine %other_name 0 100\n"
5538		"OpLine %other_name 0 4294967295\n"
5539		"OpLine %other_name 4294967295 0\n"
5540		"OpLine %other_name 32 40\n"
5541		"OpLine %file_name 0 0\n"
5542		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
5543		"OpLine %file_name 1 0\n"
5544		"%second_param1 = OpFunctionParameter %v4f32\n"
5545		"OpLine %file_name 1 3\n"
5546		"OpLine %file_name 1 2\n"
5547		"%label_secondfunction = OpLabel\n"
5548		"OpLine %file_name 0 2\n"
5549		"OpReturnValue %second_param1\n"
5550		"OpFunctionEnd\n"
5551		"OpLine %file_name 0 2\n"
5552		"OpLine %file_name 0 2\n";
5553
5554	fragments["testfun"]		=
5555		// A %test_code function that returns its argument unchanged.
5556		"OpLine %file_name 1 0\n"
5557		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5558		"OpLine %file_name 16 330\n"
5559		"%param1 = OpFunctionParameter %v4f32\n"
5560		"OpLine %file_name 14 442\n"
5561		"%label_testfun = OpLabel\n"
5562		"OpLine %file_name 11 1024\n"
5563		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
5564		"OpLine %file_name 2 97\n"
5565		"OpReturnValue %val1\n"
5566		"OpFunctionEnd\n"
5567		"OpLine %file_name 5 32\n";
5568
5569	for (size_t i = 0; i < problemStrings.size(); ++i)
5570	{
5571		map<string, string> testFragments = fragments;
5572		testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
5573		createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
5574	}
5575
5576	return opLineTests.release();
5577}
5578
5579tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
5580{
5581	de::MovePtr<tcu::TestCaseGroup> opConstantNullTests		(new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
5582	RGBA							colors[4];
5583
5584
5585	const char						functionStart[] =
5586		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5587		"%param1 = OpFunctionParameter %v4f32\n"
5588		"%lbl    = OpLabel\n";
5589
5590	const char						functionEnd[]	=
5591		"OpReturnValue %transformed_param\n"
5592		"OpFunctionEnd\n";
5593
5594	struct NameConstantsCode
5595	{
5596		string name;
5597		string constants;
5598		string code;
5599	};
5600
5601	NameConstantsCode tests[] =
5602	{
5603		{
5604			"vec4",
5605			"%cnull = OpConstantNull %v4f32\n",
5606			"%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
5607		},
5608		{
5609			"float",
5610			"%cnull = OpConstantNull %f32\n",
5611			"%vp = OpVariable %fp_v4f32 Function\n"
5612			"%v  = OpLoad %v4f32 %vp\n"
5613			"%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
5614			"%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
5615			"%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
5616			"%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
5617			"%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
5618		},
5619		{
5620			"bool",
5621			"%cnull             = OpConstantNull %bool\n",
5622			"%v                 = OpVariable %fp_v4f32 Function\n"
5623			"                     OpStore %v %param1\n"
5624			"                     OpSelectionMerge %false_label None\n"
5625			"                     OpBranchConditional %cnull %true_label %false_label\n"
5626			"%true_label        = OpLabel\n"
5627			"                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
5628			"                     OpBranch %false_label\n"
5629			"%false_label       = OpLabel\n"
5630			"%transformed_param = OpLoad %v4f32 %v\n"
5631		},
5632		{
5633			"i32",
5634			"%cnull             = OpConstantNull %i32\n",
5635			"%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
5636			"%b                 = OpIEqual %bool %cnull %c_i32_0\n"
5637			"                     OpSelectionMerge %false_label None\n"
5638			"                     OpBranchConditional %b %true_label %false_label\n"
5639			"%true_label        = OpLabel\n"
5640			"                     OpStore %v %param1\n"
5641			"                     OpBranch %false_label\n"
5642			"%false_label       = OpLabel\n"
5643			"%transformed_param = OpLoad %v4f32 %v\n"
5644		},
5645		{
5646			"struct",
5647			"%stype             = OpTypeStruct %f32 %v4f32\n"
5648			"%fp_stype          = OpTypePointer Function %stype\n"
5649			"%cnull             = OpConstantNull %stype\n",
5650			"%v                 = OpVariable %fp_stype Function %cnull\n"
5651			"%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
5652			"%f_val             = OpLoad %v4f32 %f\n"
5653			"%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
5654		},
5655		{
5656			"array",
5657			"%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
5658			"%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
5659			"%cnull             = OpConstantNull %a4_v4f32\n",
5660			"%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
5661			"%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
5662			"%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
5663			"%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
5664			"%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
5665			"%f_val             = OpLoad %v4f32 %f\n"
5666			"%f1_val            = OpLoad %v4f32 %f1\n"
5667			"%f2_val            = OpLoad %v4f32 %f2\n"
5668			"%f3_val            = OpLoad %v4f32 %f3\n"
5669			"%t0                = OpFAdd %v4f32 %param1 %f_val\n"
5670			"%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
5671			"%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
5672			"%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
5673		},
5674		{
5675			"matrix",
5676			"%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
5677			"%cnull             = OpConstantNull %mat4x4_f32\n",
5678			// Our null matrix * any vector should result in a zero vector.
5679			"%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
5680			"%transformed_param = OpFAdd %v4f32 %param1 %v\n"
5681		}
5682	};
5683
5684	getHalfColorsFullAlpha(colors);
5685
5686	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
5687	{
5688		map<string, string> fragments;
5689		fragments["pre_main"] = tests[testNdx].constants;
5690		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
5691		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
5692	}
5693	return opConstantNullTests.release();
5694}
5695tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
5696{
5697	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
5698	RGBA							inputColors[4];
5699	RGBA							outputColors[4];
5700
5701
5702	const char						functionStart[]	 =
5703		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5704		"%param1 = OpFunctionParameter %v4f32\n"
5705		"%lbl    = OpLabel\n";
5706
5707	const char						functionEnd[]		=
5708		"OpReturnValue %transformed_param\n"
5709		"OpFunctionEnd\n";
5710
5711	struct NameConstantsCode
5712	{
5713		string name;
5714		string constants;
5715		string code;
5716	};
5717
5718	NameConstantsCode tests[] =
5719	{
5720		{
5721			"vec4",
5722
5723			"%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
5724			"%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
5725		},
5726		{
5727			"struct",
5728
5729			"%stype             = OpTypeStruct %v4f32 %f32\n"
5730			"%fp_stype          = OpTypePointer Function %stype\n"
5731			"%f32_n_1           = OpConstant %f32 -1.0\n"
5732			"%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
5733			"%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
5734			"%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
5735
5736			"%v                 = OpVariable %fp_stype Function %cval\n"
5737			"%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
5738			"%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
5739			"%vec_val           = OpLoad %v4f32 %vec_ptr\n"
5740			"%f32_val           = OpLoad %f32 %f32_ptr\n"
5741			"%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
5742			"%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
5743			"%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
5744		},
5745		{
5746			// [1|0|0|0.5] [x] = x + 0.5
5747			// [0|1|0|0.5] [y] = y + 0.5
5748			// [0|0|1|0.5] [z] = z + 0.5
5749			// [0|0|0|1  ] [1] = 1
5750			"matrix",
5751
5752			"%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
5753		    "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
5754		    "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
5755		    "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
5756		    "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
5757			"%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
5758
5759			"%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
5760		},
5761		{
5762			"array",
5763
5764			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
5765			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
5766			"%f32_n_1             = OpConstant %f32 -1.0\n"
5767			"%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
5768			"%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
5769
5770			"%v                   = OpVariable %fp_a4f32 Function %carr\n"
5771			"%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
5772			"%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
5773			"%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
5774			"%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
5775			"%f_val               = OpLoad %f32 %f\n"
5776			"%f1_val              = OpLoad %f32 %f1\n"
5777			"%f2_val              = OpLoad %f32 %f2\n"
5778			"%f3_val              = OpLoad %f32 %f3\n"
5779			"%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
5780			"%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
5781			"%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
5782			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
5783			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
5784		},
5785		{
5786			//
5787			// [
5788			//   {
5789			//      0.0,
5790			//      [ 1.0, 1.0, 1.0, 1.0]
5791			//   },
5792			//   {
5793			//      1.0,
5794			//      [ 0.0, 0.5, 0.0, 0.0]
5795			//   }, //     ^^^
5796			//   {
5797			//      0.0,
5798			//      [ 1.0, 1.0, 1.0, 1.0]
5799			//   }
5800			// ]
5801			"array_of_struct_of_array",
5802
5803			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
5804			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
5805			"%stype               = OpTypeStruct %f32 %a4f32\n"
5806			"%a3stype             = OpTypeArray %stype %c_u32_3\n"
5807			"%fp_a3stype          = OpTypePointer Function %a3stype\n"
5808			"%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
5809			"%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5810			"%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
5811			"%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
5812			"%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
5813
5814			"%v                   = OpVariable %fp_a3stype Function %carr\n"
5815			"%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
5816			"%f_l                 = OpLoad %f32 %f\n"
5817			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
5818			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
5819		}
5820	};
5821
5822	getHalfColorsFullAlpha(inputColors);
5823	outputColors[0] = RGBA(255, 255, 255, 255);
5824	outputColors[1] = RGBA(255, 127, 127, 255);
5825	outputColors[2] = RGBA(127, 255, 127, 255);
5826	outputColors[3] = RGBA(127, 127, 255, 255);
5827
5828	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
5829	{
5830		map<string, string> fragments;
5831		fragments["pre_main"] = tests[testNdx].constants;
5832		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
5833		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
5834	}
5835	return opConstantCompositeTests.release();
5836}
5837
5838tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
5839{
5840	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
5841	RGBA							inputColors[4];
5842	RGBA							outputColors[4];
5843	map<string, string>				fragments;
5844
5845	// vec4 test_code(vec4 param) {
5846	//   vec4 result = param;
5847	//   for (int i = 0; i < 4; ++i) {
5848	//     if (i == 0) result[i] = 0.;
5849	//     else        result[i] = 1. - result[i];
5850	//   }
5851	//   return result;
5852	// }
5853	const char						function[]			=
5854		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5855		"%param1    = OpFunctionParameter %v4f32\n"
5856		"%lbl       = OpLabel\n"
5857		"%iptr      = OpVariable %fp_i32 Function\n"
5858		"%result    = OpVariable %fp_v4f32 Function\n"
5859		"             OpStore %iptr %c_i32_0\n"
5860		"             OpStore %result %param1\n"
5861		"             OpBranch %loop\n"
5862
5863		// Loop entry block.
5864		"%loop      = OpLabel\n"
5865		"%ival      = OpLoad %i32 %iptr\n"
5866		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
5867		"             OpLoopMerge %exit %loop None\n"
5868		"             OpBranchConditional %lt_4 %if_entry %exit\n"
5869
5870		// Merge block for loop.
5871		"%exit      = OpLabel\n"
5872		"%ret       = OpLoad %v4f32 %result\n"
5873		"             OpReturnValue %ret\n"
5874
5875		// If-statement entry block.
5876		"%if_entry  = OpLabel\n"
5877		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
5878		"%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
5879		"             OpSelectionMerge %if_exit None\n"
5880		"             OpBranchConditional %eq_0 %if_true %if_false\n"
5881
5882		// False branch for if-statement.
5883		"%if_false  = OpLabel\n"
5884		"%val       = OpLoad %f32 %loc\n"
5885		"%sub       = OpFSub %f32 %c_f32_1 %val\n"
5886		"             OpStore %loc %sub\n"
5887		"             OpBranch %if_exit\n"
5888
5889		// Merge block for if-statement.
5890		"%if_exit   = OpLabel\n"
5891		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
5892		"             OpStore %iptr %ival_next\n"
5893		"             OpBranch %loop\n"
5894
5895		// True branch for if-statement.
5896		"%if_true   = OpLabel\n"
5897		"             OpStore %loc %c_f32_0\n"
5898		"             OpBranch %if_exit\n"
5899
5900		"             OpFunctionEnd\n";
5901
5902	fragments["testfun"]	= function;
5903
5904	inputColors[0]			= RGBA(127, 127, 127, 0);
5905	inputColors[1]			= RGBA(127, 0,   0,   0);
5906	inputColors[2]			= RGBA(0,   127, 0,   0);
5907	inputColors[3]			= RGBA(0,   0,   127, 0);
5908
5909	outputColors[0]			= RGBA(0, 128, 128, 255);
5910	outputColors[1]			= RGBA(0, 255, 255, 255);
5911	outputColors[2]			= RGBA(0, 128, 255, 255);
5912	outputColors[3]			= RGBA(0, 255, 128, 255);
5913
5914	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
5915
5916	return group.release();
5917}
5918
5919tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
5920{
5921	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
5922	RGBA							inputColors[4];
5923	RGBA							outputColors[4];
5924	map<string, string>				fragments;
5925
5926	const char						typesAndConstants[]	=
5927		"%c_f32_p2  = OpConstant %f32 0.2\n"
5928		"%c_f32_p4  = OpConstant %f32 0.4\n"
5929		"%c_f32_p6  = OpConstant %f32 0.6\n"
5930		"%c_f32_p8  = OpConstant %f32 0.8\n";
5931
5932	// vec4 test_code(vec4 param) {
5933	//   vec4 result = param;
5934	//   for (int i = 0; i < 4; ++i) {
5935	//     switch (i) {
5936	//       case 0: result[i] += .2; break;
5937	//       case 1: result[i] += .6; break;
5938	//       case 2: result[i] += .4; break;
5939	//       case 3: result[i] += .8; break;
5940	//       default: break; // unreachable
5941	//     }
5942	//   }
5943	//   return result;
5944	// }
5945	const char						function[]			=
5946		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5947		"%param1    = OpFunctionParameter %v4f32\n"
5948		"%lbl       = OpLabel\n"
5949		"%iptr      = OpVariable %fp_i32 Function\n"
5950		"%result    = OpVariable %fp_v4f32 Function\n"
5951		"             OpStore %iptr %c_i32_0\n"
5952		"             OpStore %result %param1\n"
5953		"             OpBranch %loop\n"
5954
5955		// Loop entry block.
5956		"%loop      = OpLabel\n"
5957		"%ival      = OpLoad %i32 %iptr\n"
5958		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
5959		"             OpLoopMerge %exit %loop None\n"
5960		"             OpBranchConditional %lt_4 %switch_entry %exit\n"
5961
5962		// Merge block for loop.
5963		"%exit      = OpLabel\n"
5964		"%ret       = OpLoad %v4f32 %result\n"
5965		"             OpReturnValue %ret\n"
5966
5967		// Switch-statement entry block.
5968		"%switch_entry   = OpLabel\n"
5969		"%loc            = OpAccessChain %fp_f32 %result %ival\n"
5970		"%val            = OpLoad %f32 %loc\n"
5971		"                  OpSelectionMerge %switch_exit None\n"
5972		"                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
5973
5974		"%case2          = OpLabel\n"
5975		"%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
5976		"                  OpStore %loc %addp4\n"
5977		"                  OpBranch %switch_exit\n"
5978
5979		"%switch_default = OpLabel\n"
5980		"                  OpUnreachable\n"
5981
5982		"%case3          = OpLabel\n"
5983		"%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
5984		"                  OpStore %loc %addp8\n"
5985		"                  OpBranch %switch_exit\n"
5986
5987		"%case0          = OpLabel\n"
5988		"%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
5989		"                  OpStore %loc %addp2\n"
5990		"                  OpBranch %switch_exit\n"
5991
5992		// Merge block for switch-statement.
5993		"%switch_exit    = OpLabel\n"
5994		"%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
5995		"                  OpStore %iptr %ival_next\n"
5996		"                  OpBranch %loop\n"
5997
5998		"%case1          = OpLabel\n"
5999		"%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
6000		"                  OpStore %loc %addp6\n"
6001		"                  OpBranch %switch_exit\n"
6002
6003		"                  OpFunctionEnd\n";
6004
6005	fragments["pre_main"]	= typesAndConstants;
6006	fragments["testfun"]	= function;
6007
6008	inputColors[0]			= RGBA(127, 27,  127, 51);
6009	inputColors[1]			= RGBA(127, 0,   0,   51);
6010	inputColors[2]			= RGBA(0,   27,  0,   51);
6011	inputColors[3]			= RGBA(0,   0,   127, 51);
6012
6013	outputColors[0]			= RGBA(178, 180, 229, 255);
6014	outputColors[1]			= RGBA(178, 153, 102, 255);
6015	outputColors[2]			= RGBA(51,  180, 102, 255);
6016	outputColors[3]			= RGBA(51,  153, 229, 255);
6017
6018	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
6019
6020	return group.release();
6021}
6022
6023tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
6024{
6025	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
6026	RGBA							inputColors[4];
6027	RGBA							outputColors[4];
6028	map<string, string>				fragments;
6029
6030	const char						decorations[]		=
6031		"OpDecorate %array_group         ArrayStride 4\n"
6032		"OpDecorate %struct_member_group Offset 0\n"
6033		"%array_group         = OpDecorationGroup\n"
6034		"%struct_member_group = OpDecorationGroup\n"
6035
6036		"OpDecorate %group1 RelaxedPrecision\n"
6037		"OpDecorate %group3 RelaxedPrecision\n"
6038		"OpDecorate %group3 Invariant\n"
6039		"OpDecorate %group3 Restrict\n"
6040		"%group0 = OpDecorationGroup\n"
6041		"%group1 = OpDecorationGroup\n"
6042		"%group3 = OpDecorationGroup\n";
6043
6044	const char						typesAndConstants[]	=
6045		"%a3f32     = OpTypeArray %f32 %c_u32_3\n"
6046		"%struct1   = OpTypeStruct %a3f32\n"
6047		"%struct2   = OpTypeStruct %a3f32\n"
6048		"%fp_struct1 = OpTypePointer Function %struct1\n"
6049		"%fp_struct2 = OpTypePointer Function %struct2\n"
6050		"%c_f32_2    = OpConstant %f32 2.\n"
6051		"%c_f32_n2   = OpConstant %f32 -2.\n"
6052
6053		"%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
6054		"%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
6055		"%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
6056		"%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
6057
6058	const char						function[]			=
6059		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6060		"%param     = OpFunctionParameter %v4f32\n"
6061		"%entry     = OpLabel\n"
6062		"%result    = OpVariable %fp_v4f32 Function\n"
6063		"%v_struct1 = OpVariable %fp_struct1 Function\n"
6064		"%v_struct2 = OpVariable %fp_struct2 Function\n"
6065		"             OpStore %result %param\n"
6066		"             OpStore %v_struct1 %c_struct1\n"
6067		"             OpStore %v_struct2 %c_struct2\n"
6068		"%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
6069		"%val1      = OpLoad %f32 %ptr1\n"
6070		"%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
6071		"%val2      = OpLoad %f32 %ptr2\n"
6072		"%addvalues = OpFAdd %f32 %val1 %val2\n"
6073		"%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
6074		"%val       = OpLoad %f32 %ptr\n"
6075		"%addresult = OpFAdd %f32 %addvalues %val\n"
6076		"             OpStore %ptr %addresult\n"
6077		"%ret       = OpLoad %v4f32 %result\n"
6078		"             OpReturnValue %ret\n"
6079		"             OpFunctionEnd\n";
6080
6081	struct CaseNameDecoration
6082	{
6083		string name;
6084		string decoration;
6085	};
6086
6087	CaseNameDecoration tests[] =
6088	{
6089		{
6090			"same_decoration_group_on_multiple_types",
6091			"OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
6092		},
6093		{
6094			"empty_decoration_group",
6095			"OpGroupDecorate %group0      %a3f32\n"
6096			"OpGroupDecorate %group0      %result\n"
6097		},
6098		{
6099			"one_element_decoration_group",
6100			"OpGroupDecorate %array_group %a3f32\n"
6101		},
6102		{
6103			"multiple_elements_decoration_group",
6104			"OpGroupDecorate %group3      %v_struct1\n"
6105		},
6106		{
6107			"multiple_decoration_groups_on_same_variable",
6108			"OpGroupDecorate %group0      %v_struct2\n"
6109			"OpGroupDecorate %group1      %v_struct2\n"
6110			"OpGroupDecorate %group3      %v_struct2\n"
6111		},
6112		{
6113			"same_decoration_group_multiple_times",
6114			"OpGroupDecorate %group1      %addvalues\n"
6115			"OpGroupDecorate %group1      %addvalues\n"
6116			"OpGroupDecorate %group1      %addvalues\n"
6117		},
6118
6119	};
6120
6121	getHalfColorsFullAlpha(inputColors);
6122	getHalfColorsFullAlpha(outputColors);
6123
6124	for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
6125	{
6126		fragments["decoration"]	= decorations + tests[idx].decoration;
6127		fragments["pre_main"]	= typesAndConstants;
6128		fragments["testfun"]	= function;
6129
6130		createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
6131	}
6132
6133	return group.release();
6134}
6135
6136struct SpecConstantTwoIntGraphicsCase
6137{
6138	const char*		caseName;
6139	const char*		scDefinition0;
6140	const char*		scDefinition1;
6141	const char*		scResultType;
6142	const char*		scOperation;
6143	deInt32			scActualValue0;
6144	deInt32			scActualValue1;
6145	const char*		resultOperation;
6146	RGBA			expectedColors[4];
6147
6148					SpecConstantTwoIntGraphicsCase (const char* name,
6149											const char* definition0,
6150											const char* definition1,
6151											const char* resultType,
6152											const char* operation,
6153											deInt32		value0,
6154											deInt32		value1,
6155											const char* resultOp,
6156											const RGBA	(&output)[4])
6157						: caseName			(name)
6158						, scDefinition0		(definition0)
6159						, scDefinition1		(definition1)
6160						, scResultType		(resultType)
6161						, scOperation		(operation)
6162						, scActualValue0	(value0)
6163						, scActualValue1	(value1)
6164						, resultOperation	(resultOp)
6165	{
6166		expectedColors[0] = output[0];
6167		expectedColors[1] = output[1];
6168		expectedColors[2] = output[2];
6169		expectedColors[3] = output[3];
6170	}
6171};
6172
6173tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
6174{
6175	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
6176	vector<SpecConstantTwoIntGraphicsCase>	cases;
6177	RGBA							inputColors[4];
6178	RGBA							outputColors0[4];
6179	RGBA							outputColors1[4];
6180	RGBA							outputColors2[4];
6181
6182	const char	decorations1[]			=
6183		"OpDecorate %sc_0  SpecId 0\n"
6184		"OpDecorate %sc_1  SpecId 1\n";
6185
6186	const char	typesAndConstants1[]	=
6187		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
6188		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
6189		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
6190
6191	const char	function1[]				=
6192		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6193		"%param     = OpFunctionParameter %v4f32\n"
6194		"%label     = OpLabel\n"
6195		"%result    = OpVariable %fp_v4f32 Function\n"
6196		"             OpStore %result %param\n"
6197		"%gen       = ${GEN_RESULT}\n"
6198		"%index     = OpIAdd %i32 %gen %c_i32_1\n"
6199		"%loc       = OpAccessChain %fp_f32 %result %index\n"
6200		"%val       = OpLoad %f32 %loc\n"
6201		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
6202		"             OpStore %loc %add\n"
6203		"%ret       = OpLoad %v4f32 %result\n"
6204		"             OpReturnValue %ret\n"
6205		"             OpFunctionEnd\n";
6206
6207	inputColors[0] = RGBA(127, 127, 127, 255);
6208	inputColors[1] = RGBA(127, 0,   0,   255);
6209	inputColors[2] = RGBA(0,   127, 0,   255);
6210	inputColors[3] = RGBA(0,   0,   127, 255);
6211
6212	// Derived from inputColors[x] by adding 128 to inputColors[x][0].
6213	outputColors0[0] = RGBA(255, 127, 127, 255);
6214	outputColors0[1] = RGBA(255, 0,   0,   255);
6215	outputColors0[2] = RGBA(128, 127, 0,   255);
6216	outputColors0[3] = RGBA(128, 0,   127, 255);
6217
6218	// Derived from inputColors[x] by adding 128 to inputColors[x][1].
6219	outputColors1[0] = RGBA(127, 255, 127, 255);
6220	outputColors1[1] = RGBA(127, 128, 0,   255);
6221	outputColors1[2] = RGBA(0,   255, 0,   255);
6222	outputColors1[3] = RGBA(0,   128, 127, 255);
6223
6224	// Derived from inputColors[x] by adding 128 to inputColors[x][2].
6225	outputColors2[0] = RGBA(127, 127, 255, 255);
6226	outputColors2[1] = RGBA(127, 0,   128, 255);
6227	outputColors2[2] = RGBA(0,   127, 128, 255);
6228	outputColors2[3] = RGBA(0,   0,   255, 255);
6229
6230	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
6231	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
6232	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
6233
6234	cases.push_back(SpecConstantTwoIntGraphicsCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",				19,		-20,	addZeroToSc,		outputColors0));
6235	cases.push_back(SpecConstantTwoIntGraphicsCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",				19,		20,		addZeroToSc,		outputColors0));
6236	cases.push_back(SpecConstantTwoIntGraphicsCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",				-1,		-1,		addZeroToSc,		outputColors2));
6237	cases.push_back(SpecConstantTwoIntGraphicsCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",				-126,	126,	addZeroToSc,		outputColors0));
6238	cases.push_back(SpecConstantTwoIntGraphicsCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",				126,	126,	addZeroToSc,		outputColors2));
6239	cases.push_back(SpecConstantTwoIntGraphicsCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
6240	cases.push_back(SpecConstantTwoIntGraphicsCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
6241	cases.push_back(SpecConstantTwoIntGraphicsCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",				1001,	500,	addZeroToSc,		outputColors2));
6242	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",				0x33,	0x0d,	addZeroToSc,		outputColors2));
6243	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",				0,		1,		addZeroToSc,		outputColors2));
6244	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",				0x2e,	0x2f,	addZeroToSc,		outputColors2));
6245	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,		1,		addZeroToSc,		outputColors2));
6246	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,		2,		addZeroToSc,		outputColors0));
6247	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,		0,		addZeroToSc,		outputColors2));
6248	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",				-20,	-10,	selectTrueUsingSc,	outputColors2));
6249	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",				10,		20,		selectTrueUsingSc,	outputColors2));
6250	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
6251	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",				10,		5,		selectTrueUsingSc,	outputColors2));
6252	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",				-10,	-10,	selectTrueUsingSc,	outputColors2));
6253	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",				50,		100,	selectTrueUsingSc,	outputColors2));
6254	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
6255	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",				10,		10,		selectTrueUsingSc,	outputColors2));
6256	cases.push_back(SpecConstantTwoIntGraphicsCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",				42,		24,		selectFalseUsingSc,	outputColors2));
6257	cases.push_back(SpecConstantTwoIntGraphicsCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
6258	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
6259	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
6260	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
6261	cases.push_back(SpecConstantTwoIntGraphicsCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",					-1,		0,		addZeroToSc,		outputColors2));
6262	cases.push_back(SpecConstantTwoIntGraphicsCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,		0,		addZeroToSc,		outputColors2));
6263	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,		0,		selectFalseUsingSc,	outputColors2));
6264	cases.push_back(SpecConstantTwoIntGraphicsCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,		1,		addZeroToSc,		outputColors2));
6265	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
6266	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
6267
6268	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6269	{
6270		map<string, string>	specializations;
6271		map<string, string>	fragments;
6272		vector<deInt32>		specConstants;
6273
6274		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
6275		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
6276		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
6277		specializations["SC_OP"]			= cases[caseNdx].scOperation;
6278		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
6279
6280		fragments["decoration"]				= tcu::StringTemplate(decorations1).specialize(specializations);
6281		fragments["pre_main"]				= tcu::StringTemplate(typesAndConstants1).specialize(specializations);
6282		fragments["testfun"]				= tcu::StringTemplate(function1).specialize(specializations);
6283
6284		specConstants.push_back(cases[caseNdx].scActualValue0);
6285		specConstants.push_back(cases[caseNdx].scActualValue1);
6286
6287		createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
6288	}
6289
6290	const char	decorations2[]			=
6291		"OpDecorate %sc_0  SpecId 0\n"
6292		"OpDecorate %sc_1  SpecId 1\n"
6293		"OpDecorate %sc_2  SpecId 2\n";
6294
6295	const char	typesAndConstants2[]	=
6296		"%v3i32     = OpTypeVector %i32 3\n"
6297
6298		"%sc_0      = OpSpecConstant %i32 0\n"
6299		"%sc_1      = OpSpecConstant %i32 0\n"
6300		"%sc_2      = OpSpecConstant %i32 0\n"
6301
6302		"%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
6303		"%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0    0\n"     // (sc_0, 0, 0)
6304		"%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0    1\n"     // (0, sc_1, 0)
6305		"%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0    2\n"     // (0, 0, sc_2)
6306		"%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
6307		"%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
6308		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
6309		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
6310		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
6311		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
6312		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";       // (sc_2 - sc_0) * sc_1
6313
6314	const char	function2[]				=
6315		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6316		"%param     = OpFunctionParameter %v4f32\n"
6317		"%label     = OpLabel\n"
6318		"%result    = OpVariable %fp_v4f32 Function\n"
6319		"             OpStore %result %param\n"
6320		"%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
6321		"%val       = OpLoad %f32 %loc\n"
6322		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
6323		"             OpStore %loc %add\n"
6324		"%ret       = OpLoad %v4f32 %result\n"
6325		"             OpReturnValue %ret\n"
6326		"             OpFunctionEnd\n";
6327
6328	map<string, string>	fragments;
6329	vector<deInt32>		specConstants;
6330
6331	fragments["decoration"]	= decorations2;
6332	fragments["pre_main"]	= typesAndConstants2;
6333	fragments["testfun"]	= function2;
6334
6335	specConstants.push_back(56789);
6336	specConstants.push_back(-2);
6337	specConstants.push_back(56788);
6338
6339	createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
6340
6341	return group.release();
6342}
6343
6344tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
6345{
6346	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
6347	RGBA							inputColors[4];
6348	RGBA							outputColors1[4];
6349	RGBA							outputColors2[4];
6350	RGBA							outputColors3[4];
6351	map<string, string>				fragments1;
6352	map<string, string>				fragments2;
6353	map<string, string>				fragments3;
6354
6355	const char	typesAndConstants1[]	=
6356		"%c_f32_p2  = OpConstant %f32 0.2\n"
6357		"%c_f32_p4  = OpConstant %f32 0.4\n"
6358		"%c_f32_p5  = OpConstant %f32 0.5\n"
6359		"%c_f32_p8  = OpConstant %f32 0.8\n";
6360
6361	// vec4 test_code(vec4 param) {
6362	//   vec4 result = param;
6363	//   for (int i = 0; i < 4; ++i) {
6364	//     float operand;
6365	//     switch (i) {
6366	//       case 0: operand = .2; break;
6367	//       case 1: operand = .5; break;
6368	//       case 2: operand = .4; break;
6369	//       case 3: operand = .0; break;
6370	//       default: break; // unreachable
6371	//     }
6372	//     result[i] += operand;
6373	//   }
6374	//   return result;
6375	// }
6376	const char	function1[]				=
6377		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6378		"%param1    = OpFunctionParameter %v4f32\n"
6379		"%lbl       = OpLabel\n"
6380		"%iptr      = OpVariable %fp_i32 Function\n"
6381		"%result    = OpVariable %fp_v4f32 Function\n"
6382		"             OpStore %iptr %c_i32_0\n"
6383		"             OpStore %result %param1\n"
6384		"             OpBranch %loop\n"
6385
6386		"%loop      = OpLabel\n"
6387		"%ival      = OpLoad %i32 %iptr\n"
6388		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
6389		"             OpLoopMerge %exit %loop None\n"
6390		"             OpBranchConditional %lt_4 %entry %exit\n"
6391
6392		"%entry     = OpLabel\n"
6393		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
6394		"%val       = OpLoad %f32 %loc\n"
6395		"             OpSelectionMerge %phi None\n"
6396		"             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
6397
6398		"%case0     = OpLabel\n"
6399		"             OpBranch %phi\n"
6400		"%case1     = OpLabel\n"
6401		"             OpBranch %phi\n"
6402		"%case2     = OpLabel\n"
6403		"             OpBranch %phi\n"
6404		"%case3     = OpLabel\n"
6405		"             OpBranch %phi\n"
6406
6407		"%default   = OpLabel\n"
6408		"             OpUnreachable\n"
6409
6410		"%phi       = OpLabel\n"
6411		"%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
6412		"%add       = OpFAdd %f32 %val %operand\n"
6413		"             OpStore %loc %add\n"
6414		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
6415		"             OpStore %iptr %ival_next\n"
6416		"             OpBranch %loop\n"
6417
6418		"%exit      = OpLabel\n"
6419		"%ret       = OpLoad %v4f32 %result\n"
6420		"             OpReturnValue %ret\n"
6421
6422		"             OpFunctionEnd\n";
6423
6424	fragments1["pre_main"]	= typesAndConstants1;
6425	fragments1["testfun"]	= function1;
6426
6427	getHalfColorsFullAlpha(inputColors);
6428
6429	outputColors1[0]		= RGBA(178, 255, 229, 255);
6430	outputColors1[1]		= RGBA(178, 127, 102, 255);
6431	outputColors1[2]		= RGBA(51,  255, 102, 255);
6432	outputColors1[3]		= RGBA(51,  127, 229, 255);
6433
6434	createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
6435
6436	const char	typesAndConstants2[]	=
6437		"%c_f32_p2  = OpConstant %f32 0.2\n";
6438
6439	// Add .4 to the second element of the given parameter.
6440	const char	function2[]				=
6441		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6442		"%param     = OpFunctionParameter %v4f32\n"
6443		"%entry     = OpLabel\n"
6444		"%result    = OpVariable %fp_v4f32 Function\n"
6445		"             OpStore %result %param\n"
6446		"%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
6447		"%val       = OpLoad %f32 %loc\n"
6448		"             OpBranch %phi\n"
6449
6450		"%phi        = OpLabel\n"
6451		"%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
6452		"%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
6453		"%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
6454		"%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
6455		"%still_loop = OpSLessThan %bool %step %c_i32_2\n"
6456		"              OpLoopMerge %exit %phi None\n"
6457		"              OpBranchConditional %still_loop %phi %exit\n"
6458
6459		"%exit       = OpLabel\n"
6460		"              OpStore %loc %accum\n"
6461		"%ret        = OpLoad %v4f32 %result\n"
6462		"              OpReturnValue %ret\n"
6463
6464		"              OpFunctionEnd\n";
6465
6466	fragments2["pre_main"]	= typesAndConstants2;
6467	fragments2["testfun"]	= function2;
6468
6469	outputColors2[0]			= RGBA(127, 229, 127, 255);
6470	outputColors2[1]			= RGBA(127, 102, 0,   255);
6471	outputColors2[2]			= RGBA(0,   229, 0,   255);
6472	outputColors2[3]			= RGBA(0,   102, 127, 255);
6473
6474	createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
6475
6476	const char	typesAndConstants3[]	=
6477		"%true      = OpConstantTrue %bool\n"
6478		"%false     = OpConstantFalse %bool\n"
6479		"%c_f32_p2  = OpConstant %f32 0.2\n";
6480
6481	// Swap the second and the third element of the given parameter.
6482	const char	function3[]				=
6483		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6484		"%param     = OpFunctionParameter %v4f32\n"
6485		"%entry     = OpLabel\n"
6486		"%result    = OpVariable %fp_v4f32 Function\n"
6487		"             OpStore %result %param\n"
6488		"%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
6489		"%a_init    = OpLoad %f32 %a_loc\n"
6490		"%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
6491		"%b_init    = OpLoad %f32 %b_loc\n"
6492		"             OpBranch %phi\n"
6493
6494		"%phi        = OpLabel\n"
6495		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
6496		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
6497		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
6498		"              OpLoopMerge %exit %phi None\n"
6499		"              OpBranchConditional %still_loop %phi %exit\n"
6500
6501		"%exit       = OpLabel\n"
6502		"              OpStore %a_loc %a_next\n"
6503		"              OpStore %b_loc %b_next\n"
6504		"%ret        = OpLoad %v4f32 %result\n"
6505		"              OpReturnValue %ret\n"
6506
6507		"              OpFunctionEnd\n";
6508
6509	fragments3["pre_main"]	= typesAndConstants3;
6510	fragments3["testfun"]	= function3;
6511
6512	outputColors3[0]			= RGBA(127, 127, 127, 255);
6513	outputColors3[1]			= RGBA(127, 0,   0,   255);
6514	outputColors3[2]			= RGBA(0,   0,   127, 255);
6515	outputColors3[3]			= RGBA(0,   127, 0,   255);
6516
6517	createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
6518
6519	return group.release();
6520}
6521
6522tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
6523{
6524	de::MovePtr<tcu::TestCaseGroup> group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
6525	RGBA							inputColors[4];
6526	RGBA							outputColors[4];
6527
6528	// With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
6529	// For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
6530	// only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
6531	// On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
6532	const char						constantsAndTypes[]	 =
6533		"%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
6534		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
6535		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
6536		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
6537		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"
6538		;
6539
6540	const char						function[]	 =
6541		"%test_code      = OpFunction %v4f32 None %v4f32_function\n"
6542		"%param          = OpFunctionParameter %v4f32\n"
6543		"%label          = OpLabel\n"
6544		"%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
6545		"%var2           = OpVariable %fp_f32 Function\n"
6546		"%red            = OpCompositeExtract %f32 %param 0\n"
6547		"%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
6548		"                  OpStore %var2 %plus_red\n"
6549		"%val1           = OpLoad %f32 %var1\n"
6550		"%val2           = OpLoad %f32 %var2\n"
6551		"%mul            = OpFMul %f32 %val1 %val2\n"
6552		"%add            = OpFAdd %f32 %mul %c_f32_n1\n"
6553		"%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
6554		"%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
6555		"%success        = OpLogicalOr %bool %is0 %isn1n24\n"
6556		"%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
6557		"%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
6558		"                  OpReturnValue %ret\n"
6559		"                  OpFunctionEnd\n";
6560
6561	struct CaseNameDecoration
6562	{
6563		string name;
6564		string decoration;
6565	};
6566
6567
6568	CaseNameDecoration tests[] = {
6569		{"multiplication",	"OpDecorate %mul NoContraction"},
6570		{"addition",		"OpDecorate %add NoContraction"},
6571		{"both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
6572	};
6573
6574	getHalfColorsFullAlpha(inputColors);
6575
6576	for (deUint8 idx = 0; idx < 4; ++idx)
6577	{
6578		inputColors[idx].setRed(0);
6579		outputColors[idx] = RGBA(0, 0, 0, 255);
6580	}
6581
6582	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
6583	{
6584		map<string, string> fragments;
6585
6586		fragments["decoration"] = tests[testNdx].decoration;
6587		fragments["pre_main"] = constantsAndTypes;
6588		fragments["testfun"] = function;
6589
6590		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
6591	}
6592
6593	return group.release();
6594}
6595
6596tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
6597{
6598	de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
6599	RGBA							colors[4];
6600
6601	const char						constantsAndTypes[]	 =
6602		"%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
6603		"%fp_a2f32          = OpTypePointer Function %a2f32\n"
6604		"%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
6605		"%fp_stype          = OpTypePointer Function %stype\n";
6606
6607	const char						function[]	 =
6608		"%test_code         = OpFunction %v4f32 None %v4f32_function\n"
6609		"%param1            = OpFunctionParameter %v4f32\n"
6610		"%lbl               = OpLabel\n"
6611		"%v1                = OpVariable %fp_v4f32 Function\n"
6612		"%v2                = OpVariable %fp_a2f32 Function\n"
6613		"%v3                = OpVariable %fp_f32 Function\n"
6614		"%v                 = OpVariable %fp_stype Function\n"
6615		"%vv                = OpVariable %fp_stype Function\n"
6616		"%vvv               = OpVariable %fp_f32 Function\n"
6617
6618		"                     OpStore %v1 %c_v4f32_1_1_1_1\n"
6619		"                     OpStore %v2 %c_a2f32_1\n"
6620		"                     OpStore %v3 %c_f32_1\n"
6621
6622		"%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
6623		"%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
6624		"%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
6625		"%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
6626		"%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
6627		"%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
6628
6629		"                    OpStore %p_v4f32 %v1_v ${access_type}\n"
6630		"                    OpStore %p_a2f32 %v2_v ${access_type}\n"
6631		"                    OpStore %p_f32 %v3_v ${access_type}\n"
6632
6633		"                    OpCopyMemory %vv %v ${access_type}\n"
6634		"                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
6635
6636		"%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
6637		"%v_f32_2          = OpLoad %f32 %p_f32_2\n"
6638		"%v_f32_3          = OpLoad %f32 %vvv\n"
6639
6640		"%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
6641		"%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
6642		"                    OpReturnValue %ret2\n"
6643		"                    OpFunctionEnd\n";
6644
6645	struct NameMemoryAccess
6646	{
6647		string name;
6648		string accessType;
6649	};
6650
6651
6652	NameMemoryAccess tests[] =
6653	{
6654		{ "none", "" },
6655		{ "volatile", "Volatile" },
6656		{ "aligned",  "Aligned 1" },
6657		{ "volatile_aligned",  "Volatile|Aligned 1" },
6658		{ "nontemporal_aligned",  "Nontemporal|Aligned 1" },
6659		{ "volatile_nontemporal",  "Volatile|Nontemporal" },
6660		{ "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
6661	};
6662
6663	getHalfColorsFullAlpha(colors);
6664
6665	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
6666	{
6667		map<string, string> fragments;
6668		map<string, string> memoryAccess;
6669		memoryAccess["access_type"] = tests[testNdx].accessType;
6670
6671		fragments["pre_main"] = constantsAndTypes;
6672		fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
6673		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
6674	}
6675	return memoryAccessTests.release();
6676}
6677tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
6678{
6679	de::MovePtr<tcu::TestCaseGroup>		opUndefTests		 (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
6680	RGBA								defaultColors[4];
6681	map<string, string>					fragments;
6682	getDefaultColors(defaultColors);
6683
6684	// First, simple cases that don't do anything with the OpUndef result.
6685	fragments["testfun"] =
6686		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6687		"%param1 = OpFunctionParameter %v4f32\n"
6688		"%label_testfun = OpLabel\n"
6689		"%undef = OpUndef %type\n"
6690		"OpReturnValue %param1\n"
6691		"OpFunctionEnd\n"
6692		;
6693	struct NameCodePair { string name, code; };
6694	const NameCodePair tests[] =
6695	{
6696		{"bool", "%type = OpTypeBool"},
6697		{"vec2uint32", "%type = OpTypeVector %u32 2"},
6698		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown"},
6699		{"sampler", "%type = OpTypeSampler"},
6700		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img"},
6701		{"pointer", "%type = OpTypePointer Function %i32"},
6702		{"runtimearray", "%type = OpTypeRuntimeArray %f32"},
6703		{"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100"},
6704		{"struct", "%type = OpTypeStruct %f32 %i32 %u32"}};
6705	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
6706	{
6707		fragments["pre_main"] = tests[testNdx].code;
6708		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
6709	}
6710	fragments.clear();
6711
6712	fragments["testfun"] =
6713		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6714		"%param1 = OpFunctionParameter %v4f32\n"
6715		"%label_testfun = OpLabel\n"
6716		"%undef = OpUndef %f32\n"
6717		"%zero = OpFMul %f32 %undef %c_f32_0\n"
6718		"%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
6719		"%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
6720		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6721		"%b = OpFAdd %f32 %a %actually_zero\n"
6722		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
6723		"OpReturnValue %ret\n"
6724		"OpFunctionEnd\n"
6725		;
6726	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
6727
6728	fragments["testfun"] =
6729		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6730		"%param1 = OpFunctionParameter %v4f32\n"
6731		"%label_testfun = OpLabel\n"
6732		"%undef = OpUndef %i32\n"
6733		"%zero = OpIMul %i32 %undef %c_i32_0\n"
6734		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
6735		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
6736		"OpReturnValue %ret\n"
6737		"OpFunctionEnd\n"
6738		;
6739	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
6740
6741	fragments["testfun"] =
6742		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6743		"%param1 = OpFunctionParameter %v4f32\n"
6744		"%label_testfun = OpLabel\n"
6745		"%undef = OpUndef %u32\n"
6746		"%zero = OpIMul %u32 %undef %c_i32_0\n"
6747		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
6748		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
6749		"OpReturnValue %ret\n"
6750		"OpFunctionEnd\n"
6751		;
6752	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
6753
6754	fragments["testfun"] =
6755		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6756		"%param1 = OpFunctionParameter %v4f32\n"
6757		"%label_testfun = OpLabel\n"
6758		"%undef = OpUndef %v4f32\n"
6759		"%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
6760		"%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
6761		"%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
6762		"%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
6763		"%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
6764		"%is_nan_0 = OpIsNan %bool %zero_0\n"
6765		"%is_nan_1 = OpIsNan %bool %zero_1\n"
6766		"%is_nan_2 = OpIsNan %bool %zero_2\n"
6767		"%is_nan_3 = OpIsNan %bool %zero_3\n"
6768		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
6769		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
6770		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
6771		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
6772		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6773		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
6774		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
6775		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
6776		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
6777		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
6778		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
6779		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
6780		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
6781		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
6782		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
6783		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
6784		"OpReturnValue %ret\n"
6785		"OpFunctionEnd\n"
6786		;
6787	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
6788
6789	fragments["pre_main"] =
6790		"%v2f32 = OpTypeVector %f32 2\n"
6791		"%m2x2f32 = OpTypeMatrix %v2f32 2\n";
6792	fragments["testfun"] =
6793		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6794		"%param1 = OpFunctionParameter %v4f32\n"
6795		"%label_testfun = OpLabel\n"
6796		"%undef = OpUndef %m2x2f32\n"
6797		"%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
6798		"%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
6799		"%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
6800		"%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
6801		"%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
6802		"%is_nan_0 = OpIsNan %bool %zero_0\n"
6803		"%is_nan_1 = OpIsNan %bool %zero_1\n"
6804		"%is_nan_2 = OpIsNan %bool %zero_2\n"
6805		"%is_nan_3 = OpIsNan %bool %zero_3\n"
6806		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
6807		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
6808		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
6809		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
6810		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6811		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
6812		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
6813		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
6814		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
6815		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
6816		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
6817		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
6818		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
6819		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
6820		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
6821		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
6822		"OpReturnValue %ret\n"
6823		"OpFunctionEnd\n"
6824		;
6825	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
6826
6827	return opUndefTests.release();
6828}
6829
6830void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
6831{
6832	const RGBA		inputColors[4]		=
6833	{
6834		RGBA(0,		0,		0,		255),
6835		RGBA(0,		0,		255,	255),
6836		RGBA(0,		255,	0,		255),
6837		RGBA(0,		255,	255,	255)
6838	};
6839
6840	const RGBA		expectedColors[4]	=
6841	{
6842		RGBA(255,	 0,		 0,		 255),
6843		RGBA(255,	 0,		 0,		 255),
6844		RGBA(255,	 0,		 0,		 255),
6845		RGBA(255,	 0,		 0,		 255)
6846	};
6847
6848	const struct SingleFP16Possibility
6849	{
6850		const char* name;
6851		const char* constant;  // Value to assign to %test_constant.
6852		float		valueAsFloat;
6853		const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
6854	}				tests[]				=
6855	{
6856		{
6857			"negative",
6858			"-0x1.3p1\n",
6859			-constructNormalizedFloat(1, 0x300000),
6860			"%cond = OpFOrdEqual %bool %c %test_constant\n"
6861		}, // -19
6862		{
6863			"positive",
6864			"0x1.0p7\n",
6865			constructNormalizedFloat(7, 0x000000),
6866			"%cond = OpFOrdEqual %bool %c %test_constant\n"
6867		},  // +128
6868		// SPIR-V requires that OpQuantizeToF16 flushes
6869		// any numbers that would end up denormalized in F16 to zero.
6870		{
6871			"denorm",
6872			"0x0.0006p-126\n",
6873			std::ldexp(1.5f, -140),
6874			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6875		},  // denorm
6876		{
6877			"negative_denorm",
6878			"-0x0.0006p-126\n",
6879			-std::ldexp(1.5f, -140),
6880			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6881		}, // -denorm
6882		{
6883			"too_small",
6884			"0x1.0p-16\n",
6885			std::ldexp(1.0f, -16),
6886			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6887		},     // too small positive
6888		{
6889			"negative_too_small",
6890			"-0x1.0p-32\n",
6891			-std::ldexp(1.0f, -32),
6892			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6893		},      // too small negative
6894		{
6895			"negative_inf",
6896			"-0x1.0p128\n",
6897			-std::ldexp(1.0f, 128),
6898
6899			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
6900			"%inf = OpIsInf %bool %c\n"
6901			"%cond = OpLogicalAnd %bool %gz %inf\n"
6902		},     // -inf to -inf
6903		{
6904			"inf",
6905			"0x1.0p128\n",
6906			std::ldexp(1.0f, 128),
6907
6908			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
6909			"%inf = OpIsInf %bool %c\n"
6910			"%cond = OpLogicalAnd %bool %gz %inf\n"
6911		},     // +inf to +inf
6912		{
6913			"round_to_negative_inf",
6914			"-0x1.0p32\n",
6915			-std::ldexp(1.0f, 32),
6916
6917			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
6918			"%inf = OpIsInf %bool %c\n"
6919			"%cond = OpLogicalAnd %bool %gz %inf\n"
6920		},     // round to -inf
6921		{
6922			"round_to_inf",
6923			"0x1.0p16\n",
6924			std::ldexp(1.0f, 16),
6925
6926			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
6927			"%inf = OpIsInf %bool %c\n"
6928			"%cond = OpLogicalAnd %bool %gz %inf\n"
6929		},     // round to +inf
6930		{
6931			"nan",
6932			"0x1.1p128\n",
6933			std::numeric_limits<float>::quiet_NaN(),
6934
6935			// Test for any NaN value, as NaNs are not preserved
6936			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
6937			"%cond = OpIsNan %bool %direct_quant\n"
6938		}, // nan
6939		{
6940			"negative_nan",
6941			"-0x1.0001p128\n",
6942			std::numeric_limits<float>::quiet_NaN(),
6943
6944			// Test for any NaN value, as NaNs are not preserved
6945			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
6946			"%cond = OpIsNan %bool %direct_quant\n"
6947		} // -nan
6948	};
6949	const char*		constants			=
6950		"%test_constant = OpConstant %f32 ";  // The value will be test.constant.
6951
6952	StringTemplate	function			(
6953		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
6954		"%param1        = OpFunctionParameter %v4f32\n"
6955		"%label_testfun = OpLabel\n"
6956		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6957		"%b             = OpFAdd %f32 %test_constant %a\n"
6958		"%c             = OpQuantizeToF16 %f32 %b\n"
6959		"${condition}\n"
6960		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
6961		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
6962		"                 OpReturnValue %retval\n"
6963		"OpFunctionEnd\n"
6964	);
6965
6966	const char*		specDecorations		= "OpDecorate %test_constant SpecId 0\n";
6967	const char*		specConstants		=
6968			"%test_constant = OpSpecConstant %f32 0.\n"
6969			"%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
6970
6971	StringTemplate	specConstantFunction(
6972		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
6973		"%param1        = OpFunctionParameter %v4f32\n"
6974		"%label_testfun = OpLabel\n"
6975		"${condition}\n"
6976		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
6977		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
6978		"                 OpReturnValue %retval\n"
6979		"OpFunctionEnd\n"
6980	);
6981
6982	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
6983	{
6984		map<string, string>								codeSpecialization;
6985		map<string, string>								fragments;
6986		codeSpecialization["condition"]					= tests[idx].condition;
6987		fragments["testfun"]							= function.specialize(codeSpecialization);
6988		fragments["pre_main"]							= string(constants) + tests[idx].constant + "\n";
6989		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
6990	}
6991
6992	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
6993	{
6994		map<string, string>								codeSpecialization;
6995		map<string, string>								fragments;
6996		vector<deInt32>									passConstants;
6997		deInt32											specConstant;
6998
6999		codeSpecialization["condition"]					= tests[idx].condition;
7000		fragments["testfun"]							= specConstantFunction.specialize(codeSpecialization);
7001		fragments["decoration"]							= specDecorations;
7002		fragments["pre_main"]							= specConstants;
7003
7004		memcpy(&specConstant, &tests[idx].valueAsFloat, sizeof(float));
7005		passConstants.push_back(specConstant);
7006
7007		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
7008	}
7009}
7010
7011void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
7012{
7013	RGBA inputColors[4] =  {
7014		RGBA(0,		0,		0,		255),
7015		RGBA(0,		0,		255,	255),
7016		RGBA(0,		255,	0,		255),
7017		RGBA(0,		255,	255,	255)
7018	};
7019
7020	RGBA expectedColors[4] =
7021	{
7022		RGBA(255,	 0,		 0,		 255),
7023		RGBA(255,	 0,		 0,		 255),
7024		RGBA(255,	 0,		 0,		 255),
7025		RGBA(255,	 0,		 0,		 255)
7026	};
7027
7028	struct DualFP16Possibility
7029	{
7030		const char* name;
7031		const char* input;
7032		float		inputAsFloat;
7033		const char* possibleOutput1;
7034		const char* possibleOutput2;
7035	} tests[] = {
7036		{
7037			"positive_round_up_or_round_down",
7038			"0x1.3003p8",
7039			constructNormalizedFloat(8, 0x300300),
7040			"0x1.304p8",
7041			"0x1.3p8"
7042		},
7043		{
7044			"negative_round_up_or_round_down",
7045			"-0x1.6008p-7",
7046			-constructNormalizedFloat(-7, 0x600800),
7047			"-0x1.6p-7",
7048			"-0x1.604p-7"
7049		},
7050		{
7051			"carry_bit",
7052			"0x1.01ep2",
7053			constructNormalizedFloat(2, 0x01e000),
7054			"0x1.01cp2",
7055			"0x1.02p2"
7056		},
7057		{
7058			"carry_to_exponent",
7059			"0x1.ffep1",
7060			constructNormalizedFloat(1, 0xffe000),
7061			"0x1.ffcp1",
7062			"0x1.0p2"
7063		},
7064	};
7065	StringTemplate constants (
7066		"%input_const = OpConstant %f32 ${input}\n"
7067		"%possible_solution1 = OpConstant %f32 ${output1}\n"
7068		"%possible_solution2 = OpConstant %f32 ${output2}\n"
7069		);
7070
7071	StringTemplate specConstants (
7072		"%input_const = OpSpecConstant %f32 0.\n"
7073		"%possible_solution1 = OpConstant %f32 ${output1}\n"
7074		"%possible_solution2 = OpConstant %f32 ${output2}\n"
7075	);
7076
7077	const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
7078
7079	const char* function  =
7080		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
7081		"%param1        = OpFunctionParameter %v4f32\n"
7082		"%label_testfun = OpLabel\n"
7083		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7084		// For the purposes of this test we assume that 0.f will always get
7085		// faithfully passed through the pipeline stages.
7086		"%b             = OpFAdd %f32 %input_const %a\n"
7087		"%c             = OpQuantizeToF16 %f32 %b\n"
7088		"%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
7089		"%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
7090		"%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
7091		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
7092		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
7093		"                 OpReturnValue %retval\n"
7094		"OpFunctionEnd\n";
7095
7096	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
7097		map<string, string>									fragments;
7098		map<string, string>									constantSpecialization;
7099
7100		constantSpecialization["input"]						= tests[idx].input;
7101		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
7102		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
7103		fragments["testfun"]								= function;
7104		fragments["pre_main"]								= constants.specialize(constantSpecialization);
7105		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
7106	}
7107
7108	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
7109		map<string, string>									fragments;
7110		map<string, string>									constantSpecialization;
7111		vector<deInt32>										passConstants;
7112		deInt32												specConstant;
7113
7114		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
7115		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
7116		fragments["testfun"]								= function;
7117		fragments["decoration"]								= specDecorations;
7118		fragments["pre_main"]								= specConstants.specialize(constantSpecialization);
7119
7120		memcpy(&specConstant, &tests[idx].inputAsFloat, sizeof(float));
7121		passConstants.push_back(specConstant);
7122
7123		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
7124	}
7125}
7126
7127tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
7128{
7129	de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
7130	createOpQuantizeSingleOptionTests(opQuantizeTests.get());
7131	createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
7132	return opQuantizeTests.release();
7133}
7134
7135struct ShaderPermutation
7136{
7137	deUint8 vertexPermutation;
7138	deUint8 geometryPermutation;
7139	deUint8 tesscPermutation;
7140	deUint8 tessePermutation;
7141	deUint8 fragmentPermutation;
7142};
7143
7144ShaderPermutation getShaderPermutation(deUint8 inputValue)
7145{
7146	ShaderPermutation	permutation =
7147	{
7148		static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
7149		static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
7150		static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
7151		static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
7152		static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
7153	};
7154	return permutation;
7155}
7156
7157tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
7158{
7159	RGBA								defaultColors[4];
7160	RGBA								invertedColors[4];
7161	de::MovePtr<tcu::TestCaseGroup>		moduleTests			(new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
7162
7163	const ShaderElement					combinedPipeline[]	=
7164	{
7165		ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
7166		ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
7167		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7168		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7169		ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
7170	};
7171
7172	getDefaultColors(defaultColors);
7173	getInvertedDefaultColors(invertedColors);
7174	addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline, createInstanceContext(combinedPipeline, map<string, string>()));
7175
7176	const char* numbers[] =
7177	{
7178		"1", "2"
7179	};
7180
7181	for (deInt8 idx = 0; idx < 32; ++idx)
7182	{
7183		ShaderPermutation			permutation		= getShaderPermutation(idx);
7184		string						name			= string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
7185		const ShaderElement			pipeline[]		=
7186		{
7187			ShaderElement("vert",	string("vert") +	numbers[permutation.vertexPermutation],		VK_SHADER_STAGE_VERTEX_BIT),
7188			ShaderElement("geom",	string("geom") +	numbers[permutation.geometryPermutation],	VK_SHADER_STAGE_GEOMETRY_BIT),
7189			ShaderElement("tessc",	string("tessc") +	numbers[permutation.tesscPermutation],		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7190			ShaderElement("tesse",	string("tesse") +	numbers[permutation.tessePermutation],		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7191			ShaderElement("frag",	string("frag") +	numbers[permutation.fragmentPermutation],	VK_SHADER_STAGE_FRAGMENT_BIT)
7192		};
7193
7194		// If there are an even number of swaps, then it should be no-op.
7195		// If there are an odd number, the color should be flipped.
7196		if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
7197		{
7198			addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline, createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
7199		}
7200		else
7201		{
7202			addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline, createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
7203		}
7204	}
7205	return moduleTests.release();
7206}
7207
7208tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
7209{
7210	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
7211	RGBA defaultColors[4];
7212	getDefaultColors(defaultColors);
7213	map<string, string> fragments;
7214	fragments["pre_main"] =
7215		"%c_f32_5 = OpConstant %f32 5.\n";
7216
7217	// A loop with a single block. The Continue Target is the loop block
7218	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
7219	// -- the "continue construct" forms the entire loop.
7220	fragments["testfun"] =
7221		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7222		"%param1 = OpFunctionParameter %v4f32\n"
7223
7224		"%entry = OpLabel\n"
7225		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7226		"OpBranch %loop\n"
7227
7228		";adds and subtracts 1.0 to %val in alternate iterations\n"
7229		"%loop = OpLabel\n"
7230		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
7231		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
7232		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
7233		"%val = OpFAdd %f32 %val1 %delta\n"
7234		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
7235		"%count__ = OpISub %i32 %count %c_i32_1\n"
7236		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7237		"OpLoopMerge %exit %loop None\n"
7238		"OpBranchConditional %again %loop %exit\n"
7239
7240		"%exit = OpLabel\n"
7241		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
7242		"OpReturnValue %result\n"
7243
7244		"OpFunctionEnd\n"
7245		;
7246	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
7247
7248	// Body comprised of multiple basic blocks.
7249	const StringTemplate multiBlock(
7250		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7251		"%param1 = OpFunctionParameter %v4f32\n"
7252
7253		"%entry = OpLabel\n"
7254		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7255		"OpBranch %loop\n"
7256
7257		";adds and subtracts 1.0 to %val in alternate iterations\n"
7258		"%loop = OpLabel\n"
7259		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %gather\n"
7260		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %gather\n"
7261		"%val1 = OpPhi %f32 %val0 %entry %val %gather\n"
7262		// There are several possibilities for the Continue Target below.  Each
7263		// will be specialized into a separate test case.
7264		"OpLoopMerge %exit ${continue_target} None\n"
7265		"OpBranch %if\n"
7266
7267		"%if = OpLabel\n"
7268		";delta_next = (delta > 0) ? -1 : 1;\n"
7269		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
7270		"OpSelectionMerge %gather DontFlatten\n"
7271		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
7272
7273		"%odd = OpLabel\n"
7274		"OpBranch %gather\n"
7275
7276		"%even = OpLabel\n"
7277		"OpBranch %gather\n"
7278
7279		"%gather = OpLabel\n"
7280		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
7281		"%val = OpFAdd %f32 %val1 %delta\n"
7282		"%count__ = OpISub %i32 %count %c_i32_1\n"
7283		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7284		"OpBranchConditional %again %loop %exit\n"
7285
7286		"%exit = OpLabel\n"
7287		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
7288		"OpReturnValue %result\n"
7289
7290		"OpFunctionEnd\n");
7291
7292	map<string, string> continue_target;
7293
7294	// The Continue Target is the loop block itself.
7295	continue_target["continue_target"] = "%loop";
7296	fragments["testfun"] = multiBlock.specialize(continue_target);
7297	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
7298
7299	// The Continue Target is at the end of the loop.
7300	continue_target["continue_target"] = "%gather";
7301	fragments["testfun"] = multiBlock.specialize(continue_target);
7302	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
7303
7304	// A loop with continue statement.
7305	fragments["testfun"] =
7306		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7307		"%param1 = OpFunctionParameter %v4f32\n"
7308
7309		"%entry = OpLabel\n"
7310		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7311		"OpBranch %loop\n"
7312
7313		";adds 4, 3, and 1 to %val0 (skips 2)\n"
7314		"%loop = OpLabel\n"
7315		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7316		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
7317		"OpLoopMerge %exit %continue None\n"
7318		"OpBranch %if\n"
7319
7320		"%if = OpLabel\n"
7321		";skip if %count==2\n"
7322		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
7323		"OpSelectionMerge %continue DontFlatten\n"
7324		"OpBranchConditional %eq2 %continue %body\n"
7325
7326		"%body = OpLabel\n"
7327		"%fcount = OpConvertSToF %f32 %count\n"
7328		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7329		"OpBranch %continue\n"
7330
7331		"%continue = OpLabel\n"
7332		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
7333		"%count__ = OpISub %i32 %count %c_i32_1\n"
7334		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7335		"OpBranchConditional %again %loop %exit\n"
7336
7337		"%exit = OpLabel\n"
7338		"%same = OpFSub %f32 %val %c_f32_8\n"
7339		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7340		"OpReturnValue %result\n"
7341		"OpFunctionEnd\n";
7342	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
7343
7344	// A loop with break.
7345	fragments["testfun"] =
7346		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7347		"%param1 = OpFunctionParameter %v4f32\n"
7348
7349		"%entry = OpLabel\n"
7350		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7351		"%dot = OpDot %f32 %param1 %param1\n"
7352		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7353		"%zero = OpConvertFToU %u32 %div\n"
7354		"%two = OpIAdd %i32 %zero %c_i32_2\n"
7355		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7356		"OpBranch %loop\n"
7357
7358		";adds 4 and 3 to %val0 (exits early)\n"
7359		"%loop = OpLabel\n"
7360		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7361		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
7362		"OpLoopMerge %exit %continue None\n"
7363		"OpBranch %if\n"
7364
7365		"%if = OpLabel\n"
7366		";end loop if %count==%two\n"
7367		"%above2 = OpSGreaterThan %bool %count %two\n"
7368		"OpSelectionMerge %continue DontFlatten\n"
7369		"OpBranchConditional %above2 %body %exit\n"
7370
7371		"%body = OpLabel\n"
7372		"%fcount = OpConvertSToF %f32 %count\n"
7373		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7374		"OpBranch %continue\n"
7375
7376		"%continue = OpLabel\n"
7377		"%count__ = OpISub %i32 %count %c_i32_1\n"
7378		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7379		"OpBranchConditional %again %loop %exit\n"
7380
7381		"%exit = OpLabel\n"
7382		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
7383		"%same = OpFSub %f32 %val_post %c_f32_7\n"
7384		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7385		"OpReturnValue %result\n"
7386		"OpFunctionEnd\n";
7387	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
7388
7389	// A loop with return.
7390	fragments["testfun"] =
7391		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7392		"%param1 = OpFunctionParameter %v4f32\n"
7393
7394		"%entry = OpLabel\n"
7395		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7396		"%dot = OpDot %f32 %param1 %param1\n"
7397		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7398		"%zero = OpConvertFToU %u32 %div\n"
7399		"%two = OpIAdd %i32 %zero %c_i32_2\n"
7400		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7401		"OpBranch %loop\n"
7402
7403		";returns early without modifying %param1\n"
7404		"%loop = OpLabel\n"
7405		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7406		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
7407		"OpLoopMerge %exit %continue None\n"
7408		"OpBranch %if\n"
7409
7410		"%if = OpLabel\n"
7411		";return if %count==%two\n"
7412		"%above2 = OpSGreaterThan %bool %count %two\n"
7413		"OpSelectionMerge %continue DontFlatten\n"
7414		"OpBranchConditional %above2 %body %early_exit\n"
7415
7416		"%early_exit = OpLabel\n"
7417		"OpReturnValue %param1\n"
7418
7419		"%body = OpLabel\n"
7420		"%fcount = OpConvertSToF %f32 %count\n"
7421		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7422		"OpBranch %continue\n"
7423
7424		"%continue = OpLabel\n"
7425		"%count__ = OpISub %i32 %count %c_i32_1\n"
7426		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7427		"OpBranchConditional %again %loop %exit\n"
7428
7429		"%exit = OpLabel\n"
7430		";should never get here, so return an incorrect result\n"
7431		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
7432		"OpReturnValue %result\n"
7433		"OpFunctionEnd\n";
7434	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
7435
7436	return testGroup.release();
7437}
7438
7439// Adds a new test to group using custom fragments for the tessellation-control
7440// stage and passthrough fragments for all other stages.  Uses default colors
7441// for input and expected output.
7442void addTessCtrlTest(tcu::TestCaseGroup* group, const char* name, const map<string, string>& fragments)
7443{
7444	RGBA defaultColors[4];
7445	getDefaultColors(defaultColors);
7446	const ShaderElement pipelineStages[] =
7447	{
7448		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
7449		ShaderElement("tessc", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7450		ShaderElement("tesse", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7451		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
7452	};
7453
7454	addFunctionCaseWithPrograms<InstanceContext>(group, name, "", addShaderCodeCustomTessControl,
7455												 runAndVerifyDefaultPipeline, createInstanceContext(
7456													 pipelineStages, defaultColors, defaultColors, fragments, StageToSpecConstantMap()));
7457}
7458
7459// A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
7460tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
7461{
7462	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
7463	map<string, string> fragments;
7464
7465	// A barrier inside a function body.
7466	fragments["pre_main"] =
7467		"%Workgroup = OpConstant %i32 2\n"
7468		"%SequentiallyConsistent = OpConstant %i32 0x10\n";
7469	fragments["testfun"] =
7470		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7471		"%param1 = OpFunctionParameter %v4f32\n"
7472		"%label_testfun = OpLabel\n"
7473		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7474		"OpReturnValue %param1\n"
7475		"OpFunctionEnd\n";
7476	addTessCtrlTest(testGroup.get(), "in_function", fragments);
7477
7478	// Common setup code for the following tests.
7479	fragments["pre_main"] =
7480		"%Workgroup = OpConstant %i32 2\n"
7481		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
7482		"%c_f32_5 = OpConstant %f32 5.\n";
7483	const string setupPercentZero =	 // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
7484		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7485		"%param1 = OpFunctionParameter %v4f32\n"
7486		"%entry = OpLabel\n"
7487		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7488		"%dot = OpDot %f32 %param1 %param1\n"
7489		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7490		"%zero = OpConvertFToU %u32 %div\n";
7491
7492	// Barriers inside OpSwitch branches.
7493	fragments["testfun"] =
7494		setupPercentZero +
7495		"OpSelectionMerge %switch_exit None\n"
7496		"OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
7497
7498		"%case1 = OpLabel\n"
7499		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7500		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7501		"%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7502		"OpBranch %switch_exit\n"
7503
7504		"%switch_default = OpLabel\n"
7505		"%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7506		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7507		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7508		"OpBranch %switch_exit\n"
7509
7510		"%case0 = OpLabel\n"
7511		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7512		"OpBranch %switch_exit\n"
7513
7514		"%switch_exit = OpLabel\n"
7515		"%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
7516		"OpReturnValue %ret\n"
7517		"OpFunctionEnd\n";
7518	addTessCtrlTest(testGroup.get(), "in_switch", fragments);
7519
7520	// Barriers inside if-then-else.
7521	fragments["testfun"] =
7522		setupPercentZero +
7523		"%eq0 = OpIEqual %bool %zero %c_u32_0\n"
7524		"OpSelectionMerge %exit DontFlatten\n"
7525		"OpBranchConditional %eq0 %then %else\n"
7526
7527		"%else = OpLabel\n"
7528		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7529		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7530		"%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7531		"OpBranch %exit\n"
7532
7533		"%then = OpLabel\n"
7534		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7535		"OpBranch %exit\n"
7536
7537		"%exit = OpLabel\n"
7538		"%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
7539		"OpReturnValue %ret\n"
7540		"OpFunctionEnd\n";
7541	addTessCtrlTest(testGroup.get(), "in_if", fragments);
7542
7543	// A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
7544	// http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
7545	fragments["testfun"] =
7546		setupPercentZero +
7547		"%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
7548		"%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
7549		"OpSelectionMerge %exit DontFlatten\n"
7550		"OpBranchConditional %thread0 %then %else\n"
7551
7552		"%else = OpLabel\n"
7553		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7554		"OpBranch %exit\n"
7555
7556		"%then = OpLabel\n"
7557		"%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
7558		"OpBranch %exit\n"
7559
7560		"%exit = OpLabel\n"
7561		"%val = OpPhi %f32 %val0 %else %val1 %then\n"
7562		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7563		"%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
7564		"OpReturnValue %ret\n"
7565		"OpFunctionEnd\n";
7566	addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
7567
7568	// A barrier inside a loop.
7569	fragments["pre_main"] =
7570		"%Workgroup = OpConstant %i32 2\n"
7571		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
7572		"%c_f32_10 = OpConstant %f32 10.\n";
7573	fragments["testfun"] =
7574		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7575		"%param1 = OpFunctionParameter %v4f32\n"
7576		"%entry = OpLabel\n"
7577		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7578		"OpBranch %loop\n"
7579
7580		";adds 4, 3, 2, and 1 to %val0\n"
7581		"%loop = OpLabel\n"
7582		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
7583		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
7584		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7585		"%fcount = OpConvertSToF %f32 %count\n"
7586		"%val = OpFAdd %f32 %val1 %fcount\n"
7587		"%count__ = OpISub %i32 %count %c_i32_1\n"
7588		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7589		"OpLoopMerge %exit %loop None\n"
7590		"OpBranchConditional %again %loop %exit\n"
7591
7592		"%exit = OpLabel\n"
7593		"%same = OpFSub %f32 %val %c_f32_10\n"
7594		"%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7595		"OpReturnValue %ret\n"
7596		"OpFunctionEnd\n";
7597	addTessCtrlTest(testGroup.get(), "in_loop", fragments);
7598
7599	return testGroup.release();
7600}
7601
7602// Test for the OpFRem instruction.
7603tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
7604{
7605	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
7606	map<string, string>					fragments;
7607	RGBA								inputColors[4];
7608	RGBA								outputColors[4];
7609
7610	fragments["pre_main"]				 =
7611		"%c_f32_3 = OpConstant %f32 3.0\n"
7612		"%c_f32_n3 = OpConstant %f32 -3.0\n"
7613		"%c_f32_4 = OpConstant %f32 4.0\n"
7614		"%c_f32_p75 = OpConstant %f32 0.75\n"
7615		"%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
7616		"%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
7617		"%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
7618
7619	// The test does the following.
7620	// vec4 result = (param1 * 8.0) - 4.0;
7621	// return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
7622	fragments["testfun"]				 =
7623		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7624		"%param1 = OpFunctionParameter %v4f32\n"
7625		"%label_testfun = OpLabel\n"
7626		"%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
7627		"%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
7628		"%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
7629		"%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
7630		"%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
7631		"%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
7632		"OpReturnValue %xy_0_1\n"
7633		"OpFunctionEnd\n";
7634
7635
7636	inputColors[0]		= RGBA(16,	16,		0, 255);
7637	inputColors[1]		= RGBA(232, 232,	0, 255);
7638	inputColors[2]		= RGBA(232, 16,		0, 255);
7639	inputColors[3]		= RGBA(16,	232,	0, 255);
7640
7641	outputColors[0]		= RGBA(64,	64,		0, 255);
7642	outputColors[1]		= RGBA(255, 255,	0, 255);
7643	outputColors[2]		= RGBA(255, 64,		0, 255);
7644	outputColors[3]		= RGBA(64,	255,	0, 255);
7645
7646	createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
7647	return testGroup.release();
7648}
7649
// Integer types used by the integer conversion tests.
// All signed enumerators come before the unsigned ones; isSigned() relies on
// this ordering.
enum IntegerType
{
	// Signed, by increasing bit width.
	INTEGER_TYPE_SIGNED_16		= 0,
	INTEGER_TYPE_SIGNED_32,
	INTEGER_TYPE_SIGNED_64,

	// Unsigned, by increasing bit width.
	INTEGER_TYPE_UNSIGNED_16,
	INTEGER_TYPE_UNSIGNED_32,
	INTEGER_TYPE_UNSIGNED_64,
};
7660
7661const string getBitWidthStr (IntegerType type)
7662{
7663	switch (type)
7664	{
7665		case INTEGER_TYPE_SIGNED_16:
7666		case INTEGER_TYPE_UNSIGNED_16:	return "16";
7667
7668		case INTEGER_TYPE_SIGNED_32:
7669		case INTEGER_TYPE_UNSIGNED_32:	return "32";
7670
7671		case INTEGER_TYPE_SIGNED_64:
7672		case INTEGER_TYPE_UNSIGNED_64:	return "64";
7673
7674		default:						DE_ASSERT(false);
7675										return "";
7676	}
7677}
7678
7679bool isSigned (IntegerType type)
7680{
7681	return (type <= INTEGER_TYPE_SIGNED_64);
7682}
7683
7684const string getTypeName (IntegerType type)
7685{
7686	string prefix = isSigned(type) ? "" : "u";
7687	return prefix + "int" + getBitWidthStr(type);
7688}
7689
7690const string getTestName (IntegerType from, IntegerType to)
7691{
7692	return getTypeName(from) + "_to_" + getTypeName(to);
7693}
7694
7695const string getAsmTypeDeclaration (IntegerType type)
7696{
7697	string sign = isSigned(type) ? " 1" : " 0";
7698	return "OpTypeInt " + getBitWidthStr(type) + sign;
7699}
7700
7701const string getConvertCaseShaderStr (const string& instruction, map<string, string> types)
7702{
7703	const StringTemplate shader (
7704		"OpCapability Shader\n"
7705		"${int_capabilities}"
7706		"OpMemoryModel Logical GLSL450\n"
7707		"OpEntryPoint GLCompute %main \"main\" %id\n"
7708		"OpExecutionMode %main LocalSize 1 1 1\n"
7709		"OpSource GLSL 430\n"
7710		"OpName %main           \"main\"\n"
7711		"OpName %id             \"gl_GlobalInvocationID\"\n"
7712		// Decorators
7713		"OpDecorate %id BuiltIn GlobalInvocationId\n"
7714		"OpDecorate %indata DescriptorSet 0\n"
7715		"OpDecorate %indata Binding 0\n"
7716		"OpDecorate %outdata DescriptorSet 0\n"
7717		"OpDecorate %outdata Binding 1\n"
7718		"OpDecorate %in_buf BufferBlock\n"
7719		"OpDecorate %out_buf BufferBlock\n"
7720		"OpMemberDecorate %in_buf 0 Offset 0\n"
7721		"OpMemberDecorate %out_buf 0 Offset 0\n"
7722		// Base types
7723		"%void       = OpTypeVoid\n"
7724		"%voidf      = OpTypeFunction %void\n"
7725		"%u32        = OpTypeInt 32 0\n"
7726		"%i32        = OpTypeInt 32 1\n"
7727		"%uvec3      = OpTypeVector %u32 3\n"
7728		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
7729		// Custom types
7730		"%in_type    = ${inputType}\n"
7731		"%out_type   = ${outputType}\n"
7732		// Derived types
7733		"%in_ptr     = OpTypePointer Uniform %in_type\n"
7734		"%out_ptr    = OpTypePointer Uniform %out_type\n"
7735		"%in_arr     = OpTypeRuntimeArray %in_type\n"
7736		"%out_arr    = OpTypeRuntimeArray %out_type\n"
7737		"%in_buf     = OpTypeStruct %in_arr\n"
7738		"%out_buf    = OpTypeStruct %out_arr\n"
7739		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
7740		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
7741		"%indata     = OpVariable %in_bufptr Uniform\n"
7742		"%outdata    = OpVariable %out_bufptr Uniform\n"
7743		"%inputptr   = OpTypePointer Input %in_type\n"
7744		"%id         = OpVariable %uvec3ptr Input\n"
7745		// Constants
7746		"%zero       = OpConstant %i32 0\n"
7747		// Main function
7748		"%main       = OpFunction %void None %voidf\n"
7749		"%label      = OpLabel\n"
7750		"%idval      = OpLoad %uvec3 %id\n"
7751		"%x          = OpCompositeExtract %u32 %idval 0\n"
7752		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
7753		"%outloc     = OpAccessChain %out_ptr %outdata %zero %x\n"
7754		"%inval      = OpLoad %in_type %inloc\n"
7755		"%conv       = ${instruction} %out_type %inval\n"
7756		"              OpStore %outloc %conv\n"
7757		"              OpReturn\n"
7758		"              OpFunctionEnd\n"
7759	);
7760
7761	types["instruction"] = instruction;
7762
7763	return shader.specialize(types);
7764}
7765
7766template<typename T>
7767BufferSp getSpecializedBuffer (deInt64 number)
7768{
7769	return BufferSp(new Buffer<T>(vector<T>(1, (T)number)));
7770}
7771
7772BufferSp getBuffer (IntegerType type, deInt64 number)
7773{
7774	switch (type)
7775	{
7776		case INTEGER_TYPE_SIGNED_16:	return getSpecializedBuffer<deInt16>(number);
7777		case INTEGER_TYPE_SIGNED_32:	return getSpecializedBuffer<deInt32>(number);
7778		case INTEGER_TYPE_SIGNED_64:	return getSpecializedBuffer<deInt64>(number);
7779
7780		case INTEGER_TYPE_UNSIGNED_16:	return getSpecializedBuffer<deUint16>(number);
7781		case INTEGER_TYPE_UNSIGNED_32:	return getSpecializedBuffer<deUint32>(number);
7782		case INTEGER_TYPE_UNSIGNED_64:	return getSpecializedBuffer<deUint64>(number);
7783
7784		default:						DE_ASSERT(false);
7785										return BufferSp(new Buffer<deInt32>(vector<deInt32>(1, 0)));
7786	}
7787}
7788
7789bool usesInt16 (IntegerType from, IntegerType to)
7790{
7791	return (from == INTEGER_TYPE_SIGNED_16 || from == INTEGER_TYPE_UNSIGNED_16
7792			|| to == INTEGER_TYPE_SIGNED_16 || to == INTEGER_TYPE_UNSIGNED_16);
7793}
7794
7795bool usesInt64 (IntegerType from, IntegerType to)
7796{
7797	return (from == INTEGER_TYPE_SIGNED_64 || from == INTEGER_TYPE_UNSIGNED_64
7798			|| to == INTEGER_TYPE_SIGNED_64 || to == INTEGER_TYPE_UNSIGNED_64);
7799}
7800
7801ConvertTestFeatures getUsedFeatures (IntegerType from, IntegerType to)
7802{
7803	if (usesInt16(from, to))
7804	{
7805		if (usesInt64(from, to))
7806		{
7807			return CONVERT_TEST_USES_INT16_INT64;
7808		}
7809		else
7810		{
7811			return CONVERT_TEST_USES_INT16;
7812		}
7813	}
7814	else
7815	{
7816		return CONVERT_TEST_USES_INT64;
7817	}
7818}
7819
7820struct ConvertCase
7821{
7822	ConvertCase (IntegerType from, IntegerType to, deInt64 number)
7823	: m_fromType		(from)
7824	, m_toType			(to)
7825	, m_features		(getUsedFeatures(from, to))
7826	, m_name			(getTestName(from, to))
7827	, m_inputBuffer		(getBuffer(from, number))
7828	, m_outputBuffer	(getBuffer(to, number))
7829	{
7830		m_asmTypes["inputType"]		= getAsmTypeDeclaration(from);
7831		m_asmTypes["outputType"]	= getAsmTypeDeclaration(to);
7832
7833		if (m_features == CONVERT_TEST_USES_INT16)
7834		{
7835			m_asmTypes["int_capabilities"] = "OpCapability Int16\n";
7836		}
7837		else if (m_features == CONVERT_TEST_USES_INT64)
7838		{
7839			m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
7840		}
7841		else if (m_features == CONVERT_TEST_USES_INT16_INT64)
7842		{
7843			m_asmTypes["int_capabilities"] = "OpCapability Int16\n \
7844											  OpCapability Int64\n";
7845		}
7846		else
7847		{
7848			DE_ASSERT(false);
7849		}
7850	}
7851
7852	IntegerType				m_fromType;
7853	IntegerType				m_toType;
7854	ConvertTestFeatures		m_features;
7855	string					m_name;
7856	map<string, string>		m_asmTypes;
7857	BufferSp				m_inputBuffer;
7858	BufferSp				m_outputBuffer;
7859};
7860
7861void createSConvertCases (vector<ConvertCase>& testCases)
7862{
7863	// Convert int to int
7864	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_SIGNED_32,		14669));
7865	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_SIGNED_64,		3341));
7866
7867	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_32,	INTEGER_TYPE_SIGNED_64,		973610259));
7868
7869	// Convert int to unsigned int
7870	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_UNSIGNED_32,	9288));
7871	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_16,	INTEGER_TYPE_UNSIGNED_64,	15460));
7872
7873	testCases.push_back(ConvertCase(INTEGER_TYPE_SIGNED_32,	INTEGER_TYPE_UNSIGNED_64,	346213461));
7874}
7875
7876//  Test for the OpSConvert instruction.
7877tcu::TestCaseGroup* createSConvertTests (tcu::TestContext& testCtx)
7878{
7879	const string instruction				("OpSConvert");
7880	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "sconvert", "OpSConvert"));
7881	vector<ConvertCase>				testCases;
7882	createSConvertCases(testCases);
7883
7884	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
7885	{
7886		ComputeShaderSpec	spec;
7887
7888		spec.assembly = getConvertCaseShaderStr(instruction, test->m_asmTypes);
7889		spec.inputs.push_back(test->m_inputBuffer);
7890		spec.outputs.push_back(test->m_inputBuffer);
7891		spec.numWorkGroups = IVec3(1, 1, 1);
7892
7893		group->addChild(new ConvertTestCase(testCtx, test->m_name.c_str(), "Convert integers with OpSConvert.", spec, test->m_features));
7894	}
7895
7896	return group.release();
7897}
7898
7899void createUConvertCases (vector<ConvertCase>& testCases)
7900{
7901	// Convert unsigned int to unsigned int
7902	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_UNSIGNED_32,	60653));
7903	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_UNSIGNED_64,	17991));
7904
7905	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_32,	INTEGER_TYPE_UNSIGNED_64,	904256275));
7906
7907	// Convert unsigned int to int
7908	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_SIGNED_32,		38002));
7909	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_16,	INTEGER_TYPE_SIGNED_64,		64921));
7910
7911	testCases.push_back(ConvertCase(INTEGER_TYPE_UNSIGNED_32,	INTEGER_TYPE_SIGNED_64,		4294956295));
7912}
7913
7914//  Test for the OpUConvert instruction.
7915tcu::TestCaseGroup* createUConvertTests (tcu::TestContext& testCtx)
7916{
7917	const string instruction				("OpUConvert");
7918	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "uconvert", "OpUConvert"));
7919	vector<ConvertCase>				testCases;
7920	createUConvertCases(testCases);
7921
7922	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
7923	{
7924		ComputeShaderSpec	spec;
7925
7926		spec.assembly = getConvertCaseShaderStr(instruction, test->m_asmTypes);
7927		spec.inputs.push_back(test->m_inputBuffer);
7928		spec.outputs.push_back(test->m_inputBuffer);
7929		spec.numWorkGroups = IVec3(1, 1, 1);
7930
7931		group->addChild(new ConvertTestCase(testCtx, test->m_name.c_str(), "Convert integers with OpUConvert.", spec, test->m_features));
7932	}
7933
7934	return group.release();
7935}
7936
7937tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
7938{
7939	de::MovePtr<tcu::TestCaseGroup> instructionTests	(new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
7940	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
7941	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
7942
7943	computeTests->addChild(createOpNopGroup(testCtx));
7944	computeTests->addChild(createOpLineGroup(testCtx));
7945	computeTests->addChild(createOpNoLineGroup(testCtx));
7946	computeTests->addChild(createOpConstantNullGroup(testCtx));
7947	computeTests->addChild(createOpConstantCompositeGroup(testCtx));
7948	computeTests->addChild(createOpConstantUsageGroup(testCtx));
7949	computeTests->addChild(createSpecConstantGroup(testCtx));
7950	computeTests->addChild(createOpSourceGroup(testCtx));
7951	computeTests->addChild(createOpSourceExtensionGroup(testCtx));
7952	computeTests->addChild(createDecorationGroupGroup(testCtx));
7953	computeTests->addChild(createOpPhiGroup(testCtx));
7954	computeTests->addChild(createLoopControlGroup(testCtx));
7955	computeTests->addChild(createFunctionControlGroup(testCtx));
7956	computeTests->addChild(createSelectionControlGroup(testCtx));
7957	computeTests->addChild(createBlockOrderGroup(testCtx));
7958	computeTests->addChild(createMultipleShaderGroup(testCtx));
7959	computeTests->addChild(createMemoryAccessGroup(testCtx));
7960	computeTests->addChild(createOpCopyMemoryGroup(testCtx));
7961	computeTests->addChild(createOpCopyObjectGroup(testCtx));
7962	computeTests->addChild(createNoContractionGroup(testCtx));
7963	computeTests->addChild(createOpUndefGroup(testCtx));
7964	computeTests->addChild(createOpUnreachableGroup(testCtx));
7965	computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
7966	computeTests ->addChild(createOpFRemGroup(testCtx));
7967	computeTests->addChild(createSConvertTests(testCtx));
7968	computeTests->addChild(createUConvertTests(testCtx));
7969
7970	RGBA defaultColors[4];
7971	getDefaultColors(defaultColors);
7972
7973	de::MovePtr<tcu::TestCaseGroup> opnopTests (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
7974	map<string, string> opNopFragments;
7975	opNopFragments["testfun"] =
7976		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7977		"%param1 = OpFunctionParameter %v4f32\n"
7978		"%label_testfun = OpLabel\n"
7979		"OpNop\n"
7980		"OpNop\n"
7981		"OpNop\n"
7982		"OpNop\n"
7983		"OpNop\n"
7984		"OpNop\n"
7985		"OpNop\n"
7986		"OpNop\n"
7987		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7988		"%b = OpFAdd %f32 %a %a\n"
7989		"OpNop\n"
7990		"%c = OpFSub %f32 %b %a\n"
7991		"%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
7992		"OpNop\n"
7993		"OpNop\n"
7994		"OpReturnValue %ret\n"
7995		"OpFunctionEnd\n"
7996		;
7997	createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, opnopTests.get());
7998
7999
8000	graphicsTests->addChild(opnopTests.release());
8001	graphicsTests->addChild(createOpSourceTests(testCtx));
8002	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
8003	graphicsTests->addChild(createOpLineTests(testCtx));
8004	graphicsTests->addChild(createOpNoLineTests(testCtx));
8005	graphicsTests->addChild(createOpConstantNullTests(testCtx));
8006	graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
8007	graphicsTests->addChild(createMemoryAccessTests(testCtx));
8008	graphicsTests->addChild(createOpUndefTests(testCtx));
8009	graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
8010	graphicsTests->addChild(createModuleTests(testCtx));
8011	graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
8012	graphicsTests->addChild(createOpPhiTests(testCtx));
8013	graphicsTests->addChild(createNoContractionTests(testCtx));
8014	graphicsTests->addChild(createOpQuantizeTests(testCtx));
8015	graphicsTests->addChild(createLoopTests(testCtx));
8016	graphicsTests->addChild(createSpecConstantTests(testCtx));
8017	graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
8018	graphicsTests->addChild(createBarrierTests(testCtx));
8019	graphicsTests->addChild(createDecorationGroupTests(testCtx));
8020	graphicsTests->addChild(createFRemTests(testCtx));
8021
8022	instructionTests->addChild(computeTests.release());
8023	instructionTests->addChild(graphicsTests.release());
8024
8025	return instructionTests.release();
8026}
8027
8028} // SpirVAssembly
8029} // vkt
8030