1/*-------------------------------------------------------------------------
2 * drawElements Quality Program Reference Renderer
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Reference renderer interface.
22 *//*--------------------------------------------------------------------*/
23
24#include "rrRenderer.hpp"
25#include "tcuVectorUtil.hpp"
26#include "tcuTextureUtil.hpp"
27#include "tcuFloat.hpp"
28#include "rrPrimitiveAssembler.hpp"
29#include "rrFragmentOperations.hpp"
30#include "rrRasterizer.hpp"
31#include "deMemory.h"
32
33#include <set>
34
35namespace rr
36{
37namespace
38{
39
// Scalar type in which all clipping arithmetic in this file is carried out.
typedef double ClipFloat; // floating point type used in clipping

// Four-component vector of ClipFloat; holds (x, y, z, w) positions while clipping.
typedef tcu::Vector<ClipFloat, 4> ClipVec4;
43
// Bundle of working storage grouped under one name.
// NOTE(review): sizing of the vectors and ownership of fragmentDepthBuffer are
// decided outside this view -- confirm against the drawing code.
struct RasterizationInternalBuffers
{
	std::vector<FragmentPacket>		fragmentPackets;
	std::vector<GenericVec4>		shaderOutputs;
	std::vector<Fragment>			shadedFragments;
	float*							fragmentDepthBuffer;	// raw pointer; this struct has no constructor, so it starts uninitialized
};
51
52deUint32 readIndexArray (const IndexType type, const void* ptr, size_t ndx)
53{
54	switch (type)
55	{
56		case INDEXTYPE_UINT8:
57			return ((const deUint8*)ptr)[ndx];
58
59		case INDEXTYPE_UINT16:
60		{
61			deUint16 retVal;
62			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint16), sizeof(deUint16));
63
64			return retVal;
65		}
66
67		case INDEXTYPE_UINT32:
68		{
69			deUint32 retVal;
70			deMemcpy(&retVal, (const deUint8*)ptr + ndx * sizeof(deUint32), sizeof(deUint32));
71
72			return retVal;
73		}
74
75		default:
76			DE_ASSERT(false);
77			return 0;
78	}
79}
80
81tcu::IVec4 getBufferSize (const rr::MultisampleConstPixelBufferAccess& multisampleBuffer)
82{
83	return tcu::IVec4(0, 0, multisampleBuffer.raw().getHeight(), multisampleBuffer.raw().getDepth());
84}
85
86bool isEmpty (const rr::MultisampleConstPixelBufferAccess& access)
87{
88	return access.raw().getWidth() == 0 || access.raw().getHeight() == 0 || access.raw().getDepth() == 0;
89}
90
// Mutable per-draw state. Holds the next primitive ID to hand out; the counter
// starts from zero and is advanced by generatePrimitiveIDs().
struct DrawContext
{
	int primitiveID;

	DrawContext (void)
		: primitiveID(0)
	{
	}
};
100
101/*--------------------------------------------------------------------*//*!
102 * \brief Calculates intersection of two rects given as (left, bottom, width, height)
103 *//*--------------------------------------------------------------------*/
104tcu::IVec4 rectIntersection (const tcu::IVec4& a, const tcu::IVec4& b)
105{
106	const tcu::IVec2 pos	= tcu::IVec2(de::max(a.x(), b.x()), de::max(a.y(), b.y()));
107	const tcu::IVec2 endPos	= tcu::IVec2(de::min(a.x() + a.z(), b.x() + b.z()), de::min(a.y() + a.w(), b.y() + b.w()));
108
109	return tcu::IVec4(pos.x(), pos.y(), endPos.x() - pos.x(), endPos.y() - pos.y());
110}
111
112void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::Triangle>& input)
113{
114	std::swap(output, input);
115}
116
117void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::Line>& input)
118{
119	std::swap(output, input);
120}
121
122void convertPrimitiveToBaseType(std::vector<pa::Point>& output, std::vector<pa::Point>& input)
123{
124	std::swap(output, input);
125}
126
127void convertPrimitiveToBaseType(std::vector<pa::Line>& output, std::vector<pa::LineAdjacency>& input)
128{
129	output.resize(input.size());
130	for (size_t i = 0; i < input.size(); ++i)
131	{
132		const int adjacentProvokingVertex	= input[i].provokingIndex;
133		const int baseProvokingVertexIndex	= adjacentProvokingVertex-1;
134		output[i] = pa::Line(input[i].v1, input[i].v2, baseProvokingVertexIndex);
135	}
136}
137
138void convertPrimitiveToBaseType(std::vector<pa::Triangle>& output, std::vector<pa::TriangleAdjacency>& input)
139{
140	output.resize(input.size());
141	for (size_t i = 0; i < input.size(); ++i)
142	{
143		const int adjacentProvokingVertex	= input[i].provokingIndex;
144		const int baseProvokingVertexIndex	= adjacentProvokingVertex/2;
145		output[i] = pa::Triangle(input[i].v0, input[i].v2, input[i].v4, baseProvokingVertexIndex);
146	}
147}
148
149namespace cliputil
150{
151
152/*--------------------------------------------------------------------*//*!
153 * \brief Get clipped portion of the second endpoint
154 *
155 * Calculate the intersection of line segment v0-v1 and a given plane. Line
156 * segment is defined by a pair of one-dimensional homogeneous coordinates.
157 *
158 *//*--------------------------------------------------------------------*/
159ClipFloat getSegmentVolumeEdgeClip (const ClipFloat v0,
160									const ClipFloat w0,
161									const ClipFloat v1,
162									const ClipFloat w1,
163									const ClipFloat plane)
164{
165	return (plane*w0 - v0) / ((v1 - v0) - plane*(w1 - w0));
166}
167
168/*--------------------------------------------------------------------*//*!
169 * \brief Get clipped portion of the endpoint
170 *
171 * How much (in [0-1] range) of a line segment v0-v1 would be clipped
172 * of the v0 end of the line segment by clipping.
173 *//*--------------------------------------------------------------------*/
174ClipFloat getLineEndpointClipping (const ClipVec4& v0, const ClipVec4& v1)
175{
176	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
177
178	if (v0.z() > v0.w())
179	{
180		// Clip +Z
181		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), clipVolumeSize);
182	}
183	else if (v0.z() < -v0.w())
184	{
185		// Clip -Z
186		return getSegmentVolumeEdgeClip(v0.z(), v0.w(), v1.z(), v1.w(), -clipVolumeSize);
187	}
188	else
189	{
190		// no clipping
191		return (ClipFloat)0.0;
192	}
193}
194
195ClipVec4 vec4ToClipVec4 (const tcu::Vec4& v)
196{
197	return ClipVec4((ClipFloat)v.x(), (ClipFloat)v.y(), (ClipFloat)v.z(), (ClipFloat)v.w());
198}
199
200tcu::Vec4 clipVec4ToVec4 (const ClipVec4& v)
201{
202	return tcu::Vec4((float)v.x(), (float)v.y(), (float)v.z(), (float)v.w());
203}
204
205class ClipVolumePlane
206{
207public:
208	virtual bool		pointInClipVolume			(const ClipVec4& p) const						= 0;
209	virtual ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
210	virtual ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const	= 0;
211};
212
// Clip plane "Sign * p[CompNdx] <= p.w()": Sign picks the positive or negative side,
// CompNdx the coordinate component the plane constrains.
template <int Sign, int CompNdx>
class ComponentPlane : public ClipVolumePlane
{
	// Only the two orientations are meaningful.
	DE_STATIC_ASSERT(Sign == +1 || Sign == -1);

public:
	bool		pointInClipVolume			(const ClipVec4& p) const;
	ClipFloat	clipLineSegmentEnd			(const ClipVec4& v0, const ClipVec4& v1) const;
	ClipVec4	getLineIntersectionPoint	(const ClipVec4& v0, const ClipVec4& v1) const;
};
223
224template <int Sign, int CompNdx>
225bool ComponentPlane<Sign, CompNdx>::pointInClipVolume (const ClipVec4& p) const
226{
227	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
228
229	return (ClipFloat)(Sign * p[CompNdx]) <= clipVolumeSize * p.w();
230}
231
232template <int Sign, int CompNdx>
233ClipFloat ComponentPlane<Sign, CompNdx>::clipLineSegmentEnd (const ClipVec4& v0, const ClipVec4& v1) const
234{
235	const ClipFloat clipVolumeSize = (ClipFloat)1.0;
236
237	return getSegmentVolumeEdgeClip(v0[CompNdx], v0.w(),
238									v1[CompNdx], v1.w(),
239									(ClipFloat)Sign * clipVolumeSize);
240}
241
242template <int Sign, int CompNdx>
243ClipVec4 ComponentPlane<Sign, CompNdx>::getLineIntersectionPoint (const ClipVec4& v0, const ClipVec4& v1) const
244{
245	// A point on line might be far away, causing clipping ratio (clipLineSegmentEnd) to become extremely close to 1.0
246	// even if the another point is not on the plane. Prevent clipping ratio from saturating by using points on line
247	// that are (nearly) on this and (nearly) on the opposite plane.
248
249	const ClipVec4 	clippedV0	= tcu::mix(v0, v1, ComponentPlane<+1, CompNdx>().clipLineSegmentEnd(v0, v1));
250	const ClipVec4 	clippedV1	= tcu::mix(v0, v1, ComponentPlane<-1, CompNdx>().clipLineSegmentEnd(v0, v1));
251	const ClipFloat	clipRatio	= clipLineSegmentEnd(clippedV0, clippedV1);
252
253	// Find intersection point of line from v0 to v1 and the current plane. Avoid ratios near 1.0
254	if (clipRatio <= (ClipFloat)0.5)
255		return tcu::mix(clippedV0, clippedV1, clipRatio);
256	else
257	{
258		const ClipFloat complementClipRatio = clipLineSegmentEnd(clippedV1, clippedV0);
259		return tcu::mix(clippedV1, clippedV0, complementClipRatio);
260	}
261}
262
// Vertex of a triangle being clipped: its position plus its weights relative to the three
// vertices of the original input triangle.
struct TriangleVertex
{
	ClipVec4	position;
	ClipFloat	weight[3];		//!< barycentrics
};
268
// One triangle (three TriangleVertex entries) produced while clipping an input triangle.
struct SubTriangle
{
	TriangleVertex vertices[3];
};
273
274void clipTriangleOneVertex (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& clipped, const TriangleVertex& v1, const TriangleVertex& v2)
275{
276	const ClipFloat	degenerateLimit = (ClipFloat)1.0;
277
278	// calc clip pos
279	TriangleVertex	mid1;
280	TriangleVertex	mid2;
281	bool			outputDegenerate = false;
282
283	{
284		const TriangleVertex&	inside	= v1;
285		const TriangleVertex&	outside	= clipped;
286		      TriangleVertex&	middle	= mid1;
287
288		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
289
290		if (hitDist >= degenerateLimit)
291		{
292			// do not generate degenerate triangles
293			outputDegenerate = true;
294		}
295		else
296		{
297			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
298			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
299
300			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
301			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
302			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
303			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
304		}
305	}
306
307	{
308		const TriangleVertex&	inside	= v2;
309		const TriangleVertex&	outside	= clipped;
310		      TriangleVertex&	middle	= mid2;
311
312		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
313
314		if (hitDist >= degenerateLimit)
315		{
316			// do not generate degenerate triangles
317			outputDegenerate = true;
318		}
319		else
320		{
321			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
322			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
323
324			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
325			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
326			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
327			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
328		}
329	}
330
331	if (!outputDegenerate)
332	{
333		// gen quad (v1) -> mid1 -> mid2 -> (v2)
334		clippedEdges.push_back(v1);
335		clippedEdges.push_back(mid1);
336		clippedEdges.push_back(mid2);
337		clippedEdges.push_back(v2);
338	}
339	else
340	{
341		// don't modify
342		clippedEdges.push_back(v1);
343		clippedEdges.push_back(clipped);
344		clippedEdges.push_back(v2);
345	}
346}
347
348void clipTriangleTwoVertices (std::vector<TriangleVertex>& clippedEdges, const ClipVolumePlane& plane, const TriangleVertex& v0, const TriangleVertex& clipped1, const TriangleVertex& clipped2)
349{
350	const ClipFloat	unclippableLimit = (ClipFloat)1.0;
351
352	// calc clip pos
353	TriangleVertex	mid1;
354	TriangleVertex	mid2;
355	bool			unclippableVertex1 = false;
356	bool			unclippableVertex2 = false;
357
358	{
359		const TriangleVertex&	inside	= v0;
360		const TriangleVertex&	outside	= clipped1;
361		      TriangleVertex&	middle	= mid1;
362
363		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
364
365		if (hitDist >= unclippableLimit)
366		{
367			// this edge cannot be clipped because the edge is really close to the volume boundary
368			unclippableVertex1 = true;
369		}
370		else
371		{
372			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
373			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
374
375			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
376			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
377			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
378			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
379		}
380	}
381
382	{
383		const TriangleVertex&	inside	= v0;
384		const TriangleVertex&	outside	= clipped2;
385		      TriangleVertex&	middle	= mid2;
386
387		const ClipFloat			hitDist	= plane.clipLineSegmentEnd(inside.position, outside.position);
388
389		if (hitDist >= unclippableLimit)
390		{
391			// this edge cannot be clipped because the edge is really close to the volume boundary
392			unclippableVertex2 = true;
393		}
394		else
395		{
396			const ClipVec4 approximatedClipPoint	= tcu::mix(inside.position, outside.position, hitDist);
397			const ClipVec4 anotherPointOnLine		= (hitDist > (ClipFloat)0.5) ? (inside.position) : (outside.position);
398
399			middle.position = plane.getLineIntersectionPoint(approximatedClipPoint, anotherPointOnLine);
400			middle.weight[0] = tcu::mix(inside.weight[0], outside.weight[0], hitDist);
401			middle.weight[1] = tcu::mix(inside.weight[1], outside.weight[1], hitDist);
402			middle.weight[2] = tcu::mix(inside.weight[2], outside.weight[2], hitDist);
403		}
404	}
405
406	if (!unclippableVertex1 && !unclippableVertex2)
407	{
408		// gen triangle (v0) -> mid1 -> mid2
409		clippedEdges.push_back(v0);
410		clippedEdges.push_back(mid1);
411		clippedEdges.push_back(mid2);
412	}
413	else if (!unclippableVertex1 && unclippableVertex2)
414	{
415		// clip just vertex 1
416		clippedEdges.push_back(v0);
417		clippedEdges.push_back(mid1);
418		clippedEdges.push_back(clipped2);
419	}
420	else if (unclippableVertex1 && !unclippableVertex2)
421	{
422		// clip just vertex 2
423		clippedEdges.push_back(v0);
424		clippedEdges.push_back(clipped1);
425		clippedEdges.push_back(mid2);
426	}
427	else
428	{
429		// don't modify
430		clippedEdges.push_back(v0);
431		clippedEdges.push_back(clipped1);
432		clippedEdges.push_back(clipped2);
433	}
434}
435
436void clipTriangleToPlane (std::vector<TriangleVertex>& clippedEdges, const TriangleVertex* vertices, const ClipVolumePlane& plane)
437{
438	const bool v0Clipped = !plane.pointInClipVolume(vertices[0].position);
439	const bool v1Clipped = !plane.pointInClipVolume(vertices[1].position);
440	const bool v2Clipped = !plane.pointInClipVolume(vertices[2].position);
441	const int  clipCount = ((v0Clipped) ? (1) : (0)) + ((v1Clipped) ? (1) : (0)) + ((v2Clipped) ? (1) : (0));
442
443	if (clipCount == 0)
444	{
445		// pass
446		clippedEdges.insert(clippedEdges.begin(), vertices, vertices + 3);
447	}
448	else if (clipCount == 1)
449	{
450		// clip one vertex
451		if (v0Clipped)			clipTriangleOneVertex(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
452		else if (v1Clipped)		clipTriangleOneVertex(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
453		else					clipTriangleOneVertex(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
454	}
455	else if (clipCount == 2)
456	{
457		// clip two vertices
458		if (!v0Clipped)			clipTriangleTwoVertices(clippedEdges, plane, vertices[0], vertices[1], vertices[2]);
459		else if (!v1Clipped)	clipTriangleTwoVertices(clippedEdges, plane, vertices[1], vertices[2], vertices[0]);
460		else					clipTriangleTwoVertices(clippedEdges, plane, vertices[2], vertices[0], vertices[1]);
461	}
462	else if (clipCount == 3)
463	{
464		// discard
465	}
466	else
467	{
468		DE_ASSERT(DE_FALSE);
469	}
470}
471
472} // cliputil
473
474tcu::Vec2 to2DCartesian (const tcu::Vec4& p)
475{
476	return tcu::Vec2(p.x(), p.y()) / p.w();
477}
478
479float cross2D (const tcu::Vec2& a, const tcu::Vec2& b)
480{
481	return tcu::cross(tcu::Vec3(a.x(), a.y(), 0.0f), tcu::Vec3(b.x(), b.y(), 0.0f)).z();
482}
483
484void flatshadePrimitiveVertices (pa::Triangle& target, size_t outputNdx)
485{
486	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
487	target.v0->outputs[outputNdx] = flatValue;
488	target.v1->outputs[outputNdx] = flatValue;
489	target.v2->outputs[outputNdx] = flatValue;
490}
491
492void flatshadePrimitiveVertices (pa::Line& target, size_t outputNdx)
493{
494	const rr::GenericVec4 flatValue = target.getProvokingVertex()->outputs[outputNdx];
495	target.v0->outputs[outputNdx] = flatValue;
496	target.v1->outputs[outputNdx] = flatValue;
497}
498
499void flatshadePrimitiveVertices (pa::Point& target, size_t outputNdx)
500{
501	DE_UNREF(target);
502	DE_UNREF(outputNdx);
503}
504
505template <typename ContainerType>
506void flatshadeVertices (const Program& program, ContainerType& list)
507{
508	// flatshade
509	const std::vector<rr::VertexVaryingInfo>& fragInputs = (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
510
511	for (size_t inputNdx = 0; inputNdx < fragInputs.size(); ++inputNdx)
512		if (fragInputs[inputNdx].flatshade)
513			for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
514				flatshadePrimitiveVertices(*it, inputNdx);
515}
516
/*--------------------------------------------------------------------*//*!
 * Clip triangles to the clip volume.
 *
 * \param list				Triangles to clip; replaced with the result
 * \param program			Supplies the list of fragment inputs
 * \param clipWithZPlanes	Clip against the Z planes as well as X and Y
 * \param vpalloc			Source of vertex packets for generated triangles
 *
 * Triangles entirely outside any one plane are dropped, triangles inside
 * all planes are passed through untouched. The rest are cut plane by plane,
 * re-triangulated, and emitted as new triangles with provoking index -1.
 *//*--------------------------------------------------------------------*/
void clipPrimitives (std::vector<pa::Triangle>&		list,
					 const Program&					program,
					 bool							clipWithZPlanes,
					 VertexPacketAllocator&			vpalloc)
{
	using namespace cliputil;

	cliputil::ComponentPlane<+1, 0> clipPosX;
	cliputil::ComponentPlane<-1, 0> clipNegX;
	cliputil::ComponentPlane<+1, 1> clipPosY;
	cliputil::ComponentPlane<-1, 1> clipNegY;
	cliputil::ComponentPlane<+1, 2> clipPosZ;
	cliputil::ComponentPlane<-1, 2> clipNegZ;

	// Z planes come last so that they can be left out by limiting numPlanes to 4.
	const std::vector<rr::VertexVaryingInfo>&	fragInputs			= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
	const ClipVolumePlane*						planes[]			= { &clipPosX, &clipNegX, &clipPosY, &clipNegY, &clipPosZ, &clipNegZ };
	const int									numPlanes			= (clipWithZPlanes) ? (6) : (4);

	std::vector<pa::Triangle>					outputTriangles;

	for (int inputTriangleNdx = 0; inputTriangleNdx < (int)list.size(); ++inputTriangleNdx)
	{
		// Filled in below for planes [0, numPlanes); only read when the triangle is neither discarded nor passed through.
		bool clippedByPlane[6];

		// Needs clipping?
		{
			bool discardPrimitive	= false;
			bool fullyInClipVolume	= true;

			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
			{
				const ClipVolumePlane*	plane			= planes[planeNdx];
				const bool				v0InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v0->position));
				const bool				v1InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v1->position));
				const bool				v2InsidePlane	= plane->pointInClipVolume(vec4ToClipVec4(list[inputTriangleNdx].v2->position));

				// Fully outside
				if (!v0InsidePlane && !v1InsidePlane && !v2InsidePlane)
				{
					discardPrimitive = true;
					break;
				}
				// Partially outside
				else if (!v0InsidePlane || !v1InsidePlane || !v2InsidePlane)
				{
					clippedByPlane[planeNdx] = true;
					fullyInClipVolume = false;
				}
				// Fully inside
				else
					clippedByPlane[planeNdx] = false;
			}

			if (discardPrimitive)
				continue;

			if (fullyInClipVolume)
			{
				// Passed through as is: original vertex packets and provoking vertex are kept.
				outputTriangles.push_back(list[inputTriangleNdx]);
				continue;
			}
		}

		// Clip
		{
			std::vector<SubTriangle>	subTriangles	(1);
			SubTriangle&				initialTri		= subTriangles[0];

			// Seed with the input triangle; vertex i starts with weight 1 for itself and 0 for the others.
			initialTri.vertices[0].position = vec4ToClipVec4(list[inputTriangleNdx].v0->position);
			initialTri.vertices[0].weight[0] = (ClipFloat)1.0;
			initialTri.vertices[0].weight[1] = (ClipFloat)0.0;
			initialTri.vertices[0].weight[2] = (ClipFloat)0.0;

			initialTri.vertices[1].position = vec4ToClipVec4(list[inputTriangleNdx].v1->position);
			initialTri.vertices[1].weight[0] = (ClipFloat)0.0;
			initialTri.vertices[1].weight[1] = (ClipFloat)1.0;
			initialTri.vertices[1].weight[2] = (ClipFloat)0.0;

			initialTri.vertices[2].position = vec4ToClipVec4(list[inputTriangleNdx].v2->position);
			initialTri.vertices[2].weight[0] = (ClipFloat)0.0;
			initialTri.vertices[2].weight[1] = (ClipFloat)0.0;
			initialTri.vertices[2].weight[2] = (ClipFloat)1.0;

			// Clip all subtriangles to all relevant planes
			for (int planeNdx = 0; planeNdx < numPlanes; ++planeNdx)
			{
				std::vector<SubTriangle> nextPhaseSubTriangles;

				// Planes that the original triangle lies fully inside of are skipped.
				if (!clippedByPlane[planeNdx])
					continue;

				for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
				{
					std::vector<TriangleVertex> convexPrimitive;

					// Clip triangle and form a convex n-gon (n is 3 or 4)
					clipTriangleToPlane(convexPrimitive, subTriangles[subTriangleNdx].vertices, *planes[planeNdx]);

					// Subtriangle completely discarded
					if (convexPrimitive.empty())
						continue;

					DE_ASSERT(convexPrimitive.size() == 3 || convexPrimitive.size() == 4);

					// Triangulate planar convex n-gon as a fan around its first vertex
					{
						TriangleVertex& v0 = convexPrimitive[0];

						for (int subsubTriangleNdx = 1; subsubTriangleNdx + 1 < (int)convexPrimitive.size(); ++subsubTriangleNdx)
						{
							const float				degenerateEpsilon	= 1.0e-6f;
							const TriangleVertex&	v1					= convexPrimitive[subsubTriangleNdx];
							const TriangleVertex&	v2					= convexPrimitive[subsubTriangleNdx + 1];
							// Magnitude of the 2D cross product of the two fan edges after division by w.
							const float				visibleArea			= de::abs(cross2D(to2DCartesian(clipVec4ToVec4(v1.position)) - to2DCartesian(clipVec4ToVec4(v0.position)),
																						  to2DCartesian(clipVec4ToVec4(v2.position)) - to2DCartesian(clipVec4ToVec4(v0.position))));

							// has surface area (is not a degenerate)
							if (visibleArea >= degenerateEpsilon)
							{
								SubTriangle subsubTriangle;

								subsubTriangle.vertices[0] = v0;
								subsubTriangle.vertices[1] = v1;
								subsubTriangle.vertices[2] = v2;

								nextPhaseSubTriangles.push_back(subsubTriangle);
							}
						}
					}
				}

				// The output of this plane is the input of the next one.
				subTriangles.swap(nextPhaseSubTriangles);
			}

			// Rebuild pa::Triangles from subtriangles
			for (int subTriangleNdx = 0; subTriangleNdx < (int)subTriangles.size(); ++subTriangleNdx)
			{
				VertexPacket*	p0				= vpalloc.alloc();
				VertexPacket*	p1				= vpalloc.alloc();
				VertexPacket*	p2				= vpalloc.alloc();
				pa::Triangle	ngonFragment	(p0, p1, p2, -1);

				p0->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[0].position);
				p1->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[1].position);
				p2->position = clipVec4ToVec4(subTriangles[subTriangleNdx].vertices[2].position);

				for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
				{
					if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
					{
						// Float outputs: blend the original triangle's three values with the vertex weights.
						const tcu::Vec4 out0 = list[inputTriangleNdx].v0->outputs[outputNdx].get<float>();
						const tcu::Vec4 out1 = list[inputTriangleNdx].v1->outputs[outputNdx].get<float>();
						const tcu::Vec4 out2 = list[inputTriangleNdx].v2->outputs[outputNdx].get<float>();

						p0->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[0].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[0].weight[2] * out2;

						p1->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[1].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[1].weight[2] * out2;

						p2->outputs[outputNdx] = (float)subTriangles[subTriangleNdx].vertices[2].weight[0] * out0
											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[1] * out1
											   + (float)subTriangles[subTriangleNdx].vertices[2].weight[2] * out2;
					}
					else
					{
						// only floats are interpolated, all others must be flatshaded then
						p0->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
						p1->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
						p2->outputs[outputNdx] = list[inputTriangleNdx].getProvokingVertex()->outputs[outputNdx];
					}
				}

				outputTriangles.push_back(ngonFragment);
			}
		}
	}

	// output result
	list.swap(outputTriangles);
}
703
704/*--------------------------------------------------------------------*//*!
705 * Clip lines to the near and far clip planes.
706 *
707 * Clipping to other planes is a by-product of the viewport test  (i.e.
708 * rasterization area selection).
709 *//*--------------------------------------------------------------------*/
710void clipPrimitives (std::vector<pa::Line>& 		list,
711					 const Program& 				program,
712					 bool 							clipWithZPlanes,
713					 VertexPacketAllocator&			vpalloc)
714{
715	DE_UNREF(vpalloc);
716
717	using namespace cliputil;
718
719	// Lines are clipped only by the far and the near planes here. Line clipping by other planes done in the rasterization phase
720
721	const std::vector<rr::VertexVaryingInfo>&	fragInputs	= (program.geometryShader) ? (program.geometryShader->getOutputs()) : (program.vertexShader->getOutputs());
722	std::vector<pa::Line>						visibleLines;
723
724	// Z-clipping disabled, don't do anything
725	if (!clipWithZPlanes)
726		return;
727
728	for (size_t ndx = 0; ndx < list.size(); ++ndx)
729	{
730		pa::Line& l = list[ndx];
731
732		// Totally discarded?
733		if ((l.v0->position.z() < -l.v0->position.w() && l.v1->position.z() < -l.v1->position.w()) ||
734			(l.v0->position.z() >  l.v0->position.w() && l.v1->position.z() >  l.v1->position.w()))
735			continue; // discard
736
737		// Something is visible
738
739		const ClipVec4	p0	= vec4ToClipVec4(l.v0->position);
740		const ClipVec4	p1	= vec4ToClipVec4(l.v1->position);
741		const ClipFloat	t0	= getLineEndpointClipping(p0, p1);
742		const ClipFloat	t1	= getLineEndpointClipping(p1, p0);
743
744		// Not clipped at all?
745		if (t0 == (ClipFloat)0.0 && t1 == (ClipFloat)0.0)
746		{
747			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
748		}
749		else
750		{
751			// Clip position
752			l.v0->position = clipVec4ToVec4(tcu::mix(p0, p1, t0));
753			l.v1->position = clipVec4ToVec4(tcu::mix(p1, p0, t1));
754
755			// Clip attributes
756			for (size_t outputNdx = 0; outputNdx < fragInputs.size(); ++outputNdx)
757			{
758				// only floats are clipped, other types are flatshaded
759				if (fragInputs[outputNdx].type == GENERICVECTYPE_FLOAT)
760				{
761					const tcu::Vec4 a0 = l.v0->outputs[outputNdx].get<float>();
762					const tcu::Vec4 a1 = l.v1->outputs[outputNdx].get<float>();
763
764					l.v0->outputs[outputNdx] = tcu::mix(a0, a1, (float)t0);
765					l.v1->outputs[outputNdx] = tcu::mix(a1, a0, (float)t1);
766				}
767			}
768
769			visibleLines.push_back(pa::Line(l.v0, l.v1, -1));
770		}
771	}
772
773	// return visible in list
774	std::swap(visibleLines, list);
775}
776
777/*--------------------------------------------------------------------*//*!
778 * Discard points not within clip volume. Clipping is a by-product
779 * of the viewport test.
780 *//*--------------------------------------------------------------------*/
781void clipPrimitives (std::vector<pa::Point>&		list,
782					 const Program&					program,
783					 bool							clipWithZPlanes,
784					 VertexPacketAllocator&			vpalloc)
785{
786	DE_UNREF(vpalloc);
787	DE_UNREF(program);
788
789	std::vector<pa::Point> visiblePoints;
790
791	// Z-clipping disabled, don't do anything
792	if (!clipWithZPlanes)
793		return;
794
795	for (size_t ndx = 0; ndx < list.size(); ++ndx)
796	{
797		pa::Point& p = list[ndx];
798
799		// points are discarded if Z is not in range. (Wide) point clipping is done in the rasterization phase
800		if (de::inRange(p.v0->position.z(), -p.v0->position.w(), p.v0->position.w()))
801			visiblePoints.push_back(pa::Point(p.v0));
802	}
803
804	// return visible in list
805	std::swap(visiblePoints, list);
806}
807
808void transformVertexClipCoordsToWindowCoords (const RenderState& state, VertexPacket& packet)
809{
810	// To normalized device coords
811	{
812		packet.position = tcu::Vec4(packet.position.x()/packet.position.w(),
813									packet.position.y()/packet.position.w(),
814									packet.position.z()/packet.position.w(),
815									1.0f               /packet.position.w());
816	}
817
818	// To window coords
819	{
820		const WindowRectangle&	viewport	= state.viewport.rect;
821		const float				halfW		= (float)(viewport.width) / 2.0f;
822		const float				halfH		= (float)(viewport.height) / 2.0f;
823		const float				oX			= (float)viewport.left + halfW;
824		const float				oY			= (float)viewport.bottom + halfH;
825		const float				zn			= state.viewport.zn;
826		const float				zf			= state.viewport.zf;
827
828		packet.position = tcu::Vec4(packet.position.x()*halfW + oX,
829									packet.position.y()*halfH + oY,
830									packet.position.z()*(zf - zn)/2.0f + (zn + zf)/2.0f,
831									packet.position.w());
832	}
833}
834
835void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Triangle& target)
836{
837	transformVertexClipCoordsToWindowCoords(state, *target.v0);
838	transformVertexClipCoordsToWindowCoords(state, *target.v1);
839	transformVertexClipCoordsToWindowCoords(state, *target.v2);
840}
841
842void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Line& target)
843{
844	transformVertexClipCoordsToWindowCoords(state, *target.v0);
845	transformVertexClipCoordsToWindowCoords(state, *target.v1);
846}
847
848void transformPrimitiveClipCoordsToWindowCoords (const RenderState& state, pa::Point& target)
849{
850	transformVertexClipCoordsToWindowCoords(state, *target.v0);
851}
852
853template <typename ContainerType>
854void transformClipCoordsToWindowCoords (const RenderState& state, ContainerType& list)
855{
856	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
857		transformPrimitiveClipCoordsToWindowCoords(state, *it);
858}
859
860void makeSharedVerticeDistinct (VertexPacket*& packet, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
861{
862	// distinct
863	if (vertices.find(packet) == vertices.end())
864	{
865		vertices.insert(packet);
866	}
867	else
868	{
869		VertexPacket* newPacket = vpalloc.alloc();
870
871		// copy packet output values
872		newPacket->position		= packet->position;
873		newPacket->pointSize	= packet->pointSize;
874		newPacket->primitiveID	= packet->primitiveID;
875
876		for (size_t outputNdx = 0; outputNdx < vpalloc.getNumVertexOutputs(); ++outputNdx)
877			newPacket->outputs[outputNdx] = packet->outputs[outputNdx];
878
879		// no need to insert new packet to "vertices" as newPacket is unique
880		packet = newPacket;
881	}
882}
883
884void makeSharedVerticesDistinct (pa::Triangle& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
885{
886	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
887	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
888	makeSharedVerticeDistinct(target.v2, vertices, vpalloc);
889}
890
891void makeSharedVerticesDistinct (pa::Line& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
892{
893	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
894	makeSharedVerticeDistinct(target.v1, vertices, vpalloc);
895}
896
897void makeSharedVerticesDistinct (pa::Point& target, std::set<VertexPacket*, std::less<void*> >& vertices, VertexPacketAllocator& vpalloc)
898{
899	makeSharedVerticeDistinct(target.v0, vertices, vpalloc);
900}
901
902template <typename ContainerType>
903void makeSharedVerticesDistinct (ContainerType& list, VertexPacketAllocator& vpalloc)
904{
905	std::set<VertexPacket*, std::less<void*> > vertices;
906
907	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
908		makeSharedVerticesDistinct(*it, vertices, vpalloc);
909}
910
911void generatePrimitiveIDs (pa::Triangle& target, int id)
912{
913	target.v0->primitiveID = id;
914	target.v1->primitiveID = id;
915	target.v2->primitiveID = id;
916}
917
918void generatePrimitiveIDs (pa::Line& target, int id)
919{
920	target.v0->primitiveID = id;
921	target.v1->primitiveID = id;
922}
923
924void generatePrimitiveIDs (pa::Point& target, int id)
925{
926	target.v0->primitiveID = id;
927}
928
929template <typename ContainerType>
930void generatePrimitiveIDs (ContainerType& list, DrawContext& drawContext)
931{
932	for (typename ContainerType::iterator it = list.begin(); it != list.end(); ++it)
933		generatePrimitiveIDs(*it, drawContext.primitiveID++);
934}
935
936static float findTriangleVertexDepthSlope (const tcu::Vec4& p, const tcu::Vec4& v0, const tcu::Vec4& v1)
937{
938	// screen space
939	const tcu::Vec3 ssp		=  p.swizzle(0, 1, 2);
940	const tcu::Vec3 ssv0	= v0.swizzle(0, 1, 2);
941	const tcu::Vec3 ssv1	= v1.swizzle(0, 1, 2);
942
943	// dx & dy
944
945	const tcu::Vec3 a		= ssv0.swizzle(0,1,2) - ssp.swizzle(0,1,2);
946	const tcu::Vec3 b		= ssv1.swizzle(0,1,2) - ssp.swizzle(0,1,2);
947	const float		epsilon	= 0.0001f;
948	const float		det		= (a.x() * b.y() - b.x() * a.y());
949
950	// degenerate triangle, it won't generate any fragments anyway. Return value doesn't matter
951	if (de::abs(det) < epsilon)
952		return 0.0f;
953
954	const tcu::Vec2	dxDir	= tcu::Vec2( b.y(), -a.y()) / det;
955	const tcu::Vec2	dyDir	= tcu::Vec2(-b.x(),  a.x()) / det;
956
957	const float		dzdx	= dxDir.x() * a.z() + dxDir.y() * b.z();
958	const float		dzdy	= dyDir.x() * a.z() + dyDir.y() * b.z();
959
960	// approximate using max(|dz/dx|, |dz/dy|)
961	return de::max(de::abs(dzdx), de::abs(dzdy));
962}
963
964static float findPrimitiveMaximumDepthSlope (const pa::Triangle& triangle)
965{
966	const float d1 = findTriangleVertexDepthSlope(triangle.v0->position, triangle.v1->position, triangle.v2->position);
967	const float d2 = findTriangleVertexDepthSlope(triangle.v1->position, triangle.v2->position, triangle.v0->position);
968	const float d3 = findTriangleVertexDepthSlope(triangle.v2->position, triangle.v0->position, triangle.v1->position);
969
970	return de::max(d1, de::max(d2, d3));
971}
972
973static float getFloatingPointMinimumResolvableDifference (float maxZValue, tcu::TextureFormat::ChannelType type)
974{
975	if (type == tcu::TextureFormat::FLOAT)
976	{
977		// 32f
978		const int maxExponent = tcu::Float32(maxZValue).exponent();
979		return tcu::Float32::construct(+1, maxExponent - 23, 1 << 23).asFloat();
980	}
981
982	// unexpected format
983	DE_ASSERT(false);
984	return 0.0f;
985}
986
987static float getFixedPointMinimumResolvableDifference (int numBits)
988{
989	return tcu::Float32::construct(+1, -numBits, 1 << 23).asFloat();
990}
991
992static float findPrimitiveMinimumResolvableDifference (const pa::Triangle& triangle, const rr::MultisampleConstPixelBufferAccess& depthAccess)
993{
994	const float								maxZvalue		= de::max(de::max(triangle.v0->position.z(), triangle.v1->position.z()), triangle.v2->position.z());
995	const tcu::TextureFormat				format			= depthAccess.raw().getFormat();
996	const tcu::TextureFormat::ChannelOrder	order			= format.order;
997
998	if (order == tcu::TextureFormat::D)
999	{
1000		// depth only
1001		const tcu::TextureFormat::ChannelType	channelType		= format.type;
1002		const tcu::TextureChannelClass			channelClass	= tcu::getTextureChannelClass(channelType);
1003		const int								numBits			= tcu::getTextureFormatBitDepth(format).x();
1004
1005		if (channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
1006			return getFloatingPointMinimumResolvableDifference(maxZvalue, channelType);
1007		else
1008			// \note channelClass might be CLASS_LAST but that's ok
1009			return getFixedPointMinimumResolvableDifference(numBits);
1010	}
1011	else if (order == tcu::TextureFormat::DS)
1012	{
1013		// depth stencil, special cases for possible combined formats
1014		if (format.type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
1015			return getFloatingPointMinimumResolvableDifference(maxZvalue, tcu::TextureFormat::FLOAT);
1016		else if (format.type == tcu::TextureFormat::UNSIGNED_INT_24_8)
1017			return getFixedPointMinimumResolvableDifference(24);
1018	}
1019
1020	// unexpected format
1021	DE_ASSERT(false);
1022	return 0.0f;
1023}
1024
1025void writeFragmentPackets (const RenderState&					state,
1026						   const RenderTarget&					renderTarget,
1027						   const Program&						program,
1028						   const FragmentPacket*				fragmentPackets,
1029						   int									numRasterizedPackets,
1030						   rr::FaceType							facetype,
1031						   const std::vector<rr::GenericVec4>&	fragmentOutputArray,
1032						   const float*							depthValues,
1033						   std::vector<Fragment>&				fragmentBuffer)
1034{
1035	const int			numSamples		= renderTarget.colorBuffers[0].getNumSamples();
1036	const size_t		numOutputs		= program.fragmentShader->getOutputs().size();
1037	FragmentProcessor	fragProcessor;
1038
1039	DE_ASSERT(fragmentOutputArray.size() >= (size_t)numRasterizedPackets*4*numOutputs);
1040	DE_ASSERT(fragmentBuffer.size()      >= (size_t)numRasterizedPackets*4);
1041
1042	// Translate fragments but do not set the value yet
1043	{
1044		int	fragCount = 0;
1045		for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
1046		for (int fragNdx = 0; fragNdx < 4; fragNdx++)
1047		{
1048			const FragmentPacket&	packet	= fragmentPackets[packetNdx];
1049			const int				xo		= fragNdx%2;
1050			const int				yo		= fragNdx/2;
1051
1052			if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
1053			{
1054				Fragment& fragment		= fragmentBuffer[fragCount++];
1055
1056				fragment.pixelCoord		= packet.position + tcu::IVec2(xo, yo);
1057				fragment.coverage		= (deUint32)((packet.coverage & getCoverageFragmentSampleBits(numSamples, xo, yo)) >> getCoverageOffset(numSamples, xo, yo));
1058				fragment.sampleDepths	= (depthValues) ? (&depthValues[(packetNdx*4 + yo*2 + xo)*numSamples]) : (DE_NULL);
1059			}
1060		}
1061	}
1062
1063	// Set per output output values
1064	{
1065		rr::FragmentOperationState noStencilDepthWriteState(state.fragOps);
1066		noStencilDepthWriteState.depthMask						= false;
1067		noStencilDepthWriteState.stencilStates[facetype].sFail	= STENCILOP_KEEP;
1068		noStencilDepthWriteState.stencilStates[facetype].dpFail	= STENCILOP_KEEP;
1069		noStencilDepthWriteState.stencilStates[facetype].dpPass	= STENCILOP_KEEP;
1070
1071		int	fragCount = 0;
1072		for (size_t outputNdx = 0; outputNdx < numOutputs; ++outputNdx)
1073		{
1074			// Only the last output-pass has default state, other passes have stencil & depth writemask=0
1075			const rr::FragmentOperationState& fragOpsState = (outputNdx == numOutputs-1) ? (state.fragOps) : (noStencilDepthWriteState);
1076
1077			for (int packetNdx = 0; packetNdx < numRasterizedPackets; ++packetNdx)
1078			for (int fragNdx = 0; fragNdx < 4; fragNdx++)
1079			{
1080				const FragmentPacket&	packet	= fragmentPackets[packetNdx];
1081				const int				xo		= fragNdx%2;
1082				const int				yo		= fragNdx/2;
1083
1084				// Add only fragments that have live samples to shaded fragments queue.
1085				if (getCoverageAnyFragmentSampleLive(packet.coverage, numSamples, xo, yo))
1086				{
1087					Fragment& fragment		= fragmentBuffer[fragCount++];
1088					fragment.value			= fragmentOutputArray[(packetNdx*4 + fragNdx) * numOutputs + outputNdx];
1089				}
1090			}
1091
1092			// Execute per-fragment ops and write
1093			fragProcessor.render(renderTarget.colorBuffers[outputNdx], renderTarget.depthBuffer, renderTarget.stencilBuffer, &fragmentBuffer[0], fragCount, facetype, fragOpsState);
1094		}
1095	}
1096}
1097
1098void rasterizePrimitive (const RenderState&					state,
1099						 const RenderTarget&				renderTarget,
1100						 const Program&						program,
1101						 const pa::Triangle&				triangle,
1102						 const tcu::IVec4&					renderTargetRect,
1103						 RasterizationInternalBuffers&		buffers)
1104{
1105	const int			numSamples		= renderTarget.colorBuffers[0].getNumSamples();
1106	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
1107	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
1108	TriangleRasterizer	rasterizer		(renderTargetRect, numSamples, state.rasterization);
1109	float				depthOffset		= 0.0f;
1110
1111	rasterizer.init(triangle.v0->position, triangle.v1->position, triangle.v2->position);
1112
1113	// Culling
1114	const FaceType visibleFace = rasterizer.getVisibleFace();
1115	if ((state.cullMode == CULLMODE_FRONT	&& visibleFace == FACETYPE_FRONT) ||
1116		(state.cullMode == CULLMODE_BACK	&& visibleFace == FACETYPE_BACK))
1117		return;
1118
1119	// Shading context
1120	FragmentShadingContext shadingContext(triangle.v0->outputs, triangle.v1->outputs, triangle.v2->outputs, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, triangle.v2->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
1121
1122	// Polygon offset
1123	if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
1124	{
1125		const float maximumDepthSlope			= findPrimitiveMaximumDepthSlope(triangle);
1126		const float minimumResolvableDifference	= findPrimitiveMinimumResolvableDifference(triangle, renderTarget.depthBuffer);
1127
1128		depthOffset = maximumDepthSlope * state.fragOps.polygonOffsetFactor + minimumResolvableDifference * state.fragOps.polygonOffsetUnits;
1129	}
1130
1131	// Execute rasterize - shade - write loop
1132	for (;;)
1133	{
1134		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
1135		int			numRasterizedPackets	= 0;
1136
1137		// Rasterize
1138
1139		rasterizer.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1140
1141		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1142
1143		if (!numRasterizedPackets)
1144			break; // Rasterization finished.
1145
1146		// Polygon offset
1147		if (buffers.fragmentDepthBuffer && state.fragOps.polygonOffsetEnabled)
1148			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1149				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx] + depthOffset, 0.0f, 1.0f);
1150
1151		// Shade
1152
1153		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1154
1155		// Depth clamp
1156		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1157			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1158				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1159
1160		// Handle fragment shader outputs
1161
1162		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, visibleFace, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1163	}
1164}
1165
1166void rasterizePrimitive (const RenderState&					state,
1167						 const RenderTarget&				renderTarget,
1168						 const Program&						program,
1169						 const pa::Line&					line,
1170						 const tcu::IVec4&					renderTargetRect,
1171						 RasterizationInternalBuffers&		buffers)
1172{
1173	const int					numSamples			= renderTarget.colorBuffers[0].getNumSamples();
1174	const float					depthClampMin		= de::min(state.viewport.zn, state.viewport.zf);
1175	const float					depthClampMax		= de::max(state.viewport.zn, state.viewport.zf);
1176	const bool					msaa				= numSamples > 1;
1177	FragmentShadingContext		shadingContext		(line.v0->outputs, line.v1->outputs, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, line.v1->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
1178	SingleSampleLineRasterizer	aliasedRasterizer	(renderTargetRect);
1179	MultiSampleLineRasterizer	msaaRasterizer		(numSamples, renderTargetRect);
1180
1181	// Initialize rasterization.
1182	if (msaa)
1183		msaaRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
1184	else
1185		aliasedRasterizer.init(line.v0->position, line.v1->position, state.line.lineWidth);
1186
1187	for (;;)
1188	{
1189		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
1190		int			numRasterizedPackets	= 0;
1191
1192		// Rasterize
1193
1194		if (msaa)
1195			msaaRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1196		else
1197			aliasedRasterizer.rasterize	(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1198
1199		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1200
1201		if (!numRasterizedPackets)
1202			break; // Rasterization finished.
1203
1204		// Shade
1205
1206		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1207
1208		// Depth clamp
1209		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1210			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1211				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1212
1213		// Handle fragment shader outputs
1214
1215		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1216	}
1217}
1218
1219void rasterizePrimitive (const RenderState&					state,
1220						 const RenderTarget&				renderTarget,
1221						 const Program&						program,
1222						 const pa::Point&					point,
1223						 const tcu::IVec4&					renderTargetRect,
1224						 RasterizationInternalBuffers&		buffers)
1225{
1226	const int			numSamples		= renderTarget.colorBuffers[0].getNumSamples();
1227	const float			depthClampMin	= de::min(state.viewport.zn, state.viewport.zf);
1228	const float			depthClampMax	= de::max(state.viewport.zn, state.viewport.zf);
1229	TriangleRasterizer	rasterizer1		(renderTargetRect, numSamples, state.rasterization);
1230	TriangleRasterizer	rasterizer2		(renderTargetRect, numSamples, state.rasterization);
1231
1232	// draw point as two triangles
1233	const float offset				= point.v0->pointSize / 2.0f;
1234	const tcu::Vec4		w0			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
1235	const tcu::Vec4		w1			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() + offset, point.v0->position.z(), point.v0->position.w());
1236	const tcu::Vec4		w2			= tcu::Vec4(point.v0->position.x() - offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
1237	const tcu::Vec4		w3			= tcu::Vec4(point.v0->position.x() + offset, point.v0->position.y() - offset, point.v0->position.z(), point.v0->position.w());
1238
1239	rasterizer1.init(w0, w1, w2);
1240	rasterizer2.init(w0, w2, w3);
1241
1242	// Shading context
1243	FragmentShadingContext shadingContext(point.v0->outputs, DE_NULL, DE_NULL, &buffers.shaderOutputs[0], buffers.fragmentDepthBuffer, point.v0->primitiveID, (int)program.fragmentShader->getOutputs().size(), numSamples);
1244
1245	// Execute rasterize - shade - write loop
1246	for (;;)
1247	{
1248		const int	maxFragmentPackets		= (int)buffers.fragmentPackets.size();
1249		int			numRasterizedPackets	= 0;
1250
1251		// Rasterize both triangles
1252
1253		rasterizer1.rasterize(&buffers.fragmentPackets[0], buffers.fragmentDepthBuffer, maxFragmentPackets, numRasterizedPackets);
1254		if (numRasterizedPackets != maxFragmentPackets)
1255		{
1256			float* const	depthBufferAppendPointer	= (buffers.fragmentDepthBuffer) ? (buffers.fragmentDepthBuffer + numRasterizedPackets*numSamples*4) : (DE_NULL);
1257			int				numRasterizedPackets2		= 0;
1258
1259			rasterizer2.rasterize(&buffers.fragmentPackets[numRasterizedPackets], depthBufferAppendPointer, maxFragmentPackets - numRasterizedPackets, numRasterizedPackets2);
1260
1261			numRasterizedPackets += numRasterizedPackets2;
1262		}
1263
1264		// numRasterizedPackets is guaranteed to be greater than zero for shadeFragments()
1265
1266		if (!numRasterizedPackets)
1267			break; // Rasterization finished.
1268
1269		// Shade
1270
1271		program.fragmentShader->shadeFragments(&buffers.fragmentPackets[0], numRasterizedPackets, shadingContext);
1272
1273		// Depth clamp
1274		if (buffers.fragmentDepthBuffer && state.fragOps.depthClampEnabled)
1275			for (int sampleNdx = 0; sampleNdx < numRasterizedPackets * 4 * numSamples; ++sampleNdx)
1276				buffers.fragmentDepthBuffer[sampleNdx] = de::clamp(buffers.fragmentDepthBuffer[sampleNdx], depthClampMin, depthClampMax);
1277
1278		// Handle fragment shader outputs
1279
1280		writeFragmentPackets(state, renderTarget, program, &buffers.fragmentPackets[0], numRasterizedPackets, rr::FACETYPE_FRONT, buffers.shaderOutputs, buffers.fragmentDepthBuffer, buffers.shadedFragments);
1281	}
1282}
1283
1284template <typename ContainerType>
1285void rasterize (const RenderState&					state,
1286				const RenderTarget&					renderTarget,
1287				const Program&						program,
1288				const ContainerType&				list)
1289{
1290	const int						numSamples			= renderTarget.colorBuffers[0].getNumSamples();
1291	const int						numFragmentOutputs	= (int)program.fragmentShader->getOutputs().size();
1292	const size_t					maxFragmentPackets	= 128;
1293
1294	const tcu::IVec4				viewportRect		= tcu::IVec4(state.viewport.rect.left, state.viewport.rect.bottom, state.viewport.rect.width, state.viewport.rect.height);
1295	const tcu::IVec4				bufferRect			= getBufferSize(renderTarget.colorBuffers[0]);
1296	const tcu::IVec4				renderTargetRect	= rectIntersection(viewportRect, bufferRect);
1297
1298	// shared buffers for all primitives
1299	std::vector<FragmentPacket>		fragmentPackets		(maxFragmentPackets);
1300	std::vector<GenericVec4>		shaderOutputs		(maxFragmentPackets*4*numFragmentOutputs);
1301	std::vector<Fragment>			shadedFragments		(maxFragmentPackets*4);
1302	std::vector<float>				depthValues			(0);
1303	float*							depthBufferPointer	= DE_NULL;
1304
1305	RasterizationInternalBuffers	buffers;
1306
1307	// calculate depth only if we have a depth buffer
1308	if (!isEmpty(renderTarget.depthBuffer))
1309	{
1310		depthValues.resize(maxFragmentPackets*4*numSamples);
1311		depthBufferPointer = &depthValues[0];
1312	}
1313
1314	// set buffers
1315	buffers.fragmentPackets.swap(fragmentPackets);
1316	buffers.shaderOutputs.swap(shaderOutputs);
1317	buffers.shadedFragments.swap(shadedFragments);
1318	buffers.fragmentDepthBuffer = depthBufferPointer;
1319
1320	// rasterize
1321	for (typename ContainerType::const_iterator it = list.begin(); it != list.end(); ++it)
1322		rasterizePrimitive(state, renderTarget, program, *it, renderTargetRect, buffers);
1323}
1324
1325/*--------------------------------------------------------------------*//*!
1326 * Draws transformed triangles, lines or points to render target
1327 *//*--------------------------------------------------------------------*/
1328template <typename ContainerType>
1329void drawBasicPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, ContainerType& primList, VertexPacketAllocator& vpalloc)
1330{
1331	const bool clipZ = !state.fragOps.depthClampEnabled;
1332
1333	// Transform feedback
1334
1335	// Flatshading
1336	flatshadeVertices(program, primList);
1337
1338	// Clipping
1339	// \todo [jarkko] is creating & swapping std::vectors really a good solution?
1340	clipPrimitives(primList, program, clipZ, vpalloc);
1341
1342	// Transform vertices to window coords
1343	transformClipCoordsToWindowCoords(state, primList);
1344
1345	// Rasterize and paint
1346	rasterize(state, renderTarget, program, primList);
1347}
1348
1349void copyVertexPacketPointers(const VertexPacket** dst, const pa::Point& in)
1350{
1351	dst[0] = in.v0;
1352}
1353
1354void copyVertexPacketPointers(const VertexPacket** dst, const pa::Line& in)
1355{
1356	dst[0] = in.v0;
1357	dst[1] = in.v1;
1358}
1359
1360void copyVertexPacketPointers(const VertexPacket** dst, const pa::Triangle& in)
1361{
1362	dst[0] = in.v0;
1363	dst[1] = in.v1;
1364	dst[2] = in.v2;
1365}
1366
1367void copyVertexPacketPointers(const VertexPacket** dst, const pa::LineAdjacency& in)
1368{
1369	dst[0] = in.v0;
1370	dst[1] = in.v1;
1371	dst[2] = in.v2;
1372	dst[3] = in.v3;
1373}
1374
1375void copyVertexPacketPointers(const VertexPacket** dst, const pa::TriangleAdjacency& in)
1376{
1377	dst[0] = in.v0;
1378	dst[1] = in.v1;
1379	dst[2] = in.v2;
1380	dst[3] = in.v3;
1381	dst[4] = in.v4;
1382	dst[5] = in.v5;
1383}
1384
1385template <PrimitiveType DrawPrimitiveType> // \note DrawPrimitiveType  can only be Points, line_strip, or triangle_strip
1386void drawGeometryShaderOutputAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, size_t numVertices, VertexPacketAllocator& vpalloc)
1387{
1388	// Run primitive assembly for generated stream
1389
1390	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
1391	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType>	inputPrimitives				(assemblerPrimitiveCount);
1392
1393	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, numVertices, state.provokingVertexConvention); // \note input Primitives are baseType_t => only basic primitives (non adjacency) will compile
1394
1395	// Make shared vertices distinct
1396
1397	makeSharedVerticesDistinct(inputPrimitives, vpalloc);
1398
1399	// Draw assembled primitives
1400
1401	drawBasicPrimitives(state, renderTarget, program, inputPrimitives, vpalloc);
1402}
1403
1404template <PrimitiveType DrawPrimitiveType>
1405void drawWithGeometryShader(const RenderState& state, const RenderTarget& renderTarget, const Program& program, std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>& input, DrawContext& drawContext)
1406{
1407	// Vertices outputted by geometry shader may have different number of output variables than the original, create new memory allocator
1408	VertexPacketAllocator vpalloc(program.geometryShader->getOutputs().size());
1409
1410	// Run geometry shader for all primitives
1411	GeometryEmitter					emitter			(vpalloc, program.geometryShader->getNumVerticesOut());
1412	std::vector<PrimitivePacket>	primitives		(input.size());
1413	const int						numInvocations	= (int)program.geometryShader->getNumInvocations();
1414	const int						verticesIn		= PrimitiveTypeTraits<DrawPrimitiveType>::Type::NUM_VERTICES;
1415
1416	for (size_t primitiveNdx = 0; primitiveNdx < input.size(); ++primitiveNdx)
1417	{
1418		primitives[primitiveNdx].primitiveIDIn = drawContext.primitiveID++;
1419		copyVertexPacketPointers(primitives[primitiveNdx].vertices, input[primitiveNdx]);
1420	}
1421
1422	if (primitives.empty())
1423		return;
1424
1425	for (int invocationNdx = 0; invocationNdx < numInvocations; ++invocationNdx)
1426	{
1427		// Shading invocation
1428
1429		program.geometryShader->shadePrimitives(emitter, verticesIn, &primitives[0], (int)primitives.size(), invocationNdx);
1430
1431		// Find primitives in the emitted vertices
1432
1433		std::vector<VertexPacket*> emitted;
1434		emitter.moveEmittedTo(emitted);
1435
1436		for (size_t primitiveBegin = 0; primitiveBegin < emitted.size();)
1437		{
1438			size_t primitiveEnd;
1439
1440			// Find primitive begin
1441			if (!emitted[primitiveBegin])
1442			{
1443				++primitiveBegin;
1444				continue;
1445			}
1446
1447			// Find primitive end
1448
1449			primitiveEnd = primitiveBegin + 1;
1450			for (; (primitiveEnd < emitted.size()) && emitted[primitiveEnd]; ++primitiveEnd); // find primitive end
1451
1452			// Draw range [begin, end)
1453
1454			switch (program.geometryShader->getOutputType())
1455			{
1456				case rr::GEOMETRYSHADEROUTPUTTYPE_POINTS:			drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_POINTS>			(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1457				case rr::GEOMETRYSHADEROUTPUTTYPE_LINE_STRIP:		drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_LINE_STRIP>		(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1458				case rr::GEOMETRYSHADEROUTPUTTYPE_TRIANGLE_STRIP:	drawGeometryShaderOutputAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>	(state, renderTarget, program, &emitted[primitiveBegin], primitiveEnd-primitiveBegin, vpalloc); break;
1459				default:
1460					DE_ASSERT(DE_FALSE);
1461			}
1462
1463			// Next primitive
1464			primitiveBegin = primitiveEnd + 1;
1465		}
1466	}
1467}
1468
1469/*--------------------------------------------------------------------*//*!
1470 * Assembles, tesselates, runs geometry shader and draws primitives of any type from vertex list.
1471 *//*--------------------------------------------------------------------*/
1472template <PrimitiveType DrawPrimitiveType>
1473void drawAsPrimitives (const RenderState& state, const RenderTarget& renderTarget, const Program& program, VertexPacket* const* vertices, int numVertices, DrawContext& drawContext, VertexPacketAllocator& vpalloc)
1474{
1475	// Assemble primitives (deconstruct stips & loops)
1476	const size_t															assemblerPrimitiveCount		= PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::getPrimitiveCount(numVertices);
1477	std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::Type>		inputPrimitives				(assemblerPrimitiveCount);
1478
1479	PrimitiveTypeTraits<DrawPrimitiveType>::Assembler::exec(inputPrimitives.begin(), vertices, (size_t)numVertices, state.provokingVertexConvention);
1480
1481	// Tesselate
1482	//if (state.tesselation)
1483	//	primList = state.tesselation.exec(primList);
1484
1485	// Geometry shader
1486	if (program.geometryShader)
1487	{
1488		// If there is an active geometry shader, it will convert any primitive type to basic types
1489		drawWithGeometryShader<DrawPrimitiveType>(state, renderTarget, program, inputPrimitives, drawContext);
1490	}
1491	else
1492	{
1493		std::vector<typename PrimitiveTypeTraits<DrawPrimitiveType>::BaseType> basePrimitives;
1494
1495		// convert types from X_adjacency to X
1496		convertPrimitiveToBaseType(basePrimitives, inputPrimitives);
1497
1498		// Make shared vertices distinct. Needed for that the translation to screen space happens only once per vertex, and for flatshading
1499		makeSharedVerticesDistinct(basePrimitives, vpalloc);
1500
1501		// A primitive ID will be generated even if no geometry shader is active
1502		generatePrimitiveIDs(basePrimitives, drawContext);
1503
1504		// Draw as a basic type
1505		drawBasicPrimitives(state, renderTarget, program, basePrimitives, vpalloc);
1506	}
1507}
1508
1509bool isValidCommand (const DrawCommand& command, int numInstances)
1510{
1511	// numInstances should be valid
1512	if (numInstances < 1)
1513		return false;
1514
1515	// Shaders should have the same varyings
1516	if (command.program.geometryShader)
1517	{
1518		if (command.program.vertexShader->getOutputs() != command.program.geometryShader->getInputs())
1519			return false;
1520
1521		if (command.program.geometryShader->getOutputs() != command.program.fragmentShader->getInputs())
1522			return false;
1523	}
1524	else
1525	{
1526		if (command.program.vertexShader->getOutputs() != command.program.fragmentShader->getInputs())
1527			return false;
1528	}
1529
1530	// Shader input/output types are set
1531	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getInputs().size(); ++varyingNdx)
1532		if (command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1533			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1534			command.program.vertexShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1535			return false;
1536	for (size_t varyingNdx = 0; varyingNdx < command.program.vertexShader->getOutputs().size(); ++varyingNdx)
1537		if (command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1538			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1539			command.program.vertexShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1540			return false;
1541
1542	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getInputs().size(); ++varyingNdx)
1543		if (command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1544			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1545			command.program.fragmentShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1546			return false;
1547	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
1548		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1549			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1550			command.program.fragmentShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1551			return false;
1552
1553	if (command.program.geometryShader)
1554	{
1555		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getInputs().size(); ++varyingNdx)
1556			if (command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1557				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1558				command.program.geometryShader->getInputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1559				return false;
1560		for (size_t varyingNdx = 0; varyingNdx < command.program.geometryShader->getOutputs().size(); ++varyingNdx)
1561			if (command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_FLOAT &&
1562				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_INT32 &&
1563				command.program.geometryShader->getOutputs()[varyingNdx].type != GENERICVECTYPE_UINT32)
1564				return false;
1565	}
1566
1567	// Enough vertex inputs?
1568	if ((size_t)command.numVertexAttribs < command.program.vertexShader->getInputs().size())
1569		return false;
1570
1571	// There is a fragment output sink for each output?
1572	if ((size_t)command.renderTarget.numColorBuffers < command.program.fragmentShader->getOutputs().size())
1573		return false;
1574
1575	// All destination buffers should have same number of samples and same size
1576	for (int outputNdx = 0; outputNdx < command.renderTarget.numColorBuffers; ++outputNdx)
1577	{
1578		if (getBufferSize(command.renderTarget.colorBuffers[0]) != getBufferSize(command.renderTarget.colorBuffers[outputNdx]))
1579			return false;
1580
1581		if (command.renderTarget.colorBuffers[0].getNumSamples() != command.renderTarget.colorBuffers[outputNdx].getNumSamples())
1582			return false;
1583	}
1584
1585	// All destination buffers should have same basic type as matching fragment output
1586	for (size_t varyingNdx = 0; varyingNdx < command.program.fragmentShader->getOutputs().size(); ++varyingNdx)
1587	{
1588		const tcu::TextureChannelClass	colorbufferClass = tcu::getTextureChannelClass(command.renderTarget.colorBuffers[varyingNdx].raw().getFormat().type);
1589		const GenericVecType			colorType		 = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
1590
1591		if (command.program.fragmentShader->getOutputs()[varyingNdx].type != colorType)
1592			return false;
1593	}
1594
1595	// Integer values are flatshaded
1596	for (size_t outputNdx = 0; outputNdx < command.program.vertexShader->getOutputs().size(); ++outputNdx)
1597	{
1598		if (!command.program.vertexShader->getOutputs()[outputNdx].flatshade &&
1599			(command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
1600			 command.program.vertexShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
1601			return false;
1602	}
1603	if (command.program.geometryShader)
1604		for (size_t outputNdx = 0; outputNdx < command.program.geometryShader->getOutputs().size(); ++outputNdx)
1605		{
1606			if (!command.program.geometryShader->getOutputs()[outputNdx].flatshade &&
1607				(command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_INT32 ||
1608				 command.program.geometryShader->getOutputs()[outputNdx].type == GENERICVECTYPE_UINT32))
1609				return false;
1610		}
1611
1612	// Draw primitive is valid for geometry shader
1613	if (command.program.geometryShader)
1614	{
1615		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_POINTS && command.primitives.getPrimitiveType() != PRIMITIVETYPE_POINTS)
1616			return false;
1617
1618		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES &&
1619			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES &&
1620			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP &&
1621			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_LOOP))
1622			return false;
1623
1624		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES &&
1625			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES &&
1626			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP &&
1627			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_FAN))
1628			return false;
1629
1630		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_LINES_ADJACENCY &&
1631			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINES_ADJACENCY &&
1632			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_LINE_STRIP_ADJACENCY))
1633			return false;
1634
1635		if (command.program.geometryShader->getInputType() == rr::GEOMETRYSHADERINPUTTYPE_TRIANGLES_ADJACENCY &&
1636			(command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLES_ADJACENCY &&
1637			 command.primitives.getPrimitiveType() != PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY))
1638			return false;
1639	}
1640
1641	return true;
1642}
1643
1644} // anonymous
1645
1646DrawIndices::DrawIndices (const deUint32* ptr, int baseVertex_)
1647	: indices	(ptr)
1648	, indexType	(INDEXTYPE_UINT32)
1649	, baseVertex(baseVertex_)
1650{
1651}
1652
1653DrawIndices::DrawIndices (const deUint16* ptr, int baseVertex_)
1654	: indices	(ptr)
1655	, indexType	(INDEXTYPE_UINT16)
1656	, baseVertex(baseVertex_)
1657{
1658}
1659
1660DrawIndices::DrawIndices (const deUint8* ptr, int baseVertex_)
1661	: indices	(ptr)
1662	, indexType	(INDEXTYPE_UINT8)
1663	, baseVertex(baseVertex_)
1664{
1665}
1666
1667DrawIndices::DrawIndices (const void* ptr, IndexType type, int baseVertex_)
1668	: indices	(ptr)
1669	, indexType	(type)
1670	, baseVertex(baseVertex_)
1671{
1672}
1673
1674PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const int firstElement)
1675	: m_primitiveType	(primitiveType)
1676	, m_numElements		(numElements)
1677	, m_indices			(DE_NULL)
1678	, m_indexType		(INDEXTYPE_LAST)
1679	, m_baseVertex		(firstElement)
1680{
1681	DE_ASSERT(numElements >= 0 && "Invalid numElements");
1682	DE_ASSERT(firstElement >= 0 && "Invalid firstElement");
1683}
1684
1685PrimitiveList::PrimitiveList (PrimitiveType primitiveType, int numElements, const DrawIndices& indices)
1686	: m_primitiveType	(primitiveType)
1687	, m_numElements		((size_t)numElements)
1688	, m_indices			(indices.indices)
1689	, m_indexType		(indices.indexType)
1690	, m_baseVertex		(indices.baseVertex)
1691{
1692	DE_ASSERT(numElements >= 0 && "Invalid numElements");
1693}
1694
1695size_t PrimitiveList::getIndex (size_t elementNdx) const
1696{
1697	// indices == DE_NULL interpreted as command.indices = [first (=baseVertex) + 0, first + 1, first + 2...]
1698	if (m_indices)
1699	{
1700		int index = m_baseVertex + (int)readIndexArray(m_indexType, m_indices, elementNdx);
1701		DE_ASSERT(index >= 0); // do not access indices < 0
1702
1703		return (size_t)index;
1704	}
1705	else
1706		return (size_t)(m_baseVertex) + elementNdx;
1707}
1708
1709bool PrimitiveList::isRestartIndex (size_t elementNdx, deUint32 restartIndex) const
1710{
1711	// implicit index or explicit index (without base vertex) equals restart
1712	if (m_indices)
1713		return readIndexArray(m_indexType, m_indices, elementNdx) == restartIndex;
1714	else
1715		return elementNdx == (size_t)restartIndex;
1716}
1717
1718Renderer::Renderer (void)
1719{
1720}
1721
1722Renderer::~Renderer (void)
1723{
1724}
1725
1726void Renderer::draw (const DrawCommand& command) const
1727{
1728	drawInstanced(command, 1);
1729}
1730
1731void Renderer::drawInstanced (const DrawCommand& command, int numInstances) const
1732{
1733	// Do not run bad commands
1734	{
1735		const bool validCommand = isValidCommand(command, numInstances);
1736		if (!validCommand)
1737		{
1738			DE_ASSERT(false);
1739			return;
1740		}
1741	}
1742
1743	// Do not draw if nothing to draw
1744	{
1745		if (command.primitives.getNumElements() == 0 || numInstances == 0)
1746			return;
1747	}
1748
1749	// Prepare transformation
1750
1751	const size_t				numVaryings = command.program.vertexShader->getOutputs().size();
1752	VertexPacketAllocator		vpalloc(numVaryings);
1753	std::vector<VertexPacket*>	vertexPackets = vpalloc.allocArray(command.primitives.getNumElements());
1754	DrawContext					drawContext;
1755
1756	for (int instanceID = 0; instanceID < numInstances; ++instanceID)
1757	{
1758		// Each instance has its own primitives
1759		drawContext.primitiveID = 0;
1760
1761		for (size_t elementNdx = 0; elementNdx < command.primitives.getNumElements(); ++elementNdx)
1762		{
1763			int numVertexPackets = 0;
1764
1765			// collect primitive vertices until restart
1766
1767			while (elementNdx < command.primitives.getNumElements() &&
1768					!(command.state.restart.enabled && command.primitives.isRestartIndex(elementNdx, command.state.restart.restartIndex)))
1769			{
1770				// input
1771				vertexPackets[numVertexPackets]->instanceNdx	= instanceID;
1772				vertexPackets[numVertexPackets]->vertexNdx		= (int)command.primitives.getIndex(elementNdx);
1773
1774				// output
1775				vertexPackets[numVertexPackets]->pointSize		= command.state.point.pointSize;	// default value from the current state
1776				vertexPackets[numVertexPackets]->position		= tcu::Vec4(0, 0, 0, 0);			// no undefined values
1777
1778				++numVertexPackets;
1779				++elementNdx;
1780			}
1781
1782			// Duplicated restart shade
1783			if (numVertexPackets == 0)
1784				continue;
1785
1786			// \todo Vertex cache?
1787
1788			// Transform vertices
1789
1790			command.program.vertexShader->shadeVertices(command.vertexAttribs, &vertexPackets[0], numVertexPackets);
1791
1792			// Draw primitives
1793
1794			switch (command.primitives.getPrimitiveType())
1795			{
1796				case PRIMITIVETYPE_TRIANGLES:				{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1797				case PRIMITIVETYPE_TRIANGLE_STRIP:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1798				case PRIMITIVETYPE_TRIANGLE_FAN:			{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_FAN>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1799				case PRIMITIVETYPE_LINES:					{ drawAsPrimitives<PRIMITIVETYPE_LINES>						(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1800				case PRIMITIVETYPE_LINE_STRIP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP>				(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1801				case PRIMITIVETYPE_LINE_LOOP:				{ drawAsPrimitives<PRIMITIVETYPE_LINE_LOOP>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1802				case PRIMITIVETYPE_POINTS:					{ drawAsPrimitives<PRIMITIVETYPE_POINTS>					(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1803				case PRIMITIVETYPE_LINES_ADJACENCY:			{ drawAsPrimitives<PRIMITIVETYPE_LINES_ADJACENCY>			(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1804				case PRIMITIVETYPE_LINE_STRIP_ADJACENCY:	{ drawAsPrimitives<PRIMITIVETYPE_LINE_STRIP_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1805				case PRIMITIVETYPE_TRIANGLES_ADJACENCY:		{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLES_ADJACENCY>		(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1806				case PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY:{ drawAsPrimitives<PRIMITIVETYPE_TRIANGLE_STRIP_ADJACENCY>	(command.state, command.renderTarget, command.program, &vertexPackets[0], numVertexPackets, drawContext, vpalloc);	break; }
1807				default:
1808					DE_ASSERT(DE_FALSE);
1809			}
1810		}
1811	}
1812}
1813
1814} // rr
1815