es3pBufferDataUploadTests.cpp revision 8852c82a1ffa4760985c17cc6875d5d521daf343
1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Buffer data upload performance tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es3pBufferDataUploadTests.hpp"
25#include "glsCalibration.hpp"
26#include "tcuTestLog.hpp"
27#include "tcuVectorUtil.hpp"
28#include "tcuSurface.hpp"
29#include "tcuCPUWarmup.hpp"
30#include "tcuRenderTarget.hpp"
31#include "gluRenderContext.hpp"
32#include "gluShaderProgram.hpp"
33#include "gluStrUtil.hpp"
34#include "gluPixelTransfer.hpp"
35#include "gluObjectWrapper.hpp"
36#include "glwFunctions.hpp"
37#include "glwEnums.hpp"
38#include "deClock.h"
39#include "deMath.h"
40#include "deStringUtil.hpp"
41#include "deRandom.hpp"
42#include "deMemory.h"
43#include "deThread.h"
44
45#include <algorithm>
46#include <iomanip>
47#include <limits>
48
49namespace deqp
50{
51namespace gles3
52{
53namespace Performance
54{
55namespace
56{
57
58using gls::theilSenSiegelLinearRegression;
59using gls::LineParametersWithConfidence;
60
// GLSL ES 3.00 vertex shader source: writes the a_position attribute to
// gl_Position unchanged.
static const char* const s_dummyVertexShader =		"#version 300 es\n"
													"in highp vec4 a_position;\n"
													"void main (void)\n"
													"{\n"
													"	gl_Position = a_position;\n"
													"}\n";
67
// GLSL ES 3.00 fragment shader source: writes constant opaque red to output
// location 0.
// NOTE(review): the identifier is spelled "Fragnent"; it is kept as-is because
// it may be referenced elsewhere in the file.
static const char* const s_dummyFragnentShader =	"#version 300 es\n"
													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
													"void main (void)\n"
													"{\n"
													"	dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
													"}\n";
74
// GLSL ES 3.00 vertex shader source: writes a_position to gl_Position and
// forwards the a_color attribute to the v_color varying.
static const char* const s_colorVertexShader =		"#version 300 es\n"
													"in highp vec4 a_position;\n"
													"in highp vec4 a_color;\n"
													"out highp vec4 v_color;\n"
													"void main (void)\n"
													"{\n"
													"	gl_Position = a_position;\n"
													"	v_color = a_color;\n"
													"}\n";
84
// GLSL ES 3.00 fragment shader source: writes the interpolated v_color varying
// to output location 0.
static const char* const s_colorFragmentShader =	"#version 300 es\n"
													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
													"in mediump vec4 v_color;\n"
													"void main (void)\n"
													"{\n"
													"	dEQP_FragColor = v_color;\n"
													"}\n";
92
// Compile-time switch. For any non-zero cond the nested typedef Type names
// TrueType; the cond == 0 specialization has no Type member, so
// EnableIf<T, 0>::Type does not exist.
template <typename TrueType, int cond>
struct EnableIf
{
	typedef TrueType Type;
};

template <typename TrueType>
struct EnableIf<TrueType, 0>
{
};
103
// Inverse of EnableIf: the nested typedef Type (naming TrueType) exists only
// in the cond == 0 specialization; for any non-zero cond there is no Type.
template <typename TrueType, int cond>
struct EnableIfNot
{
};

template <typename TrueType>
struct EnableIfNot<TrueType, 0>
{
	typedef TrueType Type;
};
114
115struct SingleOperationDuration
116{
117	deUint64 totalDuration;
118	deUint64 fitResponseDuration; // used for fitting
119};
120
121struct MapBufferRangeDuration
122{
123	deUint64 mapDuration;
124	deUint64 unmapDuration;
125	deUint64 writeDuration;
126	deUint64 allocDuration;
127	deUint64 totalDuration;
128
129	deUint64 fitResponseDuration;
130};
131
132struct MapBufferRangeDurationNoAlloc
133{
134	deUint64 mapDuration;
135	deUint64 unmapDuration;
136	deUint64 writeDuration;
137	deUint64 totalDuration;
138
139	deUint64 fitResponseDuration;
140};
141
142struct MapBufferRangeFlushDuration
143{
144	deUint64 mapDuration;
145	deUint64 unmapDuration;
146	deUint64 writeDuration;
147	deUint64 flushDuration;
148	deUint64 allocDuration;
149	deUint64 totalDuration;
150
151	deUint64 fitResponseDuration;
152};
153
154struct MapBufferRangeFlushDurationNoAlloc
155{
156	deUint64 mapDuration;
157	deUint64 unmapDuration;
158	deUint64 writeDuration;
159	deUint64 flushDuration;
160	deUint64 totalDuration;
161
162	deUint64 fitResponseDuration;
163};
164
165struct RenderReadDuration
166{
167	deUint64 renderDuration;
168	deUint64 readDuration;
169	deUint64 renderReadDuration;
170	deUint64 totalDuration;
171
172	deUint64 fitResponseDuration;
173};
174
175struct UnrelatedUploadRenderReadDuration
176{
177	deUint64 renderDuration;
178	deUint64 readDuration;
179	deUint64 renderReadDuration;
180	deUint64 totalDuration;
181
182	deUint64 fitResponseDuration;
183};
184
185struct UploadRenderReadDuration
186{
187	deUint64 uploadDuration;
188	deUint64 renderDuration;
189	deUint64 readDuration;
190	deUint64 totalDuration;
191	deUint64 renderReadDuration;
192
193	deUint64 fitResponseDuration;
194};
195
196struct UploadRenderReadDurationWithUnrelatedUploadSize
197{
198	deUint64 uploadDuration;
199	deUint64 renderDuration;
200	deUint64 readDuration;
201	deUint64 totalDuration;
202	deUint64 renderReadDuration;
203
204	deUint64 fitResponseDuration;
205};
206
207struct RenderUploadRenderReadDuration
208{
209	deUint64 firstRenderDuration;
210	deUint64 uploadDuration;
211	deUint64 secondRenderDuration;
212	deUint64 readDuration;
213	deUint64 totalDuration;
214	deUint64 renderReadDuration;
215
216	deUint64 fitResponseDuration;
217};
218
// One upload measurement: three sizes plus the timing record of type SampleT.
// writtenSize is the predictor (x value) used by fitLineToSamples() and
// calculateBasicTransferStatistics() for upload samples.
template <typename SampleT>
struct UploadSampleResult
{
	typedef SampleT SampleType;

	int			bufferSize;
	int			allocatedSize;
	int			writtenSize;
	SampleType	duration;
};
229
// One render measurement: sizes, vertex count and the timing record of type
// SampleT. renderDataSize is the predictor (x value) used by
// fitLineToSamples() and calculateBasicRenderStatistics() for render samples.
template <typename SampleT>
struct RenderSampleResult
{
	typedef SampleT SampleType;

	int			uploadedDataSize;
	int			renderDataSize;
	int			unrelatedDataSize;
	int			numVertices;
	SampleT		duration;
};
241
// Order statistics of one timed quantity over a sample set, as filled in by
// calculateSingleOperationStatistics() and calculateBasicStatistics():
// front/back of the sorted values and linear samples at 0.5, 0.1 and 0.9.
struct SingleOperationStatistics
{
	float minTime;
	float maxTime;
	float medianTime;
	float min2DecileTime;		//!< minimum value in the 2nd decile
	float max9DecileTime;		//!< maximum value in the 9th decile
};
250
251struct SingleCallStatistics
252{
253	SingleOperationStatistics	result;
254
255	float						medianRate;
256	float						maxDiffTime;
257	float						maxDiff9DecileTime;
258	float						medianDiffTime;
259
260	float						maxRelDiffTime;
261	float						max9DecileRelDiffTime;
262	float						medianRelDiffTime;
263};
264
265struct MapCallStatistics
266{
267	SingleOperationStatistics	map;
268	SingleOperationStatistics	unmap;
269	SingleOperationStatistics	write;
270	SingleOperationStatistics	alloc;
271	SingleOperationStatistics	result;
272
273	float						medianRate;
274	float						maxDiffTime;
275	float						maxDiff9DecileTime;
276	float						medianDiffTime;
277
278	float						maxRelDiffTime;
279	float						max9DecileRelDiffTime;
280	float						medianRelDiffTime;
281};
282
283struct MapFlushCallStatistics
284{
285	SingleOperationStatistics	map;
286	SingleOperationStatistics	unmap;
287	SingleOperationStatistics	write;
288	SingleOperationStatistics	flush;
289	SingleOperationStatistics	alloc;
290	SingleOperationStatistics	result;
291
292	float						medianRate;
293	float						maxDiffTime;
294	float						maxDiff9DecileTime;
295	float						medianDiffTime;
296
297	float						maxRelDiffTime;
298	float						max9DecileRelDiffTime;
299	float						medianRelDiffTime;
300};
301
302struct RenderReadStatistics
303{
304	SingleOperationStatistics	render;
305	SingleOperationStatistics	read;
306	SingleOperationStatistics	result;
307	SingleOperationStatistics	total;
308
309	float						medianRate;
310	float						maxDiffTime;
311	float						maxDiff9DecileTime;
312	float						medianDiffTime;
313
314	float						maxRelDiffTime;
315	float						max9DecileRelDiffTime;
316	float						medianRelDiffTime;
317};
318
319struct UploadRenderReadStatistics
320{
321	SingleOperationStatistics	upload;
322	SingleOperationStatistics	render;
323	SingleOperationStatistics	read;
324	SingleOperationStatistics	result;
325	SingleOperationStatistics	total;
326
327	float						medianRate;
328	float						maxDiffTime;
329	float						maxDiff9DecileTime;
330	float						medianDiffTime;
331
332	float						maxRelDiffTime;
333	float						max9DecileRelDiffTime;
334	float						medianRelDiffTime;
335};
336
337struct RenderUploadRenderReadStatistics
338{
339	SingleOperationStatistics	firstRender;
340	SingleOperationStatistics	upload;
341	SingleOperationStatistics	secondRender;
342	SingleOperationStatistics	read;
343	SingleOperationStatistics	result;
344	SingleOperationStatistics	total;
345
346	float						medianRate;
347	float						maxDiffTime;
348	float						maxDiff9DecileTime;
349	float						medianDiffTime;
350
351	float						maxRelDiffTime;
352	float						max9DecileRelDiffTime;
353	float						medianRelDiffTime;
354};
355
// Primary template of the per-duration-type traits. It is intentionally empty:
// only the explicit specializations carry a StatsType typedef and the
// HAS_* / LOG_* compile-time flags.
template <typename T>
struct SampleTypeTraits
{
};
360
361template <>
362struct SampleTypeTraits<SingleOperationDuration>
363{
364	typedef SingleCallStatistics StatsType;
365
366	enum { HAS_MAP_STATS		= 0	};
367	enum { HAS_UNMAP_STATS		= 0	};
368	enum { HAS_WRITE_STATS		= 0	};
369	enum { HAS_FLUSH_STATS		= 0	};
370	enum { HAS_ALLOC_STATS		= 0	};
371	enum { LOG_CONTRIBUTIONS	= 0	};
372};
373
374template <>
375struct SampleTypeTraits<MapBufferRangeDuration>
376{
377	typedef MapCallStatistics StatsType;
378
379	enum { HAS_MAP_STATS		= 1	};
380	enum { HAS_UNMAP_STATS		= 1	};
381	enum { HAS_WRITE_STATS		= 1	};
382	enum { HAS_FLUSH_STATS		= 0	};
383	enum { HAS_ALLOC_STATS		= 1	};
384	enum { LOG_CONTRIBUTIONS	= 1	};
385};
386
387template <>
388struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
389{
390	typedef MapCallStatistics StatsType;
391
392	enum { HAS_MAP_STATS		= 1	};
393	enum { HAS_UNMAP_STATS		= 1	};
394	enum { HAS_WRITE_STATS		= 1	};
395	enum { HAS_FLUSH_STATS		= 0	};
396	enum { HAS_ALLOC_STATS		= 0	};
397	enum { LOG_CONTRIBUTIONS	= 1	};
398};
399
400template <>
401struct SampleTypeTraits<MapBufferRangeFlushDuration>
402{
403	typedef MapFlushCallStatistics StatsType;
404
405	enum { HAS_MAP_STATS		= 1	};
406	enum { HAS_UNMAP_STATS		= 1	};
407	enum { HAS_WRITE_STATS		= 1	};
408	enum { HAS_FLUSH_STATS		= 1	};
409	enum { HAS_ALLOC_STATS		= 1	};
410	enum { LOG_CONTRIBUTIONS	= 1	};
411};
412
413template <>
414struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
415{
416	typedef MapFlushCallStatistics StatsType;
417
418	enum { HAS_MAP_STATS		= 1	};
419	enum { HAS_UNMAP_STATS		= 1	};
420	enum { HAS_WRITE_STATS		= 1	};
421	enum { HAS_FLUSH_STATS		= 1	};
422	enum { HAS_ALLOC_STATS		= 0	};
423	enum { LOG_CONTRIBUTIONS	= 1	};
424};
425
426template <>
427struct SampleTypeTraits<RenderReadDuration>
428{
429	typedef RenderReadStatistics StatsType;
430
431	enum { HAS_RENDER_STATS			= 1	};
432	enum { HAS_READ_STATS			= 1	};
433	enum { HAS_UPLOAD_STATS			= 0	};
434	enum { HAS_TOTAL_STATS			= 1	};
435	enum { HAS_FIRST_RENDER_STATS	= 0	};
436	enum { HAS_SECOND_RENDER_STATS	= 0	};
437
438	enum { LOG_CONTRIBUTIONS	= 1	};
439};
440
441template <>
442struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
443{
444	typedef RenderReadStatistics StatsType;
445
446	enum { HAS_RENDER_STATS			= 1	};
447	enum { HAS_READ_STATS			= 1	};
448	enum { HAS_UPLOAD_STATS			= 0	};
449	enum { HAS_TOTAL_STATS			= 1	};
450	enum { HAS_FIRST_RENDER_STATS	= 0	};
451	enum { HAS_SECOND_RENDER_STATS	= 0	};
452
453	enum { LOG_CONTRIBUTIONS	= 1	};
454};
455
456template <>
457struct SampleTypeTraits<UploadRenderReadDuration>
458{
459	typedef UploadRenderReadStatistics StatsType;
460
461	enum { HAS_RENDER_STATS			= 1	};
462	enum { HAS_READ_STATS			= 1	};
463	enum { HAS_UPLOAD_STATS			= 1	};
464	enum { HAS_TOTAL_STATS			= 1	};
465	enum { HAS_FIRST_RENDER_STATS	= 0	};
466	enum { HAS_SECOND_RENDER_STATS	= 0	};
467
468	enum { LOG_CONTRIBUTIONS			= 1	};
469	enum { LOG_UNRELATED_UPLOAD_SIZE	= 0 };
470};
471
472template <>
473struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
474{
475	typedef UploadRenderReadStatistics StatsType;
476
477	enum { HAS_RENDER_STATS			= 1	};
478	enum { HAS_READ_STATS			= 1	};
479	enum { HAS_UPLOAD_STATS			= 1	};
480	enum { HAS_TOTAL_STATS			= 1	};
481	enum { HAS_FIRST_RENDER_STATS	= 0	};
482	enum { HAS_SECOND_RENDER_STATS	= 0	};
483
484	enum { LOG_CONTRIBUTIONS			= 1	};
485	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
486};
487
488template <>
489struct SampleTypeTraits<RenderUploadRenderReadDuration>
490{
491	typedef RenderUploadRenderReadStatistics StatsType;
492
493	enum { HAS_RENDER_STATS			= 0	};
494	enum { HAS_READ_STATS			= 1	};
495	enum { HAS_UPLOAD_STATS			= 1	};
496	enum { HAS_TOTAL_STATS			= 1	};
497	enum { HAS_FIRST_RENDER_STATS	= 1	};
498	enum { HAS_SECOND_RENDER_STATS	= 1	};
499
500	enum { LOG_CONTRIBUTIONS			= 1	};
501	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
502};
503
// Three transfer-rate figures produced by analysing upload samples.
// NOTE(review): the producing code is outside this view; units and the exact
// meaning of "at range" / "at infinity" should be confirmed there.
struct UploadSampleAnalyzeResult
{
	float transferRateMedian;
	float transferRateAtRange;
	float transferRateAtInfinity;
};
510
// Three render-rate figures produced by analysing render samples.
// NOTE(review): the producing code is outside this view; units and the exact
// meaning of "at range" / "at infinity" should be confirmed there.
struct RenderSampleAnalyzeResult
{
	float renderRateMedian;
	float renderRateAtRange;
	float renderRateAtInfinity;
};
517
// Exception type carrying no data of its own; derives from std::exception so
// it can be caught either specifically or as a generic std::exception.
// NOTE(review): presumably thrown when a buffer unmap fails - the throw site
// is outside this view.
class UnmapFailureError : public std::exception
{
public:
	UnmapFailureError (void) : std::exception() {}
};
523
524static std::string getHumanReadableByteSize (int numBytes)
525{
526	std::ostringstream buf;
527
528	if (numBytes < 1024)
529		buf << numBytes << " byte(s)";
530	else if (numBytes < 1024 * 1024)
531		buf << de::floatToString(numBytes/1024.0f, 1) << " KiB";
532	else
533		buf << de::floatToString(numBytes/1024.0f/1024.0f, 1) << " MiB";
534
535	return buf.str();
536}
537
538static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
539{
540	// Time used by memcpy is assumed to be asymptotically linear
541
542	// With large numBytes, the probability of context switch or other random
543	// event is high. Apply memcpy in parts and report how much time would
544	// memcpy have used with the median transfer rate.
545
546	// Less than 1MiB, no need to do anything special
547	if (numBytes < 1048576)
548	{
549		deUint64 startTime;
550		deUint64 endTime;
551
552		deYield();
553
554		startTime = deGetMicroseconds();
555		deMemcpy(dst, src, numBytes);
556		endTime = deGetMicroseconds();
557
558		return endTime - startTime;
559	}
560	else
561	{
562		// Do memcpy in multiple parts
563
564		const int	numSections		= 5;
565		const int	sectionAlign	= 16;
566
567		int			sectionStarts[numSections+1];
568		int			sectionLens[numSections];
569		deUint64	sectionTimes[numSections];
570		deUint64	medianTime;
571		deUint64	bestTime		= 0;
572
573		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
574			sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
575		sectionStarts[numSections] = numBytes;
576
577		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
578			sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];
579
580		// Memcpy is usually called after mapbuffer range which may take
581		// a lot of time. To prevent power management from kicking in during
582		// copy, warm up more.
583		{
584			deYield();
585			tcu::warmupCPU();
586			deYield();
587		}
588
589		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
590		{
591			deUint64 startTime;
592			deUint64 endTime;
593
594			startTime = deGetMicroseconds();
595			deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
596			endTime = deGetMicroseconds();
597
598			sectionTimes[sectionNdx] = endTime - startTime;
599
600			if (!bestTime || sectionTimes[sectionNdx] < bestTime)
601				bestTime = sectionTimes[sectionNdx];
602
603			// Detect if write takes 50% longer than it should, and warm up if that happened
604			if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * bestTime)
605			{
606				deYield();
607				tcu::warmupCPU();
608				deYield();
609			}
610		}
611
612		std::sort(sectionTimes, sectionTimes + numSections);
613
614		if ((numSections % 2) == 0)
615			medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
616		else
617			medianTime = sectionTimes[numSections / 2];
618
619		return medianTime*numSections;
620	}
621}
622
623static float dummyCalculation (float initial, int workSize)
624{
625	float	a = initial;
626	int		b = 123;
627
628	for (int ndx = 0; ndx < workSize; ++ndx)
629	{
630		a = deFloatCos(a + (float)b);
631		b = (b + 63) % 107 + de::abs((int)(a*10.0f));
632	}
633
634	return a + (float)b;
635}
636
637static void busyWait (int microseconds)
638{
639	const deUint64	maxSingleWaitTime	= 1000; // 1ms
640	const deUint64	endTime				= deGetMicroseconds() + microseconds;
641	float			dummy				= *tcu::warmupCPUInternal::g_dummy.m_v;
642	int				workSize			= 500;
643
644	// exponentially increase work, cap to 1ms
645	while (deGetMicroseconds() < endTime)
646	{
647		const deUint64	startTime		= deGetMicroseconds();
648		deUint64		totalTime;
649
650		dummy = dummyCalculation(dummy, workSize);
651
652		totalTime = deGetMicroseconds() - startTime;
653
654		if (totalTime >= maxSingleWaitTime)
655			break;
656		else
657			workSize *= 2;
658	}
659
660	// "wait"
661	while (deGetMicroseconds() < endTime)
662		dummy = dummyCalculation(dummy, workSize);
663
664	*tcu::warmupCPUInternal::g_dummy.m_v = dummy;
665}
666
667// Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
668template <typename T>
669static float linearSample (const std::vector<T>& values, float position)
670{
671	DE_ASSERT(position >= 0.0f);
672	DE_ASSERT(position <= 1.0f);
673
674	const float	floatNdx			= ((int)values.size() - 1) * position;
675	const int	lowerNdx			= (int)deFloatFloor(floatNdx);
676	const int	higherNdx			= lowerNdx + 1;
677	const float	interpolationFactor = floatNdx - (float)lowerNdx;
678
679	DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
680	DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
681	DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
682
683	return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
684}
685
686template <typename T>
687SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
688{
689	SingleOperationStatistics	stats;
690	std::vector<deUint64>		values(samples.size());
691
692	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
693		values[ndx] = samples[ndx].duration.*target;
694
695	std::sort(values.begin(), values.end());
696
697	stats.minTime			= (float)values.front();
698	stats.maxTime			= (float)values.back();
699	stats.medianTime		= linearSample(values, 0.5f);
700	stats.min2DecileTime	= linearSample(values, 0.1f);
701	stats.max9DecileTime	= linearSample(values, 0.9f);
702
703	return stats;
704}
705
706template <typename StatisticsType, typename SampleType>
707void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
708{
709	std::vector<deUint64> values(samples.size());
710
711	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
712		values[ndx] = samples[ndx].duration.fitResponseDuration;
713
714	// median rate
715	{
716		std::vector<float> processingRates(samples.size());
717
718		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
719		{
720			const float timeInSeconds = values[ndx] / 1000.0f / 1000.0f;
721			processingRates[ndx] = samples[ndx].*predictor / timeInSeconds;
722		}
723
724		std::sort(processingRates.begin(), processingRates.end());
725
726		stats.medianRate = linearSample(processingRates, 0.5f);
727	}
728
729	// results compared to the approximation
730	{
731		std::vector<float> timeDiffs(samples.size());
732
733		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
734		{
735			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
736			const float actual		= (float)values[ndx];
737			timeDiffs[ndx] = actual - prediction;
738		}
739		std::sort(timeDiffs.begin(), timeDiffs.end());
740
741		stats.maxDiffTime			= timeDiffs.back();
742		stats.maxDiff9DecileTime	= linearSample(timeDiffs, 0.9f);
743		stats.medianDiffTime		= linearSample(timeDiffs, 0.5f);
744	}
745
746	// relative comparison to the approximation
747	{
748		std::vector<float> relativeDiffs(samples.size());
749
750		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
751		{
752			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
753			const float actual		= (float)values[ndx];
754
755			// Ignore cases where we predict negative times, or if
756			// ratio would be (nearly) infinite: ignore if predicted
757			// time is less than 1 microsecond
758			if (prediction < 1.0f)
759				relativeDiffs[ndx] = 0.0f;
760			else
761				relativeDiffs[ndx] = (actual - prediction) / prediction;
762		}
763		std::sort(relativeDiffs.begin(), relativeDiffs.end());
764
765		stats.maxRelDiffTime		= relativeDiffs.back();
766		stats.max9DecileRelDiffTime	= linearSample(relativeDiffs, 0.9f);
767		stats.medianRelDiffTime		= linearSample(relativeDiffs, 0.5f);
768	}
769
770	// values calculated using sorted timings
771
772	std::sort(values.begin(), values.end());
773
774	stats.result.minTime = (float)values.front();
775	stats.result.maxTime = (float)values.back();
776	stats.result.medianTime = linearSample(values, 0.5f);
777	stats.result.min2DecileTime = linearSample(values, 0.1f);
778	stats.result.max9DecileTime = linearSample(values, 0.9f);
779}
780
781template <typename StatisticsType, typename SampleType>
782void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
783{
784	calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
785}
786
787template <typename StatisticsType, typename SampleType>
788void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
789{
790	calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
791}
792
793static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
794{
795	SingleCallStatistics stats;
796
797	calculateBasicTransferStatistics(stats, fit, samples);
798
799	return stats;
800}
801
802static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
803{
804	MapCallStatistics stats;
805
806	calculateBasicTransferStatistics(stats, fit, samples);
807
808	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
809	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
810	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
811	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
812
813	return stats;
814}
815
816static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
817{
818	MapFlushCallStatistics stats;
819
820	calculateBasicTransferStatistics(stats, fit, samples);
821
822	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
823	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
824	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
825	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
826	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
827
828	return stats;
829}
830
831static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
832{
833	MapCallStatistics stats;
834
835	calculateBasicTransferStatistics(stats, fit, samples);
836
837	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
838	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
839	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
840
841	return stats;
842}
843
844static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
845{
846	MapFlushCallStatistics stats;
847
848	calculateBasicTransferStatistics(stats, fit, samples);
849
850	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
851	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
852	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
853	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
854
855	return stats;
856}
857
858static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
859{
860	RenderReadStatistics stats;
861
862	calculateBasicRenderStatistics(stats, fit, samples);
863
864	stats.render	= calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
865	stats.read		= calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
866	stats.total		= calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
867
868	return stats;
869}
870
871static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
872{
873	RenderReadStatistics stats;
874
875	calculateBasicRenderStatistics(stats, fit, samples);
876
877	stats.render	= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
878	stats.read		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
879	stats.total		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
880
881	return stats;
882}
883
884static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
885{
886	UploadRenderReadStatistics stats;
887
888	calculateBasicRenderStatistics(stats, fit, samples);
889
890	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
891	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
892	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
893	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
894
895	return stats;
896}
897
898static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
899{
900	UploadRenderReadStatistics stats;
901
902	calculateBasicRenderStatistics(stats, fit, samples);
903
904	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
905	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
906	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
907	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
908
909	return stats;
910}
911
912static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
913{
914	RenderUploadRenderReadStatistics stats;
915
916	calculateBasicRenderStatistics(stats, fit, samples);
917
918	stats.firstRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
919	stats.upload		= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
920	stats.secondRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
921	stats.read			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
922	stats.total			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
923
924	return stats;
925}
926
927template <typename DurationType>
928static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
929{
930	std::vector<tcu::Vec2> samplePoints;
931
932	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
933	{
934		tcu::Vec2 point;
935
936		point.x() = (float)(samples[sampleNdx].writtenSize);
937		point.y() = (float)(samples[sampleNdx].duration.*target);
938
939		samplePoints.push_back(point);
940	}
941
942	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
943}
944
945template <typename DurationType>
946static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
947{
948	std::vector<tcu::Vec2> samplePoints;
949
950	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
951	{
952		tcu::Vec2 point;
953
954		point.x() = (float)(samples[sampleNdx].renderDataSize);
955		point.y() = (float)(samples[sampleNdx].duration.*target);
956
957		samplePoints.push_back(point);
958	}
959
960	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
961}
962
963template <typename T>
964static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
965{
966	return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
967}
968
969template <typename T>
970static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
971{
972	return fitLineToSamples(samples, 0, (int)samples.size(), target);
973}
974
// Compute the area enclosed between two lines over the range [xmin, xmax].
//
// Line A is y = lineAOffset + lineACoefficient * x and line B is
// y = lineBOffset + lineBCoefficient * x.
//
// If the lines do not cross inside the range, the enclosed shape is a
// trapezoid and its area is the width times the gap at the midpoint. If they
// cross, the shape is two triangles that meet at the crossing point.
//
// The crossing point is derived from the gaps at the two ends of the range
// instead of dividing by the slope difference. The slope difference can be
// tiny (the lines are used with x ranges spanning millions of units) while the
// enclosed area is large, so comparing it against an absolute epsilon would
// discard real differences.
static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
{
	const float lineAMin		= lineAOffset + lineACoefficient * xmin;
	const float lineAMax		= lineAOffset + lineACoefficient * xmax;
	const float lineBMin		= lineBOffset + lineBCoefficient * xmin;
	const float lineBMax		= lineBOffset + lineBCoefficient * xmax;
	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
	const bool	aOverBAtEnd		= (lineAMax > lineBMax);

	if (aOverBAtBegin == aOverBAtEnd)
	{
		// lines do not intersect

		const float midpoint	= (xmin + xmax) / 2.0f;
		const float width		= (xmax - xmin);

		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
		const float heightDiff	= lineAHeight - lineBHeight;

		return width * ((heightDiff < 0.0f) ? (-heightDiff) : (heightDiff));
	}
	else
	{
		// lines intersect

		// The ordering of the lines at each end is known here, so the gaps
		// can be taken with the right sign directly.
		const float leftHeight	= (aOverBAtBegin) ? (lineAMin - lineBMin) : (lineBMin - lineAMin);
		const float rightHeight	= (aOverBAtEnd) ? (lineAMax - lineBMax) : (lineBMax - lineAMax);
		const float heightSum	= leftHeight + rightHeight;
		const float signedWidth	= (xmax - xmin);
		const float width		= (signedWidth < 0.0f) ? (-signedWidth) : (signedWidth);

		if (heightSum <= 0.0f)
			return 0.0f;

		// The crossing point splits the width in proportion to the end gaps:
		// the left triangle has base width * leftHeight / heightSum and the
		// right triangle has base width * rightHeight / heightSum.
		return 0.5f * width * (leftHeight * (leftHeight / heightSum) + rightHeight * (rightHeight / heightSum));
	}
}
1012
1013template <typename T>
1014static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
1015{
1016	// Compare the fitted line of first half of the samples to the fitted line of
1017	// the second half of the samples. Calculate a AABB that fully contains every
1018	// sample's x component and both fit lines in this range. Calculate the ratio
1019	// of the area between the lines and the AABB.
1020
1021	const float				epsilon				= 1.e-6f;
1022	const int				midPoint			= (int)samples.size() / 2;
1023	const LineParametersWithConfidence	startApproximation	= fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1024	const LineParametersWithConfidence	endApproximation	= fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1025
1026	const float				aabbMinX			= (float)(samples.front().*predictor);
1027	const float				aabbMinY			= de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
1028	const float				aabbMaxX			= (float)(samples.back().*predictor);
1029	const float				aabbMaxY			= de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);
1030
1031	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1032	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
1033	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1034
1035	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1036}
1037
1038template <typename DurationType>
1039static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
1040{
1041	return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1042}
1043
1044template <typename DurationType>
1045static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
1046{
1047	return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1048}
1049
1050template <typename T>
1051static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
1052{
1053	// Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1054	// Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1055	// contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1056	// the lines and the AABB.
1057
1058	const float				epsilon				= 1.e-6f;
1059	const LineParametersWithConfidence	evenApproximation	= fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1060	const LineParametersWithConfidence	oddApproximation	= fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1061
1062	const float				aabbMinX			= (float)(samples.front().*predictor);
1063	const float				aabbMinY			= de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
1064	const float				aabbMaxX			= (float)(samples.back().*predictor);
1065	const float				aabbMaxY			= de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);
1066
1067	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1068	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
1069	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1070
1071	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1072}
1073
1074template <typename DurationType>
1075static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
1076{
1077	return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1078}
1079
1080template <typename DurationType>
1081static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
1082{
1083	return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1084}
1085
1086template <typename DurationType>
1087static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
1088{
1089	minBufferSize = 0;
1090	maxBufferSize = 0;
1091
1092	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1093	{
1094		DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1095
1096		if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1097			minBufferSize = samples[sampleNdx].allocatedSize;
1098		if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1099			maxBufferSize = samples[sampleNdx].allocatedSize;
1100	}
1101
1102	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1103	{
1104		const float bucketNdxFloat	= (samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * numBuckets;
1105		const int bucketNdx			= de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
1106
1107		buckets[bucketNdx].push_back(samples[sampleNdx]);
1108	}
1109}
1110
1111template <typename SampleType>
1112static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1113{
1114	log	<< tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1115		<< tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1116		<< tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime)
1117		<< tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime)
1118		<< tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1119}
1120
1121template <typename SampleType>
1122static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1123{
1124	log	<< tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1125		<< tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1126		<< tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1127		<< tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1128		<< tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1129}
1130
1131template <typename SampleType>
1132static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1133{
1134	log	<< tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1135		<< tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1136		<< tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1137		<< tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1138		<< tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1139}
1140
1141template <typename SampleType>
1142static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1143{
1144	log	<< tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1145		<< tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1146		<< tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1147		<< tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1148		<< tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1149}
1150
1151template <typename SampleType>
1152static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1153{
1154	log	<< tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1155		<< tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1156		<< tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1157		<< tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1158		<< tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1159}
1160
1161template <typename SampleType>
1162static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1163{
1164	DE_UNREF(log);
1165	DE_UNREF(stats);
1166}
1167
1168template <typename SampleType>
1169static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1170{
1171	DE_UNREF(log);
1172	DE_UNREF(stats);
1173}
1174
1175template <typename SampleType>
1176static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1177{
1178	DE_UNREF(log);
1179	DE_UNREF(stats);
1180}
1181
1182template <typename SampleType>
1183static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1184{
1185	DE_UNREF(log);
1186	DE_UNREF(stats);
1187}
1188
1189template <typename SampleType>
1190static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1191{
1192	DE_UNREF(log);
1193	DE_UNREF(stats);
1194}
1195
1196template <typename SampleType>
1197static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1198{
1199	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1200	log	<< tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1201		<< tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1202		<< tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1203}
1204
1205template <typename SampleType>
1206static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1207{
1208	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1209	log	<< tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1210		<< tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1211		<< tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1212}
1213
1214template <typename SampleType>
1215static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1216{
1217	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1218	log	<< tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1219		<< tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1220		<< tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1221}
1222
1223template <typename SampleType>
1224static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1225{
1226	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1227	log	<< tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1228		<< tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1229		<< tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1230}
1231
1232template <typename SampleType>
1233static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1234{
1235	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1236	log	<< tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1237		<< tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1238		<< tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1239}
1240
1241template <typename SampleType>
1242static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1243{
1244	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1245	log	<< tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1246		<< tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1247		<< tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
1248}
1249
1250template <typename SampleType>
1251static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1252{
1253	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1254	log	<< tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1255		<< tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1256		<< tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1257}
1258
1259template <typename SampleType>
1260static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1261{
1262	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1263	log	<< tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1264		<< tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1265		<< tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
1266}
1267
1268template <typename SampleType>
1269static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1270{
1271	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1272	log	<< tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1273		<< tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1274		<< tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1275}
1276
1277template <typename SampleType>
1278static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1279{
1280	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration);
1281	log	<< tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1282		<< tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1283		<< tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
1284}
1285
1286template <typename SampleType>
1287static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1288{
1289	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration);
1290	log	<< tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1291		<< tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1292		<< tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
1293}
1294
1295template <typename SampleType>
1296static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1297{
1298	DE_UNREF(log);
1299	DE_UNREF(samples);
1300	DE_UNREF(stats);
1301}
1302
1303template <typename SampleType>
1304static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1305{
1306	DE_UNREF(log);
1307	DE_UNREF(samples);
1308	DE_UNREF(stats);
1309}
1310
1311template <typename SampleType>
1312static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1313{
1314	DE_UNREF(log);
1315	DE_UNREF(samples);
1316	DE_UNREF(stats);
1317}
1318
1319template <typename SampleType>
1320static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1321{
1322	DE_UNREF(log);
1323	DE_UNREF(samples);
1324	DE_UNREF(stats);
1325}
1326
1327template <typename SampleType>
1328static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1329{
1330	DE_UNREF(log);
1331	DE_UNREF(samples);
1332	DE_UNREF(stats);
1333}
1334
1335template <typename SampleType>
1336static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1337{
1338	DE_UNREF(log);
1339	DE_UNREF(samples);
1340	DE_UNREF(stats);
1341}
1342
1343template <typename SampleType>
1344static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1345{
1346	DE_UNREF(log);
1347	DE_UNREF(samples);
1348	DE_UNREF(stats);
1349}
1350
1351template <typename SampleType>
1352static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1353{
1354	DE_UNREF(log);
1355	DE_UNREF(samples);
1356	DE_UNREF(stats);
1357}
1358
1359template <typename SampleType>
1360static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1361{
1362	DE_UNREF(log);
1363	DE_UNREF(samples);
1364	DE_UNREF(stats);
1365}
1366
1367template <typename SampleType>
1368static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1369{
1370	DE_UNREF(log);
1371	DE_UNREF(samples);
1372	DE_UNREF(stats);
1373}
1374
1375template <typename SampleType>
1376static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1377{
1378	DE_UNREF(log);
1379	DE_UNREF(samples);
1380	DE_UNREF(stats);
1381}
1382
1383void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
1384{
1385	log << tcu::TestLog::SampleList("Samples", "Samples")
1386		<< tcu::TestLog::SampleInfo
1387		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1388		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1389		<< tcu::TestLog::ValueInfo("UploadTime",		"Upload time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1390		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1391		<< tcu::TestLog::EndSampleInfo;
1392
1393	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1394	{
1395		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1396		log	<< tcu::TestLog::Sample
1397			<< samples[sampleNdx].writtenSize
1398			<< samples[sampleNdx].bufferSize
1399			<< (int)samples[sampleNdx].duration.totalDuration
1400			<< fitResidual
1401			<< tcu::TestLog::EndSample;
1402	}
1403
1404	log << tcu::TestLog::EndSampleList;
1405}
1406
1407void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
1408{
1409	log << tcu::TestLog::SampleList("Samples", "Samples")
1410		<< tcu::TestLog::SampleInfo
1411		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1412		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1413		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1414		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1415		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1416		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1417		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1418		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1419		<< tcu::TestLog::EndSampleInfo;
1420
1421	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1422	{
1423		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1424		log	<< tcu::TestLog::Sample
1425			<< samples[sampleNdx].writtenSize
1426			<< samples[sampleNdx].bufferSize
1427			<< (int)samples[sampleNdx].duration.totalDuration
1428			<< (int)samples[sampleNdx].duration.allocDuration
1429			<< (int)samples[sampleNdx].duration.mapDuration
1430			<< (int)samples[sampleNdx].duration.unmapDuration
1431			<< (int)samples[sampleNdx].duration.writeDuration
1432			<< fitResidual
1433			<< tcu::TestLog::EndSample;
1434	}
1435
1436	log << tcu::TestLog::EndSampleList;
1437}
1438
1439void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
1440{
1441	log << tcu::TestLog::SampleList("Samples", "Samples")
1442		<< tcu::TestLog::SampleInfo
1443		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1444		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1445		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1446		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1447		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1448		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1449		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1450		<< tcu::TestLog::EndSampleInfo;
1451
1452	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1453	{
1454		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1455		log	<< tcu::TestLog::Sample
1456			<< samples[sampleNdx].writtenSize
1457			<< samples[sampleNdx].bufferSize
1458			<< (int)samples[sampleNdx].duration.totalDuration
1459			<< (int)samples[sampleNdx].duration.mapDuration
1460			<< (int)samples[sampleNdx].duration.unmapDuration
1461			<< (int)samples[sampleNdx].duration.writeDuration
1462			<< fitResidual
1463			<< tcu::TestLog::EndSample;
1464	}
1465
1466	log << tcu::TestLog::EndSampleList;
1467}
1468
1469void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
1470{
1471	log << tcu::TestLog::SampleList("Samples", "Samples")
1472		<< tcu::TestLog::SampleInfo
1473		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1474		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1475		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1476		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1477		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1478		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1479		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1480		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1481		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1482		<< tcu::TestLog::EndSampleInfo;
1483
1484	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1485	{
1486		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1487		log	<< tcu::TestLog::Sample
1488			<< samples[sampleNdx].writtenSize
1489			<< samples[sampleNdx].bufferSize
1490			<< (int)samples[sampleNdx].duration.totalDuration
1491			<< (int)samples[sampleNdx].duration.allocDuration
1492			<< (int)samples[sampleNdx].duration.mapDuration
1493			<< (int)samples[sampleNdx].duration.unmapDuration
1494			<< (int)samples[sampleNdx].duration.writeDuration
1495			<< (int)samples[sampleNdx].duration.flushDuration
1496			<< fitResidual
1497			<< tcu::TestLog::EndSample;
1498	}
1499
1500	log << tcu::TestLog::EndSampleList;
1501}
1502
1503void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
1504{
1505	log << tcu::TestLog::SampleList("Samples", "Samples")
1506		<< tcu::TestLog::SampleInfo
1507		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1508		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1509		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1510		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1511		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1512		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1513		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1514		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1515		<< tcu::TestLog::EndSampleInfo;
1516
1517	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1518	{
1519		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1520		log	<< tcu::TestLog::Sample
1521			<< samples[sampleNdx].writtenSize
1522			<< samples[sampleNdx].bufferSize
1523			<< (int)samples[sampleNdx].duration.totalDuration
1524			<< (int)samples[sampleNdx].duration.mapDuration
1525			<< (int)samples[sampleNdx].duration.unmapDuration
1526			<< (int)samples[sampleNdx].duration.writeDuration
1527			<< (int)samples[sampleNdx].duration.flushDuration
1528			<< fitResidual
1529			<< tcu::TestLog::EndSample;
1530	}
1531
1532	log << tcu::TestLog::EndSampleList;
1533}
1534
1535void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
1536{
1537	log << tcu::TestLog::SampleList("Samples", "Samples")
1538		<< tcu::TestLog::SampleInfo
1539		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",		"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1540		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",	"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1541		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1542		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1543		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1544		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1545		<< tcu::TestLog::EndSampleInfo;
1546
1547	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1548	{
1549		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1550		log	<< tcu::TestLog::Sample
1551			<< samples[sampleNdx].renderDataSize
1552			<< samples[sampleNdx].numVertices
1553			<< (int)samples[sampleNdx].duration.renderReadDuration
1554			<< (int)samples[sampleNdx].duration.renderDuration
1555			<< (int)samples[sampleNdx].duration.readDuration
1556			<< fitResidual
1557			<< tcu::TestLog::EndSample;
1558	}
1559
1560	log << tcu::TestLog::EndSampleList;
1561}
1562
1563void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
1564{
1565	log << tcu::TestLog::SampleList("Samples", "Samples")
1566		<< tcu::TestLog::SampleInfo
1567		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1568		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",		"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1569		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",	"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1570		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1571		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1572		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1573		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1574		<< tcu::TestLog::EndSampleInfo;
1575
1576	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1577	{
1578		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1579		log	<< tcu::TestLog::Sample
1580			<< samples[sampleNdx].renderDataSize
1581			<< samples[sampleNdx].numVertices
1582			<< samples[sampleNdx].unrelatedDataSize
1583			<< (int)samples[sampleNdx].duration.renderReadDuration
1584			<< (int)samples[sampleNdx].duration.renderDuration
1585			<< (int)samples[sampleNdx].duration.readDuration
1586			<< fitResidual
1587			<< tcu::TestLog::EndSample;
1588	}
1589
1590	log << tcu::TestLog::EndSampleList;
1591}
1592
1593void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
1594{
1595	log << tcu::TestLog::SampleList("Samples", "Samples")
1596		<< tcu::TestLog::SampleInfo
1597		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1598		<< tcu::TestLog::ValueInfo("UploadSize",		"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1599		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1600		<< tcu::TestLog::ValueInfo("DrawReadTime",		"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1601		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1602		<< tcu::TestLog::ValueInfo("Upload time",		"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1603		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1604		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1605		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1606		<< tcu::TestLog::EndSampleInfo;
1607
1608	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1609	{
1610		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1611		log	<< tcu::TestLog::Sample
1612			<< samples[sampleNdx].renderDataSize
1613			<< samples[sampleNdx].uploadedDataSize
1614			<< samples[sampleNdx].numVertices
1615			<< (int)samples[sampleNdx].duration.renderReadDuration
1616			<< (int)samples[sampleNdx].duration.totalDuration
1617			<< (int)samples[sampleNdx].duration.uploadDuration
1618			<< (int)samples[sampleNdx].duration.renderDuration
1619			<< (int)samples[sampleNdx].duration.readDuration
1620			<< fitResidual
1621			<< tcu::TestLog::EndSample;
1622	}
1623
1624	log << tcu::TestLog::EndSampleList;
1625}
1626
1627void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
1628{
1629	log << tcu::TestLog::SampleList("Samples", "Samples")
1630		<< tcu::TestLog::SampleInfo
1631		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1632		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1633		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1634		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1635		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1636		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1637		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1638		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1639		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1640		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1641		<< tcu::TestLog::EndSampleInfo;
1642
1643	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1644	{
1645		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1646		log	<< tcu::TestLog::Sample
1647			<< samples[sampleNdx].renderDataSize
1648			<< samples[sampleNdx].uploadedDataSize
1649			<< samples[sampleNdx].numVertices
1650			<< samples[sampleNdx].unrelatedDataSize
1651			<< (int)samples[sampleNdx].duration.renderReadDuration
1652			<< (int)samples[sampleNdx].duration.totalDuration
1653			<< (int)samples[sampleNdx].duration.uploadDuration
1654			<< (int)samples[sampleNdx].duration.renderDuration
1655			<< (int)samples[sampleNdx].duration.readDuration
1656			<< fitResidual
1657			<< tcu::TestLog::EndSample;
1658	}
1659
1660	log << tcu::TestLog::EndSampleList;
1661}
1662
1663void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
1664{
1665	log << tcu::TestLog::SampleList("Samples", "Samples")
1666		<< tcu::TestLog::SampleInfo
1667		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1668		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1669		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",					"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1670		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Second draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1671		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1672		<< tcu::TestLog::ValueInfo("FirstDrawCallTime",		"First draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1673		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1674		<< tcu::TestLog::ValueInfo("SecondDrawCallTime",	"Second draw call time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1675		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1676		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1677		<< tcu::TestLog::EndSampleInfo;
1678
1679	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1680	{
1681		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1682		log	<< tcu::TestLog::Sample
1683			<< samples[sampleNdx].renderDataSize
1684			<< samples[sampleNdx].uploadedDataSize
1685			<< samples[sampleNdx].numVertices
1686			<< (int)samples[sampleNdx].duration.renderReadDuration
1687			<< (int)samples[sampleNdx].duration.totalDuration
1688			<< (int)samples[sampleNdx].duration.firstRenderDuration
1689			<< (int)samples[sampleNdx].duration.uploadDuration
1690			<< (int)samples[sampleNdx].duration.secondRenderDuration
1691			<< (int)samples[sampleNdx].duration.readDuration
1692			<< fitResidual
1693			<< tcu::TestLog::EndSample;
1694	}
1695
1696	log << tcu::TestLog::EndSampleList;
1697}
1698
1699template <typename SampleType>
1700static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
1701{
1702	// Assume data is linear with some outliers, fit a line
1703	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1704	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1705	float													approximatedTransferRate;
1706	float													approximatedTransferRateNoConstant;
1707
1708	// Output raw samples
1709	{
1710		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1711		logSampleList(log, theilSenFitting, samples);
1712	}
1713
1714	// Calculate results for different ranges
1715	if (logBucketPerformance)
1716	{
1717		const int										numBuckets				= 4;
1718		int												minBufferSize			= 0;
1719		int												maxBufferSize			= 0;
1720		std::vector<UploadSampleResult<SampleType> >	buckets[numBuckets];
1721
1722		bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
1723
1724		for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
1725		{
1726			if (buckets[bucketNdx].empty())
1727				continue;
1728
1729			// Print a nice result summary
1730
1731			const int												bucketRangeMin	= minBufferSize + (int)(( bucketNdx    / (float)numBuckets) * (maxBufferSize - minBufferSize));
1732			const int												bucketRangeMax	= minBufferSize + (int)(((bucketNdx+1) / (float)numBuckets) * (maxBufferSize - minBufferSize));
1733			const typename SampleTypeTraits<SampleType>::StatsType	stats			= calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
1734			const tcu::ScopedLogSection								section			(log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
1735
1736			logMapRangeStats<SampleType>(log, stats);
1737			logUnmapStats<SampleType>(log, stats);
1738			logWriteStats<SampleType>(log, stats);
1739			logFlushStats<SampleType>(log, stats);
1740			logAllocStats<SampleType>(log, stats);
1741
1742			log	<< tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
1743				<< tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
1744				<< tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
1745				<< tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
1746				<< tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
1747				<< tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
1748				<< tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
1749				<< tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
1750				<< tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
1751				<< tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
1752				<< tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
1753				<< tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
1754		}
1755	}
1756
1757	// Contributions
1758	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1759	{
1760		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1761
1762		logMapContribution(log, samples, resultStats);
1763		logUnmapContribution(log, samples, resultStats);
1764		logWriteContribution(log, samples, resultStats);
1765		logFlushContribution(log, samples, resultStats);
1766		logAllocContribution(log, samples, resultStats);
1767	}
1768
1769	// Print results
1770	{
1771		const tcu::ScopedLogSection	section(log, "Results", "Results");
1772
1773		const int	medianBufferSize					= (samples.front().bufferSize + samples.back().bufferSize) / 2;
1774		const float	approximatedTransferTime			= (theilSenFitting.offset + theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1775		const float	approximatedTransferTimeNoConstant	= (theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1776		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1777		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1778
1779		approximatedTransferRateNoConstant				= medianBufferSize / approximatedTransferTimeNoConstant;
1780		approximatedTransferRate						= medianBufferSize / approximatedTransferTime;
1781
1782		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1783			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1784			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1785			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1786			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1787			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1788			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1789			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1790			<< tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
1791			<< tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
1792			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1793			<< tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1794	}
1795
1796	// return approximated transfer rate
1797	{
1798		UploadSampleAnalyzeResult result;
1799
1800		result.transferRateMedian = resultStats.medianRate;
1801		result.transferRateAtRange = approximatedTransferRate;
1802		result.transferRateAtInfinity = approximatedTransferRateNoConstant;
1803
1804		return result;
1805	}
1806}
1807
1808template <typename SampleType>
1809static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
1810{
1811	// Assume data is linear with some outliers, fit a line
1812	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1813	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1814	float													approximatedProcessingRate;
1815	float													approximatedProcessingRateNoConstant;
1816
1817	// output raw samples
1818	{
1819		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1820		logSampleList(log, theilSenFitting, samples);
1821	}
1822
1823	// Contributions
1824	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1825	{
1826		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1827
1828		logFirstRenderContribution(log, samples, resultStats);
1829		logUploadContribution(log, samples, resultStats);
1830		logRenderContribution(log, samples, resultStats);
1831		logSecondRenderContribution(log, samples, resultStats);
1832		logReadContribution(log, samples, resultStats);
1833		logTotalContribution(log, samples, resultStats);
1834	}
1835
1836	// print results
1837	{
1838		const tcu::ScopedLogSection	section(log, "Results", "Results");
1839
1840		const int	medianDataSize						= (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
1841		const float	approximatedRenderTime				= (theilSenFitting.offset + theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1842		const float	approximatedRenderTimeNoConstant	= (theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1843		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1844		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1845
1846		approximatedProcessingRateNoConstant			= medianDataSize / approximatedRenderTimeNoConstant;
1847		approximatedProcessingRate						= medianDataSize / approximatedRenderTime;
1848
1849		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1850			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1851			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1852			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1853			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1854			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1855			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1856			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1857			<< tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
1858			<< tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
1859			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1860			<< tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1861	}
1862
1863	// return approximated render rate
1864	{
1865		RenderSampleAnalyzeResult result;
1866
1867		result.renderRateMedian		= resultStats.medianRate;
1868		result.renderRateAtRange	= approximatedProcessingRate;
1869		result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
1870
1871		return result;
1872	}
1873	return RenderSampleAnalyzeResult();
1874}
1875
1876static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
1877{
1878	de::Random	rnd			(0xabc);
1879	const int	midPoint	= (numSamples+1) / 2;		// !< ceil(m_numSamples / 2)
1880
1881	DE_ASSERT((int)iterationOrder.size() == numSamples);
1882
1883	// Two "passes" over range, randomize order in both passes
1884	// This allows to us detect if iterations are not independent
1885	// (first run and later run samples differ significantly?)
1886
1887	for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
1888		iterationOrder[sampleNdx] = sampleNdx * 2;
1889	for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
1890		iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
1891
1892	for (int ndx = 0; ndx < midPoint; ++ndx)
1893		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
1894	for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
1895		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]);
1896}
1897
/*--------------------------------------------------------------------*//*!
 * \brief Common base for buffer upload performance cases.
 *
 * The constructor precomputes one UploadSampleResult per sample with its
 * buffer size and allocated size, and a shuffled iteration order.
 * Subclasses provide the actual measurement (runSample) and the result
 * reporting (logAndSetTestResult).
 *
 * \tparam SampleType	Duration record type stored in each sample result.
 *//*--------------------------------------------------------------------*/
template <typename SampleType>
class BasicBufferCase : public TestCase
{
public:

	enum Flags
	{
		//! Allocate 1.5x the sample's buffer size (aligned to 16) instead of the exact size.
		FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
	};
							BasicBufferCase		(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
							~BasicBufferCase	(void);

	//! Builds the dummy shader program and looks up its position attribute; does nothing when GL use is disabled.
	virtual void			init				(void);
	//! Deletes the buffer object (if any) and the dummy shader program.
	virtual void			deinit				(void);

protected:
	IterateResult			iterate				(void);

	// Measurement and reporting hooks implemented by subclasses.
	// NOTE(review): exact contract (meaning of the bool return, when they are called) is defined by iterate(); confirm there.
	virtual bool			runSample			(int iteration, UploadSampleResult<SampleType>& sample) = 0;
	virtual void			logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results) = 0;

	void					disableGLWarmup		(void);
	void					waitGLResults		(void);

	enum
	{
		//! Width and height of the viewport used for the dummy draws.
		DUMMY_RENDER_AREA_SIZE = 32
	};

	glu::ShaderProgram*		m_dummyProgram;			//!< Owned; created in init(), deleted in deinit().
	deInt32					m_dummyProgramPosLoc;	//!< Location of "a_position" in m_dummyProgram, -1 until init().
	deUint32				m_bufferID;				//!< GL buffer name deleted in deinit() when non-zero.

	const int				m_numSamples;
	const int				m_bufferSizeMin;
	const int				m_bufferSizeMax;
	const bool				m_allocateLargerBuffer;	//!< Set from FLAG_ALLOCATE_LARGER_BUFFER.

private:
	int						m_iteration;
	std::vector<int>		m_iterationOrder;		//!< Filled by generateTwoPassRandomIterationOrder().
	std::vector<UploadSampleResult<SampleType> > m_results;	//!< One entry per sample, sizes prefilled by the constructor.

	bool					m_useGL;				//!< When false, init() skips all GL setup.
	int						m_bufferRandomizerTimer;	//!< Counter used by iterate() to run random buffer operations every 8th call.
};
1944
1945template <typename SampleType>
1946BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
1947	: TestCase					(context, tcu::NODETYPE_PERFORMANCE, name, desc)
1948	, m_dummyProgram			(DE_NULL)
1949	, m_dummyProgramPosLoc		(-1)
1950	, m_bufferID				(0)
1951	, m_numSamples				(numSamples)
1952	, m_bufferSizeMin			(bufferSizeMin)
1953	, m_bufferSizeMax			(bufferSizeMax)
1954	, m_allocateLargerBuffer	((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
1955	, m_iteration				(0)
1956	, m_iterationOrder			(numSamples)
1957	, m_results					(numSamples)
1958	, m_useGL					(true)
1959	, m_bufferRandomizerTimer	(0)
1960{
1961	// "randomize" iteration order. Deterministic, patternless
1962	generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
1963
1964	// choose buffer sizes
1965	for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
1966	{
1967		const int rawBufferSize			= (int)deFloatFloor(bufferSizeMin + (bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / m_numSamples));
1968		const int bufferSize			= deAlign32(rawBufferSize, 16);
1969		const int allocatedBufferSize	= deAlign32((m_allocateLargerBuffer) ? ((int)(bufferSize * 1.5f)) : (bufferSize), 16);
1970
1971		m_results[sampleNdx].bufferSize		= bufferSize;
1972		m_results[sampleNdx].allocatedSize	= allocatedBufferSize;
1973		m_results[sampleNdx].writtenSize	= -1;
1974	}
1975}
1976
1977template <typename SampleType>
1978BasicBufferCase<SampleType>::~BasicBufferCase (void)
1979{
1980	deinit();
1981}
1982
1983template <typename SampleType>
1984void BasicBufferCase<SampleType>::init (void)
1985{
1986	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1987
1988	if (!m_useGL)
1989		return;
1990
1991	// \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE
1992
1993	// dummy shader
1994
1995	m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader));
1996	if (!m_dummyProgram->isOk())
1997	{
1998		m_testCtx.getLog() << *m_dummyProgram;
1999		throw tcu::TestError("failed to build shader program");
2000	}
2001
2002	m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");
2003	if (m_dummyProgramPosLoc == -1)
2004		throw tcu::TestError("a_position location was -1");
2005}
2006
2007template <typename SampleType>
2008void BasicBufferCase<SampleType>::deinit (void)
2009{
2010	if (m_bufferID)
2011	{
2012		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2013		m_bufferID = 0;
2014	}
2015
2016	delete m_dummyProgram;
2017	m_dummyProgram = DE_NULL;
2018}
2019
2020template <typename SampleType>
2021TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
2022{
2023	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2024	static bool				buffersWarmedUp	= false;
2025
2026	static const deUint32	usages[] =
2027	{
2028		GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
2029		GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
2030		GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2031	};
2032
2033	// Allocate some random sized buffers and remove them to
2034	// make sure the first samples too have some buffers removed
2035	// just before their allocation. This is only needed by the
2036	// the first test.
2037
2038	if (m_useGL && !buffersWarmedUp)
2039	{
2040		const int					numRandomBuffers				= 6;
2041		const int					numRepeats						= 10;
2042		const int					maxBufferSize					= 16777216;
2043		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2044		de::Random					rnd								(0x1234);
2045		deUint32					bufferIDs[numRandomBuffers]		= {0};
2046
2047		gl.useProgram(m_dummyProgram->getProgram());
2048		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2049		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2050
2051		for (int ndx = 0; ndx < numRepeats; ++ndx)
2052		{
2053			// Create buffer and maybe draw from it
2054			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2055			{
2056				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2057				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2058
2059				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2060				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2061				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2062
2063				if (rnd.getBool())
2064				{
2065					gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2066					gl.drawArrays(GL_POINTS, 0, 1);
2067					gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2068				}
2069			}
2070
2071			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2072				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2073
2074			waitGLResults();
2075			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2076
2077			m_testCtx.touchWatchdog();
2078		}
2079
2080		buffersWarmedUp = true;
2081		return CONTINUE;
2082	}
2083	else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2084	{
2085		// Do some random buffer operations to every now and then
2086		// to make sure the previous test iterations won't affect
2087		// following test runs.
2088
2089		const int					numRandomBuffers				= 3;
2090		const int					maxBufferSize					= 16777216;
2091		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2092		de::Random					rnd								(0x1234 + 0xabc * m_bufferRandomizerTimer);
2093
2094		// BufferData
2095		{
2096			deUint32 bufferIDs[numRandomBuffers] = {0};
2097
2098			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2099			{
2100				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2101				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2102
2103				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2104				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2105				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2106			}
2107
2108			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2109				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2110		}
2111
2112		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2113
2114		// Do some memory mappings
2115		{
2116			deUint32 bufferIDs[numRandomBuffers] = {0};
2117
2118			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2119			{
2120				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2121				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2122				void*			ptr;
2123
2124				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2125				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2126				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2127
2128				gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2129				gl.drawArrays(GL_POINTS, 0, 1);
2130				gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2131
2132				if (rnd.getBool())
2133					waitGLResults();
2134
2135				ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2136				if (ptr)
2137				{
2138					medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2139					gl.unmapBuffer(GL_ARRAY_BUFFER);
2140				}
2141			}
2142
2143			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2144				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2145
2146			waitGLResults();
2147		}
2148
2149		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2150		return CONTINUE;
2151	}
2152	else
2153	{
2154		const int	currentIteration	= m_iteration;
2155		const int	sampleNdx			= m_iterationOrder[currentIteration];
2156		const bool	sampleRunSuccessful	= runSample(currentIteration, m_results[sampleNdx]);
2157
2158		GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2159
2160		// Retry failed samples
2161		if (!sampleRunSuccessful)
2162			return CONTINUE;
2163
2164		if (++m_iteration >= m_numSamples)
2165		{
2166			logAndSetTestResult(m_results);
2167			return STOP;
2168		}
2169		else
2170			return CONTINUE;
2171	}
2172}
2173
2174template <typename SampleType>
2175void BasicBufferCase<SampleType>::disableGLWarmup (void)
2176{
2177	m_useGL = false;
2178}
2179
2180template <typename SampleType>
2181void BasicBufferCase<SampleType>::waitGLResults (void)
2182{
2183	tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2184	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
2185}
2186
2187template <typename SampleType>
2188class BasicUploadCase : public BasicBufferCase<SampleType>
2189{
2190public:
2191	enum CaseType
2192	{
2193		CASE_NO_BUFFERS = 0,
2194		CASE_NEW_BUFFER,
2195		CASE_UNSPECIFIED_BUFFER,
2196		CASE_SPECIFIED_BUFFER,
2197		CASE_USED_BUFFER,
2198		CASE_USED_LARGER_BUFFER,
2199
2200		CASE_LAST
2201	};
2202
2203	enum CaseFlags
2204	{
2205		FLAG_DONT_LOG_BUFFER_INFO				= 0x01,
2206		FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT	= 0x02,
2207	};
2208
2209	enum ResultType
2210	{
2211		RESULT_MEDIAN_TRANSFER_RATE = 0,
2212		RESULT_ASYMPTOTIC_TRANSFER_RATE,
2213	};
2214
2215						BasicUploadCase		(Context& context,
2216											 const char* name,
2217											 const char* desc,
2218											 int bufferSizeMin,
2219											 int bufferSizeMax,
2220											 int numSamples,
2221											 deUint32 bufferUsage,
2222											 CaseType caseType,
2223											 ResultType resultType,
2224											 int flags = 0);
2225
2226						~BasicUploadCase	(void);
2227
2228	virtual void		init				(void);
2229	virtual void		deinit				(void);
2230
2231private:
2232	bool				runSample			(int iteration, UploadSampleResult<SampleType>& sample);
2233	void				createBuffer		(int bufferSize, int iteration);
2234	void				deleteBuffer		(int bufferSize);
2235	void				useBuffer			(int bufferSize);
2236
2237	virtual void		testBufferUpload	(UploadSampleResult<SampleType>& result, int writeSize) = 0;
2238	void				logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results);
2239
2240	deUint32			m_dummyBufferID;
2241
2242protected:
2243	const CaseType		m_caseType;
2244	const ResultType	m_resultType;
2245	const deUint32		m_bufferUsage;
2246	const bool			m_logBufferInfo;
2247	const bool			m_bufferUnspecifiedContent;
2248	std::vector<deUint8> m_zeroData;
2249
2250	using BasicBufferCase<SampleType>::m_testCtx;
2251	using BasicBufferCase<SampleType>::m_context;
2252
2253	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
2254	using BasicBufferCase<SampleType>::m_dummyProgram;
2255	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
2256	using BasicBufferCase<SampleType>::m_bufferID;
2257	using BasicBufferCase<SampleType>::m_numSamples;
2258	using BasicBufferCase<SampleType>::m_bufferSizeMin;
2259	using BasicBufferCase<SampleType>::m_bufferSizeMax;
2260	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2261};
2262
2263template <typename SampleType>
2264BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
2265	: BasicBufferCase<SampleType>	(context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2266	, m_dummyBufferID				(0)
2267	, m_caseType					(caseType)
2268	, m_resultType					(resultType)
2269	, m_bufferUsage					(bufferUsage)
2270	, m_logBufferInfo				((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2271	, m_bufferUnspecifiedContent	((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2272	, m_zeroData					()
2273{
2274	DE_ASSERT(m_caseType < CASE_LAST);
2275}
2276
2277template <typename SampleType>
2278BasicUploadCase<SampleType>::~BasicUploadCase (void)
2279{
2280	deinit();
2281}
2282
2283template <typename SampleType>
2284void BasicUploadCase<SampleType>::init (void)
2285{
2286	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2287
2288	BasicBufferCase<SampleType>::init();
2289
2290	// zero buffer as upload source
2291	m_zeroData.resize(m_bufferSizeMax, 0x00);
2292
2293	// dummy buffer
2294
2295	gl.genBuffers(1, &m_dummyBufferID);
2296	GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2297
2298	// log basic info
2299
2300	m_testCtx.getLog()
2301		<< tcu::TestLog::Message
2302		<< "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
2303		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
2304		<< tcu::TestLog::EndMessage;
2305
2306	if (m_logBufferInfo)
2307	{
2308		switch (m_caseType)
2309		{
2310			case CASE_NO_BUFFERS:
2311				break;
2312
2313			case CASE_NEW_BUFFER:
2314				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage;
2315				break;
2316
2317			case CASE_UNSPECIFIED_BUFFER:
2318				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage;
2319				break;
2320
2321			case CASE_SPECIFIED_BUFFER:
2322				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage;
2323				break;
2324
2325			case CASE_USED_BUFFER:
2326				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage;
2327				break;
2328
2329			case CASE_USED_LARGER_BUFFER:
2330				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage;
2331				break;
2332
2333			default:
2334				DE_ASSERT(false);
2335				break;
2336		}
2337	}
2338
2339	if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2340		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
2341	else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2342		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
2343	else
2344		DE_ASSERT(false);
2345}
2346
2347template <typename SampleType>
2348void BasicUploadCase<SampleType>::deinit (void)
2349{
2350	if (m_dummyBufferID)
2351	{
2352		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID);
2353		m_dummyBufferID = 0;
2354	}
2355
2356	m_zeroData.clear();
2357
2358	BasicBufferCase<SampleType>::deinit();
2359}
2360
2361template <typename SampleType>
2362bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
2363{
2364	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2365	const int				allocatedBufferSize	= sample.allocatedSize;
2366	const int				bufferSize			= sample.bufferSize;
2367
2368	if (m_caseType != CASE_NO_BUFFERS)
2369		createBuffer(iteration, allocatedBufferSize);
2370
2371	// warmup CPU before the test to make sure the power management governor
2372	// keeps us in the "high performance" mode
2373	{
2374		deYield();
2375		tcu::warmupCPU();
2376		deYield();
2377	}
2378
2379	testBufferUpload(sample, bufferSize);
2380	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2381
2382	if (m_caseType != CASE_NO_BUFFERS)
2383		deleteBuffer(bufferSize);
2384
2385	return true;
2386}
2387
2388template <typename SampleType>
2389void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
2390{
2391	DE_ASSERT(!m_bufferID);
2392	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2393
2394	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2395
2396	// create buffer
2397
2398	if (m_caseType == CASE_NO_BUFFERS)
2399		return;
2400
2401	// create empty buffer
2402
2403	gl.genBuffers(1, &m_bufferID);
2404	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2405	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2406
2407	if (m_caseType == CASE_NEW_BUFFER)
2408	{
2409		// upload something else first, this should reduce noise in samples
2410
2411		de::Random					rng				(0xbadc * iteration);
2412		const int					sizeDelta		= rng.getInt(0, 2097140);
2413		const int					dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated
2414		const std::vector<deUint8>	dummyData		(dummyUploadSize, 0x20);
2415
2416		gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
2417		gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage);
2418
2419		// make sure upload won't interfere with the test
2420		useBuffer(dummyUploadSize);
2421
2422		// don't kill the buffer so that the following upload cannot potentially reuse the buffer
2423
2424		return;
2425	}
2426
2427	// specify it
2428
2429	if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2430		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2431	else
2432	{
2433		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2434		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2435	}
2436
2437	if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2438		return;
2439
2440	// use it and make sure it is uploaded
2441
2442	useBuffer(bufferSize);
2443	DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2444}
2445
2446template <typename SampleType>
2447void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
2448{
2449	DE_ASSERT(m_bufferID);
2450	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2451
2452	// render from the buffer to make sure it actually made it to the gpu. This is to
2453	// make sure that if the upload actually happens later or is happening right now in
2454	// the background, it will not interfere with further test runs
2455
2456	// if buffer contains unspecified content, sourcing data from it results in undefined
2457	// results, possibly including program termination. Specify all data to prevent such
2458	// case from happening
2459
2460	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2461
2462	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2463
2464	if (m_bufferUnspecifiedContent)
2465	{
2466		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2467		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2468
2469		GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2470	}
2471
2472	useBuffer(bufferSize);
2473
2474	gl.deleteBuffers(1, &m_bufferID);
2475	m_bufferID = 0;
2476}
2477
2478template <typename SampleType>
2479void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
2480{
2481	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2482
2483	gl.useProgram(m_dummyProgram->getProgram());
2484
2485	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2486	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2487	gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2488
2489	// use whole buffer to make sure buffer is uploaded by drawing first and last
2490	DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2491	gl.drawArrays(GL_POINTS, 0, 1);
2492	gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2493
2494	BasicBufferCase<SampleType>::waitGLResults();
2495}
2496
2497template <typename SampleType>
2498void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
2499{
2500	const UploadSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), results, true);
2501
2502	// with small buffers, report the median transfer rate of the samples
2503	// with large buffers, report the expected preformance of infinitely large buffers
2504	const float						rate		= (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);
2505
2506	if (rate == std::numeric_limits<float>::infinity())
2507	{
2508		// sample times are 1) invalid or 2) timer resolution too low
2509		// report speed 0 bytes / s since real value cannot be determined
2510		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2511	}
2512	else
2513	{
2514		// report transfer rate in MB / s
2515		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2516	}
2517}
2518
2519class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2520{
2521public:
2522				ReferenceMemcpyCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
2523				~ReferenceMemcpyCase	(void);
2524
2525	void		init					(void);
2526	void		deinit					(void);
2527private:
2528	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2529
2530	std::vector<deUint8> m_dstBuf;
2531};
2532
2533ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
2534	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2535	, m_dstBuf									()
2536{
2537	disableGLWarmup();
2538}
2539
2540ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
2541{
2542}
2543
2544void ReferenceMemcpyCase::init (void)
2545{
2546	// Describe what the test tries to do
2547	m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2548
2549	m_dstBuf.resize(m_bufferSizeMax, 0x00);
2550
2551	BasicUploadCase<SingleOperationDuration>::init();
2552}
2553
2554void ReferenceMemcpyCase::deinit (void)
2555{
2556	m_dstBuf.clear();
2557	BasicUploadCase<SingleOperationDuration>::deinit();
2558}
2559
2560void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2561{
2562	// write
2563	result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2564	result.duration.fitResponseDuration = result.duration.totalDuration;
2565
2566	result.writtenSize = bufferSize;
2567}
2568
2569class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2570{
2571public:
2572				BufferDataUploadCase	(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
2573				~BufferDataUploadCase	(void);
2574
2575	void		init					(void);
2576private:
2577	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2578};
2579
2580BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
2581	: BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE)
2582{
2583}
2584
2585BufferDataUploadCase::~BufferDataUploadCase (void)
2586{
2587}
2588
2589void BufferDataUploadCase::init (void)
2590{
2591	// Describe what the test tries to do
2592	m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2593
2594	BasicUploadCase<SingleOperationDuration>::init();
2595}
2596
2597void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2598{
2599	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2600
2601	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2602
2603	// upload
2604	{
2605		deUint64 startTime;
2606		deUint64 endTime;
2607
2608		startTime = deGetMicroseconds();
2609		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2610		endTime = deGetMicroseconds();
2611
2612		result.duration.totalDuration = endTime - startTime;
2613		result.duration.fitResponseDuration = result.duration.totalDuration;
2614		result.writtenSize = bufferSize;
2615	}
2616}
2617
2618class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2619{
2620public:
2621	enum Flags
2622	{
2623		FLAG_FULL_UPLOAD			= 0x01,
2624		FLAG_PARTIAL_UPLOAD			= 0x02,
2625		FLAG_INVALIDATE_BEFORE_USE	= 0x04,
2626	};
2627
2628				BufferSubDataUploadCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
2629				~BufferSubDataUploadCase	(void);
2630
2631	void		init						(void);
2632private:
2633	void		testBufferUpload			(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2634
2635	const bool	m_fullUpload;
2636	const bool	m_invalidateBeforeUse;
2637};
2638
2639BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
2640	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE)
2641	, m_fullUpload								((flags & FLAG_FULL_UPLOAD) != 0)
2642	, m_invalidateBeforeUse						((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
2643{
2644	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
2645	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
2646}
2647
2648BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
2649{
2650}
2651
2652void BufferSubDataUploadCase::init (void)
2653{
2654	// Describe what the test tries to do
2655	m_testCtx.getLog()
2656		<< tcu::TestLog::Message
2657		<< "Testing glBufferSubData() function call performance. "
2658		<< ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. "))
2659		<< ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n"
2660		<< tcu::TestLog::EndMessage;
2661
2662	BasicUploadCase<SingleOperationDuration>::init();
2663}
2664
2665void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2666{
2667	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2668
2669	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2670
2671	// "invalidate", upload null
2672	if (m_invalidateBeforeUse)
2673		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2674
2675	// upload
2676	{
2677		deUint64 startTime;
2678		deUint64 endTime;
2679
2680		startTime = deGetMicroseconds();
2681
2682		if (m_fullUpload)
2683			gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
2684		else
2685		{
2686			// upload to buffer center
2687			gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
2688		}
2689
2690		endTime = deGetMicroseconds();
2691
2692		result.duration.totalDuration = endTime - startTime;
2693		result.duration.fitResponseDuration = result.duration.totalDuration;
2694
2695		if (m_fullUpload)
2696			result.writtenSize = bufferSize;
2697		else
2698			result.writtenSize = bufferSize / 2;
2699	}
2700}
2701
2702class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
2703{
2704public:
2705	enum Flags
2706	{
2707		FLAG_PARTIAL						= 0x01,
2708		FLAG_MANUAL_INVALIDATION			= 0x02,
2709		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2710		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2711	};
2712
2713					MapBufferRangeCase			(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2714					~MapBufferRangeCase			(void);
2715
2716	void			init						(void);
2717private:
2718	static CaseType getBaseCaseType				(int caseFlags);
2719	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2720
2721	void			testBufferUpload			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2722	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2723
2724	const bool		m_manualInvalidation;
2725	const bool		m_fullUpload;
2726	const bool		m_useUnusedUnspecifiedBuffer;
2727	const bool		m_useUnusedSpecifiedBuffer;
2728	const deUint32	m_mapFlags;
2729	int				m_unmapFailures;
2730};
2731
2732MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2733	: BasicUploadCase<MapBufferRangeDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2734	, m_manualInvalidation						((caseFlags&FLAG_MANUAL_INVALIDATION) != 0)
2735	, m_fullUpload								((caseFlags&FLAG_PARTIAL) == 0)
2736	, m_useUnusedUnspecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2737	, m_useUnusedSpecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2738	, m_mapFlags								(mapFlags)
2739	, m_unmapFailures							(0)
2740{
2741	DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
2742	DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
2743}
2744
2745MapBufferRangeCase::~MapBufferRangeCase (void)
2746{
2747}
2748
2749void MapBufferRangeCase::init (void)
2750{
2751	// Describe what the test tries to do
2752	m_testCtx.getLog()
2753		<< tcu::TestLog::Message
2754		<< "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
2755		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2756		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2757		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2758		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2759		<< ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
2760		<< "Map bits:\n"
2761		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2762		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2763		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2764		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2765		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2766		<< tcu::TestLog::EndMessage;
2767
2768	BasicUploadCase<MapBufferRangeDuration>::init();
2769}
2770
2771MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
2772{
2773	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2774		return CASE_USED_BUFFER;
2775	else
2776		return CASE_NEW_BUFFER;
2777}
2778
2779int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2780{
2781	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2782
2783	// If buffer contains unspecified data when it is sourced (i.e drawn)
2784	// results are undefined, and system errors may occur. Signal parent
2785	// class to take this into account
2786	if (caseFlags & FLAG_PARTIAL)
2787	{
2788		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2789			(caseFlags & FLAG_MANUAL_INVALIDATION) != 0				||
2790			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2791		{
2792			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2793		}
2794	}
2795
2796	return flags;
2797}
2798
2799void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2800{
2801	const int unmapFailureThreshold = 4;
2802
2803	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
2804	{
2805		try
2806		{
2807			attemptBufferMap(result, bufferSize);
2808			return;
2809		}
2810		catch (UnmapFailureError&)
2811		{
2812		}
2813	}
2814
2815	throw tcu::TestError("Unmapping failures exceeded limit");
2816}
2817
2818void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2819{
2820	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2821
2822	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2823
2824	if (m_fullUpload)
2825		result.writtenSize = bufferSize;
2826	else
2827		result.writtenSize = bufferSize / 2;
2828
2829	// Create unused buffer
2830
2831	if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
2832	{
2833		deUint64 startTime;
2834		deUint64 endTime;
2835
2836		// "invalidate" or allocate, upload null
2837		startTime = deGetMicroseconds();
2838		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2839		endTime = deGetMicroseconds();
2840
2841		result.duration.allocDuration = endTime - startTime;
2842	}
2843	else if (m_useUnusedSpecifiedBuffer)
2844	{
2845		deUint64 startTime;
2846		deUint64 endTime;
2847
2848		// Specify buffer contents
2849		startTime = deGetMicroseconds();
2850		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2851		endTime = deGetMicroseconds();
2852
2853		result.duration.allocDuration = endTime - startTime;
2854	}
2855	else
2856	{
2857		// No alloc, no time
2858		result.duration.allocDuration = 0;
2859	}
2860
2861	// upload
2862	{
2863		void* mapPtr;
2864
2865		// Map
2866		{
2867			deUint64 startTime;
2868			deUint64 endTime;
2869
2870			startTime = deGetMicroseconds();
2871			if (m_fullUpload)
2872				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
2873			else
2874			{
2875				// upload to buffer center
2876				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
2877			}
2878			endTime = deGetMicroseconds();
2879
2880			if (!mapPtr)
2881				throw tcu::Exception("MapBufferRange returned NULL");
2882
2883			result.duration.mapDuration = endTime - startTime;
2884		}
2885
2886		// Write
2887		{
2888			result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
2889		}
2890
2891		// Unmap
2892		{
2893			deUint64		startTime;
2894			deUint64		endTime;
2895			glw::GLboolean	unmapSuccessful;
2896
2897			startTime = deGetMicroseconds();
2898			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
2899			endTime = deGetMicroseconds();
2900
2901			// if unmapping fails, just try again later
2902			if (!unmapSuccessful)
2903				throw UnmapFailureError();
2904
2905			result.duration.unmapDuration = endTime - startTime;
2906		}
2907
2908		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
2909		result.duration.fitResponseDuration = result.duration.totalDuration;
2910	}
2911}
2912
2913class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
2914{
2915public:
2916	enum Flags
2917	{
2918		FLAG_PARTIAL						= 0x01,
2919		FLAG_FLUSH_IN_PARTS					= 0x02,
2920		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2921		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2922		FLAG_FLUSH_PARTIAL					= 0x10,
2923	};
2924
2925					MapBufferRangeFlushCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2926					~MapBufferRangeFlushCase	(void);
2927
2928	void			init						(void);
2929private:
2930	static CaseType getBaseCaseType				(int caseFlags);
2931	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2932
2933	void			testBufferUpload			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2934	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2935
2936	const bool		m_fullUpload;
2937	const bool		m_flushInParts;
2938	const bool		m_flushPartial;
2939	const bool		m_useUnusedUnspecifiedBuffer;
2940	const bool		m_useUnusedSpecifiedBuffer;
2941	const deUint32	m_mapFlags;
2942	int				m_unmapFailures;
2943};
2944
2945MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2946	: BasicUploadCase<MapBufferRangeFlushDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2947	, m_fullUpload									((caseFlags&FLAG_PARTIAL) == 0)
2948	, m_flushInParts								((caseFlags&FLAG_FLUSH_IN_PARTS) != 0)
2949	, m_flushPartial								((caseFlags&FLAG_FLUSH_PARTIAL) != 0)
2950	, m_useUnusedUnspecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2951	, m_useUnusedSpecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2952	, m_mapFlags									(mapFlags)
2953	, m_unmapFailures								(0)
2954{
2955	DE_ASSERT(!(m_flushPartial && m_flushInParts));
2956	DE_ASSERT(!(m_flushPartial && !m_fullUpload));
2957}
2958
2959MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
2960{
2961}
2962
2963void MapBufferRangeFlushCase::init (void)
2964{
2965	// Describe what the test tries to do
2966	m_testCtx.getLog()
2967		<< tcu::TestLog::Message
2968		<< "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
2969		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2970		<< ((m_flushInParts) ?
2971			("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
2972			(m_flushPartial) ?
2973				("Half of the buffer range is flushed.") :
2974				("The whole mapped range is flushed in one flush call.")) << "\n"
2975		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2976		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2977		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2978		<< "Map bits:\n"
2979		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2980		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2981		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2982		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2983		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2984		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
2985		<< tcu::TestLog::EndMessage;
2986
2987	BasicUploadCase<MapBufferRangeFlushDuration>::init();
2988}
2989
2990MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
2991{
2992	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2993		return CASE_USED_BUFFER;
2994	else
2995		return CASE_NEW_BUFFER;
2996}
2997
2998int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2999{
3000	int flags = FLAG_DONT_LOG_BUFFER_INFO;
3001
3002	// If buffer contains unspecified data when it is sourced (i.e drawn)
3003	// results are undefined, and system errors may occur. Signal parent
3004	// class to take this into account
3005	if (caseFlags & FLAG_PARTIAL)
3006	{
3007		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
3008			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0	||
3009			(caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3010		{
3011			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3012		}
3013	}
3014
3015	return flags;
3016}
3017
3018void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3019{
3020	const int unmapFailureThreshold = 4;
3021
3022	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3023	{
3024		try
3025		{
3026			attemptBufferMap(result, bufferSize);
3027			return;
3028		}
3029		catch (UnmapFailureError&)
3030		{
3031		}
3032	}
3033
3034	throw tcu::TestError("Unmapping failures exceeded limit");
3035}
3036
3037void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3038{
3039	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
3040	const int				mappedSize	= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3041
3042	if (m_fullUpload && !m_flushPartial)
3043		result.writtenSize = bufferSize;
3044	else
3045		result.writtenSize = bufferSize / 2;
3046
3047	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3048
3049	// Create unused buffer
3050
3051	if (m_useUnusedUnspecifiedBuffer)
3052	{
3053		deUint64 startTime;
3054		deUint64 endTime;
3055
3056		// Don't specify contents
3057		startTime = deGetMicroseconds();
3058		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3059		endTime = deGetMicroseconds();
3060
3061		result.duration.allocDuration = endTime - startTime;
3062	}
3063	else if (m_useUnusedSpecifiedBuffer)
3064	{
3065		deUint64 startTime;
3066		deUint64 endTime;
3067
3068		// Specify buffer contents
3069		startTime = deGetMicroseconds();
3070		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3071		endTime = deGetMicroseconds();
3072
3073		result.duration.allocDuration = endTime - startTime;
3074	}
3075	else
3076	{
3077		// No alloc, no time
3078		result.duration.allocDuration = 0;
3079	}
3080
3081	// upload
3082	{
3083		void* mapPtr;
3084
3085		// Map
3086		{
3087			deUint64 startTime;
3088			deUint64 endTime;
3089
3090			startTime = deGetMicroseconds();
3091			if (m_fullUpload)
3092				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3093			else
3094			{
3095				// upload to buffer center
3096				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3097			}
3098			endTime = deGetMicroseconds();
3099
3100			if (!mapPtr)
3101				throw tcu::Exception("MapBufferRange returned NULL");
3102
3103			result.duration.mapDuration = endTime - startTime;
3104		}
3105
3106		// Write
3107		{
3108			if (!m_flushPartial)
3109				result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3110			else
3111				result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3112		}
3113
3114		// Flush
3115		{
3116			deUint64	startTime;
3117			deUint64	endTime;
3118
3119			startTime = deGetMicroseconds();
3120
3121			if (m_flushPartial)
3122				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
3123			else if (!m_flushInParts)
3124				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3125			else
3126			{
3127				const int p1 = 0;
3128				const int p2 = mappedSize / 3;
3129				const int p3 = mappedSize / 2;
3130				const int p4 = mappedSize * 2 / 4;
3131				const int p5 = mappedSize;
3132
3133				// flush in mixed order
3134				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,	p3-p2);
3135				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,	p2-p1);
3136				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,	p5-p4);
3137				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,	p4-p3);
3138			}
3139
3140			endTime = deGetMicroseconds();
3141
3142			result.duration.flushDuration = endTime - startTime;
3143		}
3144
3145		// Unmap
3146		{
3147			deUint64		startTime;
3148			deUint64		endTime;
3149			glw::GLboolean	unmapSuccessful;
3150
3151			startTime = deGetMicroseconds();
3152			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3153			endTime = deGetMicroseconds();
3154
3155			// if unmapping fails, just try again later
3156			if (!unmapSuccessful)
3157				throw UnmapFailureError();
3158
3159			result.duration.unmapDuration = endTime - startTime;
3160		}
3161
3162		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
3163		result.duration.fitResponseDuration = result.duration.totalDuration;
3164	}
3165}
3166
3167template <typename SampleType>
3168class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3169{
3170public:
3171						ModifyAfterBasicCase	(Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
3172						~ModifyAfterBasicCase	(void);
3173
3174	void				init					(void);
3175	void				deinit					(void);
3176
3177protected:
3178	void				drawBufferRange			(int begin, int end);
3179
3180private:
3181	enum
3182	{
3183		NUM_SAMPLES = 20,
3184	};
3185
3186
3187	bool				runSample				(int iteration, UploadSampleResult<SampleType>& sample);
3188	bool				prepareAndRunTest		(int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
3189	void				logAndSetTestResult		(const std::vector<UploadSampleResult<SampleType> >& results);
3190
3191	virtual void		testWithBufferSize		(UploadSampleResult<SampleType>& result, int bufferSize) = 0;
3192
3193	int					m_unmappingErrors;
3194
3195protected:
3196	const bool			m_bufferUnspecifiedAfterTest;
3197	const deUint32		m_bufferUsage;
3198	std::vector<deUint8> m_zeroData;
3199
3200	using BasicBufferCase<SampleType>::m_testCtx;
3201	using BasicBufferCase<SampleType>::m_context;
3202
3203	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
3204	using BasicBufferCase<SampleType>::m_dummyProgram;
3205	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
3206	using BasicBufferCase<SampleType>::m_bufferID;
3207	using BasicBufferCase<SampleType>::m_numSamples;
3208	using BasicBufferCase<SampleType>::m_bufferSizeMin;
3209	using BasicBufferCase<SampleType>::m_bufferSizeMax;
3210	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3211};
3212
3213template <typename SampleType>
3214ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
3215	: BasicBufferCase<SampleType>	(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3216	, m_unmappingErrors				(0)
3217	, m_bufferUnspecifiedAfterTest	(bufferUnspecifiedAfterTest)
3218	, m_bufferUsage					(usage)
3219	, m_zeroData					()
3220{
3221}
3222
3223template <typename SampleType>
3224ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
3225{
3226	BasicBufferCase<SampleType>::deinit();
3227}
3228
3229template <typename SampleType>
3230void ModifyAfterBasicCase<SampleType>::init (void)
3231{
3232	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3233
3234	// init parent
3235
3236	BasicBufferCase<SampleType>::init();
3237
3238	// upload source
3239	m_zeroData.resize(m_bufferSizeMax, 0x00);
3240
3241	// log basic info
3242
3243	m_testCtx.getLog()
3244		<< tcu::TestLog::Message
3245		<< "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
3246		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
3247		<< tcu::TestLog::EndMessage;
3248
3249	// log which transfer rate is the test result and buffer info
3250
3251	m_testCtx.getLog()
3252		<< tcu::TestLog::Message
3253		<< "Test result is the median transfer rate of the test samples.\n"
3254		<< "Buffer usage = " << glu::getUsageName(m_bufferUsage)
3255		<< tcu::TestLog::EndMessage;
3256
3257	// Set state for drawing so that we don't have to change these during the iteration
3258	{
3259		gl.useProgram(m_dummyProgram->getProgram());
3260		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
3261		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
3262	}
3263}
3264
3265template <typename SampleType>
3266void ModifyAfterBasicCase<SampleType>::deinit (void)
3267{
3268	m_zeroData.clear();
3269
3270	BasicBufferCase<SampleType>::deinit();
3271}
3272
3273template <typename SampleType>
3274void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
3275{
3276	DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3277	DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3278
3279	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3280
3281	// use given range
3282	gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3283	gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3284}
3285
3286template <typename SampleType>
3287bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
3288{
3289	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
3290	const int				bufferSize			= sample.bufferSize;
3291	bool					testOk;
3292
3293	testOk = prepareAndRunTest(iteration, sample, bufferSize);
3294	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3295
3296	if (!testOk)
3297	{
3298		const int unmapFailureThreshold = 4;
3299
3300		// only unmapping error can cause iteration failure
3301		if (++m_unmappingErrors >= unmapFailureThreshold)
3302			throw tcu::TestError("Too many unmapping errors, cannot continue.");
3303
3304		// just try again
3305		return false;
3306	}
3307
3308	return true;
3309}
3310
3311template <typename SampleType>
3312bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
3313{
3314	DE_UNREF(iteration);
3315
3316	DE_ASSERT(!m_bufferID);
3317	DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4
3318
3319	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
3320	bool						testRunOk		= true;
3321	bool						unmappingFailed	= false;
3322
3323	// Upload initial buffer to the GPU...
3324	gl.genBuffers(1, &m_bufferID);
3325	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3326	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3327
3328	// ...use it...
3329	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3330	drawBufferRange(0, bufferSize);
3331
3332	// ..and make sure it is uploaded
3333	BasicBufferCase<SampleType>::waitGLResults();
3334
3335	// warmup CPU before the test to make sure the power management governor
3336	// keeps us in the "high performance" mode
3337	{
3338		deYield();
3339		tcu::warmupCPU();
3340		deYield();
3341	}
3342
3343	// test
3344	try
3345	{
3346		// buffer is uploaded to the GPU. Draw from it.
3347		drawBufferRange(0, bufferSize);
3348
3349		// and test upload
3350		testWithBufferSize(result, bufferSize);
3351	}
3352	catch (UnmapFailureError&)
3353	{
3354		testRunOk = false;
3355		unmappingFailed = true;
3356	}
3357
3358	// clean up: make sure buffer is not in upload queue and delete it
3359
3360	// sourcing unspecified data causes undefined results, possibly program termination
3361	if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3362		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3363
3364	drawBufferRange(0, bufferSize);
3365	BasicBufferCase<SampleType>::waitGLResults();
3366
3367	gl.deleteBuffers(1, &m_bufferID);
3368	m_bufferID = 0;
3369
3370	return testRunOk;
3371}
3372
3373template <typename SampleType>
3374void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
3375{
3376	const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3377
3378	// Return median transfer rate of the samples
3379
3380	if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3381	{
3382		// sample times are 1) invalid or 2) timer resolution too low
3383		// report speed 0 bytes / s since real value cannot be determined
3384		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3385	}
3386	else
3387	{
3388		// report transfer rate in MB / s
3389		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3390	}
3391}
3392
3393class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3394{
3395public:
3396
3397	enum CaseFlags
3398	{
3399		FLAG_RESPECIFY_SIZE		= 0x1,
3400		FLAG_UPLOAD_REPEATED	= 0x2,
3401	};
3402
3403					ModifyAfterWithBufferDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3404					~ModifyAfterWithBufferDataCase	(void);
3405
3406	void			init							(void);
3407	void			deinit							(void);
3408private:
3409	void			testWithBufferSize				(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3410
3411	enum
3412	{
3413		NUM_REPEATS = 2
3414	};
3415
3416	const bool		m_respecifySize;
3417	const bool		m_repeatedUpload;
3418	const float		m_sizeDifferenceFactor;
3419};
3420
3421ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3422	: ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3423	, m_respecifySize								((flags & FLAG_RESPECIFY_SIZE) != 0)
3424	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3425	, m_sizeDifferenceFactor						(1.3f)
3426{
3427	DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3428}
3429
3430ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
3431{
3432	deinit();
3433}
3434
3435void ModifyAfterWithBufferDataCase::init (void)
3436{
3437	// Log the purpose of the test
3438
3439	if (m_repeatedUpload)
3440		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3441	else
3442		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3443
3444	m_testCtx.getLog()
3445		<< tcu::TestLog::Message
3446		<< ((m_respecifySize) ?
3447			("Buffer size is increased and contents are modified with BufferData().\n") :
3448			("Buffer contents are modified with BufferData().\n"))
3449		<< tcu::TestLog::EndMessage;
3450
3451	// init parent
3452	ModifyAfterBasicCase<SingleOperationDuration>::init();
3453
3454	// make sure our zeroBuffer is large enough
3455	if (m_respecifySize)
3456	{
3457		const int largerBufferSize = deAlign32((int)(m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
3458		m_zeroData.resize(largerBufferSize, 0x00);
3459	}
3460}
3461
3462void ModifyAfterWithBufferDataCase::deinit (void)
3463{
3464	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3465}
3466
3467void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3468{
3469	// always draw the same amount to make compares between cases sensible
3470	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3471	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3472
3473	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3474	const int					largerBufferSize	= deAlign32((int)(bufferSize * m_sizeDifferenceFactor), 4*4);
3475	const int					newBufferSize		= (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3476	deUint64					startTime;
3477	deUint64					endTime;
3478
3479	// repeat upload-draw
3480	if (m_repeatedUpload)
3481	{
3482		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3483		{
3484			gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3485			drawBufferRange(drawStart, drawEnd);
3486		}
3487	}
3488
3489	// test upload
3490	startTime = deGetMicroseconds();
3491	gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3492	endTime = deGetMicroseconds();
3493
3494	result.duration.totalDuration = endTime - startTime;
3495	result.duration.fitResponseDuration = result.duration.totalDuration;
3496	result.writtenSize = newBufferSize;
3497}
3498
3499class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3500{
3501public:
3502
3503	enum CaseFlags
3504	{
3505		FLAG_PARTIAL			= 0x1,
3506		FLAG_UPLOAD_REPEATED	= 0x2,
3507	};
3508
3509					ModifyAfterWithBufferSubDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3510					~ModifyAfterWithBufferSubDataCase	(void);
3511
3512	void			init								(void);
3513	void			deinit								(void);
3514private:
3515	void			testWithBufferSize					(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3516
3517	enum
3518	{
3519		NUM_REPEATS = 2
3520	};
3521
3522	const bool		m_partialUpload;
3523	const bool		m_repeatedUpload;
3524};
3525
3526ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3527	: ModifyAfterBasicCase<SingleOperationDuration>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3528	, m_partialUpload								((flags & FLAG_PARTIAL) != 0)
3529	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3530{
3531}
3532
3533ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
3534{
3535	deinit();
3536}
3537
3538void ModifyAfterWithBufferSubDataCase::init (void)
3539{
3540	// Log the purpose of the test
3541
3542	if (m_repeatedUpload)
3543		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3544	else
3545		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3546
3547	m_testCtx.getLog()
3548		<< tcu::TestLog::Message
3549		<< ((m_partialUpload) ?
3550			("Half of the buffer contents are modified.\n") :
3551			("Buffer contents are fully respecified.\n"))
3552		<< tcu::TestLog::EndMessage;
3553
3554	ModifyAfterBasicCase<SingleOperationDuration>::init();
3555}
3556
3557void ModifyAfterWithBufferSubDataCase::deinit (void)
3558{
3559	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3560}
3561
3562void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3563{
3564	// always draw the same amount to make compares between cases sensible
3565	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3566	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3567
3568	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3569	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3570	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3571	deUint64					startTime;
3572	deUint64					endTime;
3573
3574	// make upload-draw stream
3575	if (m_repeatedUpload)
3576	{
3577		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3578		{
3579			gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3580			drawBufferRange(drawStart, drawEnd);
3581		}
3582	}
3583
3584	// test upload
3585	startTime = deGetMicroseconds();
3586	gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3587	endTime = deGetMicroseconds();
3588
3589	result.duration.totalDuration = endTime - startTime;
3590	result.duration.fitResponseDuration = result.duration.totalDuration;
3591	result.writtenSize = subdataSize;
3592}
3593
3594class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
3595{
3596public:
3597
3598	enum CaseFlags
3599	{
3600		FLAG_PARTIAL = 0x1,
3601	};
3602
3603					ModifyAfterWithMapBufferRangeCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3604					~ModifyAfterWithMapBufferRangeCase	(void);
3605
3606	void			init								(void);
3607	void			deinit								(void);
3608private:
3609	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3610	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);
3611
3612	const bool		m_partialUpload;
3613	const deUint32	m_mapFlags;
3614};
3615
3616ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3617	: ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3618	, m_partialUpload										((flags & FLAG_PARTIAL) != 0)
3619	, m_mapFlags											(glMapFlags)
3620{
3621}
3622
3623ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
3624{
3625	deinit();
3626}
3627
3628void ModifyAfterWithMapBufferRangeCase::init (void)
3629{
3630	// Log the purpose of the test
3631
3632	m_testCtx.getLog()
3633		<< tcu::TestLog::Message
3634		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3635		<< ((m_partialUpload) ?
3636			("Half of the buffer is mapped.\n") :
3637			("Whole buffer is mapped.\n"))
3638		<< "Map bits:\n"
3639		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3640		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3641		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3642		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3643		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3644		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3645		<< tcu::TestLog::EndMessage;
3646
3647	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
3648}
3649
3650void ModifyAfterWithMapBufferRangeCase::deinit (void)
3651{
3652	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
3653}
3654
3655bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3656{
3657	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3658		return true;
3659
3660	return false;
3661}
3662
3663void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
3664{
3665	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3666	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3667	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3668	void*						mapPtr;
3669
3670	// map
3671	{
3672		deUint64 startTime;
3673		deUint64 endTime;
3674
3675		startTime = deGetMicroseconds();
3676		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3677		endTime = deGetMicroseconds();
3678
3679		if (!mapPtr)
3680			throw tcu::TestError("mapBufferRange returned null");
3681
3682		result.duration.mapDuration = endTime - startTime;
3683	}
3684
3685	// write
3686	{
3687		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3688	}
3689
3690	// unmap
3691	{
3692		deUint64		startTime;
3693		deUint64		endTime;
3694		glw::GLboolean	unmapSucceeded;
3695
3696		startTime = deGetMicroseconds();
3697		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3698		endTime = deGetMicroseconds();
3699
3700		if (unmapSucceeded != GL_TRUE)
3701			throw UnmapFailureError();
3702
3703		result.duration.unmapDuration = endTime - startTime;
3704	}
3705
3706	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
3707	result.duration.fitResponseDuration = result.duration.totalDuration;
3708	result.writtenSize = subdataSize;
3709}
3710
3711class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
3712{
3713public:
3714
3715	enum CaseFlags
3716	{
3717		FLAG_PARTIAL = 0x1,
3718	};
3719
3720					ModifyAfterWithMapBufferFlushCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3721					~ModifyAfterWithMapBufferFlushCase	(void);
3722
3723	void			init								(void);
3724	void			deinit								(void);
3725private:
3726	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3727	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);
3728
3729	const bool		m_partialUpload;
3730	const deUint32	m_mapFlags;
3731};
3732
3733ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3734	: ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3735	, m_partialUpload											((flags & FLAG_PARTIAL) != 0)
3736	, m_mapFlags												(glMapFlags)
3737{
3738}
3739
3740ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
3741{
3742	deinit();
3743}
3744
3745void ModifyAfterWithMapBufferFlushCase::init (void)
3746{
3747	// Log the purpose of the test
3748
3749	m_testCtx.getLog()
3750		<< tcu::TestLog::Message
3751		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3752		<< ((m_partialUpload) ?
3753			("Half of the buffer is mapped.\n") :
3754			("Whole buffer is mapped.\n"))
3755		<< "Map bits:\n"
3756		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3757		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3758		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3759		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3760		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3761		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3762		<< tcu::TestLog::EndMessage;
3763
3764	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
3765}
3766
3767void ModifyAfterWithMapBufferFlushCase::deinit (void)
3768{
3769	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
3770}
3771
3772bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3773{
3774	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3775		return true;
3776
3777	return false;
3778}
3779
3780void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
3781{
3782	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3783	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3784	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3785	void*						mapPtr;
3786
3787	// map
3788	{
3789		deUint64 startTime;
3790		deUint64 endTime;
3791
3792		startTime = deGetMicroseconds();
3793		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3794		endTime = deGetMicroseconds();
3795
3796		if (!mapPtr)
3797			throw tcu::TestError("mapBufferRange returned null");
3798
3799		result.duration.mapDuration = endTime - startTime;
3800	}
3801
3802	// write
3803	{
3804		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3805	}
3806
3807	// flush
3808	{
3809		deUint64 startTime;
3810		deUint64 endTime;
3811
3812		startTime = deGetMicroseconds();
3813		gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
3814		endTime = deGetMicroseconds();
3815
3816		result.duration.flushDuration = endTime - startTime;
3817	}
3818
3819	// unmap
3820	{
3821		deUint64		startTime;
3822		deUint64		endTime;
3823		glw::GLboolean	unmapSucceeded;
3824
3825		startTime = deGetMicroseconds();
3826		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3827		endTime = deGetMicroseconds();
3828
3829		if (unmapSucceeded != GL_TRUE)
3830			throw UnmapFailureError();
3831
3832		result.duration.unmapDuration = endTime - startTime;
3833	}
3834
3835	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
3836	result.duration.fitResponseDuration = result.duration.totalDuration;
3837	result.writtenSize = subdataSize;
3838}
3839
// Draw call a render case issues.
enum DrawMethod
{
	DRAWMETHOD_DRAW_ARRAYS		= 0,	//!< non-indexed draw (drawArrays)
	DRAWMETHOD_DRAW_ELEMENTS	= 1,	//!< indexed draw (drawElements)

	DRAWMETHOD_LAST				= 2		//!< number of draw methods; not a valid method
};
3847
// Buffer a test case targets.
// NOTE(review): going by the enumerator names this selects between the vertex
// and the index buffer; the users of this enum are outside this section.
enum TargetBuffer
{
	TARGETBUFFER_VERTEX	= 0,
	TARGETBUFFER_INDEX	= 1,

	TARGETBUFFER_LAST	= 2		//!< number of enumerators
};
3855
// State of the target buffer.
// NOTE(review): presumably "freshly created" vs. "already existing" buffer;
// the users of this enum are outside this section.
enum BufferState
{
	BUFFERSTATE_NEW			= 0,
	BUFFERSTATE_EXISTING	= 1,

	BUFFERSTATE_LAST		= 2		//!< number of enumerators
};
3863
// GL mechanism used to upload buffer contents.
enum UploadMethod
{
	UPLOADMETHOD_BUFFER_DATA		= 0,	//!< bufferData
	UPLOADMETHOD_BUFFER_SUB_DATA	= 1,	//!< bufferSubData
	UPLOADMETHOD_MAP_BUFFER_RANGE	= 2,	//!< mapBufferRange

	UPLOADMETHOD_LAST				= 3		//!< number of upload methods; not a valid method
};
3872
// Kind of unrelated buffer involved in a test case, if any.
// NOTE(review): semantics inferred from the enumerator names; the users of
// this enum are outside this section.
enum UnrelatedBufferType
{
	UNRELATEDBUFFERTYPE_NONE	= 0,
	UNRELATEDBUFFERTYPE_VERTEX	= 1,

	UNRELATEDBUFFERTYPE_LAST	= 2		//!< number of enumerators
};
3880
// Portion of a buffer that is uploaded.
// NOTE(review): semantics inferred from the enumerator names; the users of
// this enum are outside this section.
enum UploadRange
{
	UPLOADRANGE_FULL	= 0,
	UPLOADRANGE_PARTIAL	= 1,

	UPLOADRANGE_LAST	= 2		//!< number of enumerators
};
3888
// Dimensions of a layered grid workload, counted in cells.
struct LayeredGridSpec
{
	int	gridWidth;		//!< cells along x
	int	gridHeight;		//!< cells along y
	int	gridLayers;		//!< number of stacked grids
};
3895
3896static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
3897{
3898	return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
3899}
3900
3901static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
3902{
3903	// interleave color & vertex data
3904	const tcu::Vec4 green	(0.0f, 1.0f, 0.0f, 0.7f);
3905	const tcu::Vec4 yellow	(1.0f, 1.0f, 0.0f, 0.8f);
3906
3907	vertexData.resize(getLayeredGridNumVertices(scene) * 2);
3908
3909	for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
3910	for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
3911	for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
3912	{
3913		const tcu::Vec4	color		= (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
3914		const float		cellLeft	= (float(cellX  ) / scene.gridWidth  - 0.5f) * 2.0f;
3915		const float		cellRight	= (float(cellX+1) / scene.gridWidth  - 0.5f) * 2.0f;
3916		const float		cellTop		= (float(cellY+1) / scene.gridHeight - 0.5f) * 2.0f;
3917		const float		cellBottom	= (float(cellY  ) / scene.gridHeight - 0.5f) * 2.0f;
3918
3919		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  0] = color;
3920		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3921
3922		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  2] = color;
3923		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
3924
3925		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  4] = color;
3926		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3927
3928		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  6] = color;
3929		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3930
3931		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  8] = color;
3932		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3933
3934		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color;
3935		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
3936	}
3937}
3938
3939static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
3940{
3941	indexData.resize(getLayeredGridNumVertices(scene) * 2);
3942
3943	for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
3944		indexData[ndx] = ndx;
3945}
3946
3947class RenderPerformanceTestBase : public TestCase
3948{
3949public:
3950							RenderPerformanceTestBase	(Context& context, const char* name, const char* description);
3951							~RenderPerformanceTestBase	(void);
3952
3953protected:
3954	void					init						(void);
3955	void					deinit						(void);
3956
3957	void					waitGLResults				(void) const;
3958	void					setupVertexAttribs			(void) const;
3959
3960	enum
3961	{
3962		RENDER_AREA_SIZE = 128
3963	};
3964
3965private:
3966	glu::ShaderProgram*		m_renderProgram;
3967	int						m_colorLoc;
3968	int						m_positionLoc;
3969};
3970
3971RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
3972	: TestCase			(context, tcu::NODETYPE_PERFORMANCE, name, description)
3973	, m_renderProgram	(DE_NULL)
3974	, m_colorLoc		(0)
3975	, m_positionLoc		(0)
3976{
3977}
3978
3979RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
3980{
3981	deinit();
3982}
3983
3984void RenderPerformanceTestBase::init (void)
3985{
3986	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3987
3988	m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));
3989	if (!m_renderProgram->isOk())
3990	{
3991		m_testCtx.getLog() << *m_renderProgram;
3992		throw tcu::TestError("could not build program");
3993	}
3994
3995	m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
3996	m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
3997
3998	if (m_colorLoc == -1)
3999		throw tcu::TestError("Location of attribute a_color was -1");
4000	if (m_positionLoc == -1)
4001		throw tcu::TestError("Location of attribute a_position was -1");
4002}
4003
4004void RenderPerformanceTestBase::deinit (void)
4005{
4006	delete m_renderProgram;
4007	m_renderProgram = DE_NULL;
4008}
4009
4010void RenderPerformanceTestBase::setupVertexAttribs (void) const
4011{
4012	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4013
4014	// buffers are bound
4015
4016	gl.enableVertexAttribArray(m_colorLoc);
4017	gl.enableVertexAttribArray(m_positionLoc);
4018
4019	gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0);
4020	gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1);
4021
4022	gl.useProgram(m_renderProgram->getProgram());
4023
4024	GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4025}
4026
4027void RenderPerformanceTestBase::waitGLResults (void) const
4028{
4029	tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4030	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
4031}
4032
4033template <typename SampleType>
4034class RenderCase : public RenderPerformanceTestBase
4035{
4036public:
4037									RenderCase						(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4038									~RenderCase						(void);
4039
4040protected:
4041	void							init							(void);
4042	void							deinit							(void);
4043
4044private:
4045	IterateResult					iterate							(void);
4046
4047protected:
4048	struct SampleResult
4049	{
4050		LayeredGridSpec					scene;
4051		RenderSampleResult<SampleType>	result;
4052	};
4053
4054	int								getMinWorkloadSize				(void) const;
4055	int								getMaxWorkloadSize				(void) const;
4056	int								getMinWorkloadDataSize			(void) const;
4057	int								getMaxWorkloadDataSize			(void) const;
4058	int								getVertexDataSize				(void) const;
4059	int								getNumSamples					(void) const;
4060	void							uploadScene						(const LayeredGridSpec& scene);
4061
4062	virtual void					runSample						(SampleResult& sample) = 0;
4063	virtual void					logAndSetTestResult				(const std::vector<SampleResult>& results);
4064
4065	void							mapResultsToRenderRateFormat	(std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;
4066
4067	const DrawMethod				m_drawMethod;
4068
4069private:
4070	glw::GLuint						m_attributeBufferID;
4071	glw::GLuint						m_indexBufferID;
4072	int								m_iterationNdx;
4073	std::vector<int>				m_iterationOrder;
4074	std::vector<SampleResult>		m_results;
4075	int								m_numUnmapFailures;
4076};
4077
4078template <typename SampleType>
4079RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4080	: RenderPerformanceTestBase	(context, name, description)
4081	, m_drawMethod				(drawMethod)
4082	, m_attributeBufferID		(0)
4083	, m_indexBufferID			(0)
4084	, m_iterationNdx			(0)
4085	, m_numUnmapFailures		(0)
4086{
4087	DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4088}
4089
4090template <typename SampleType>
4091RenderCase<SampleType>::~RenderCase (void)
4092{
4093	deinit();
4094}
4095
4096template <typename SampleType>
4097void RenderCase<SampleType>::init (void)
4098{
4099	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4100
4101	RenderPerformanceTestBase::init();
4102
4103	// requirements
4104
4105	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4106		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4107		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
4108
4109	// gl state
4110
4111	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4112
4113	// enable bleding to prevent grid layers from being discarded
4114	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4115	gl.blendEquation(GL_FUNC_ADD);
4116	gl.enable(GL_BLEND);
4117
4118	// generate iterations
4119
4120	{
4121		const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
4122
4123		for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4124		{
4125			m_results.push_back(SampleResult());
4126
4127			m_results.back().scene.gridHeight = gridSizes[gridNdx];
4128			m_results.back().scene.gridWidth = gridSizes[gridNdx];
4129			m_results.back().scene.gridLayers = 5;
4130
4131			m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4132
4133			// test cases set these, initialize to dummy values
4134			m_results.back().result.renderDataSize = -1;
4135			m_results.back().result.uploadedDataSize = -1;
4136			m_results.back().result.unrelatedDataSize = -1;
4137		}
4138	}
4139
4140	// randomize iteration order
4141	{
4142		m_iterationOrder.resize(m_results.size());
4143		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4144	}
4145}
4146
4147template <typename SampleType>
4148void RenderCase<SampleType>::deinit (void)
4149{
4150	RenderPerformanceTestBase::deinit();
4151
4152	if (m_attributeBufferID)
4153	{
4154		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4155		m_attributeBufferID = 0;
4156	}
4157
4158	if (m_indexBufferID)
4159	{
4160		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4161		m_indexBufferID = 0;
4162	}
4163}
4164
4165template <typename SampleType>
4166typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
4167{
4168	const int		unmapFailureThreshold	= 3;
4169	const int		currentIteration		= m_iterationNdx;
4170	const int		currentConfigNdx		= m_iterationOrder[currentIteration];
4171	SampleResult&	currentSample			= m_results[currentConfigNdx];
4172
4173	try
4174	{
4175		runSample(currentSample);
4176		++m_iterationNdx;
4177	}
4178	catch (const UnmapFailureError& ex)
4179	{
4180		DE_UNREF(ex);
4181		++m_numUnmapFailures;
4182	}
4183
4184	if (m_numUnmapFailures > unmapFailureThreshold)
4185		throw tcu::TestError("Got too many unmap errors");
4186
4187	if (m_iterationNdx < (int)m_iterationOrder.size())
4188		return CONTINUE;
4189
4190	logAndSetTestResult(m_results);
4191	return STOP;
4192}
4193
4194template <typename SampleType>
4195int RenderCase<SampleType>::getMinWorkloadSize (void) const
4196{
4197	int result = getLayeredGridNumVertices(m_results[0].scene);
4198
4199	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4200	{
4201		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4202		result = de::min(result, workloadSize);
4203	}
4204
4205	return result;
4206}
4207
4208template <typename SampleType>
4209int RenderCase<SampleType>::getMaxWorkloadSize (void) const
4210{
4211	int result = getLayeredGridNumVertices(m_results[0].scene);
4212
4213	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4214	{
4215		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4216		result = de::max(result, workloadSize);
4217	}
4218
4219	return result;
4220}
4221
4222template <typename SampleType>
4223int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
4224{
4225	return getMinWorkloadSize() * getVertexDataSize();
4226}
4227
4228template <typename SampleType>
4229int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
4230{
4231	return getMaxWorkloadSize() * getVertexDataSize();
4232}
4233
4234template <typename SampleType>
4235int RenderCase<SampleType>::getVertexDataSize (void) const
4236{
4237	const int numVectors	= 2;
4238	const int vec4Size		= 4 * sizeof(float);
4239
4240	return numVectors * vec4Size;
4241}
4242
4243template <typename SampleType>
4244int RenderCase<SampleType>::getNumSamples (void) const
4245{
4246	return (int)m_results.size();
4247}
4248
4249template <typename SampleType>
4250void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
4251{
4252	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4253
4254	// vertex buffer
4255	{
4256		std::vector<tcu::Vec4> vertexData;
4257
4258		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4259
4260		if (m_attributeBufferID == 0)
4261			gl.genBuffers(1, &m_attributeBufferID);
4262		gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4263		gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4264	}
4265
4266	// index buffer
4267	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4268	{
4269		std::vector<deUint32> indexData;
4270
4271		generateLayeredGridIndexData(indexData, scene);
4272
4273		if (m_indexBufferID == 0)
4274			gl.genBuffers(1, &m_indexBufferID);
4275		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4276		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4277	}
4278
4279	GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4280}
4281
4282template <typename SampleType>
4283void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
4284{
4285	std::vector<RenderSampleResult<SampleType> > mappedResults;
4286
4287	mapResultsToRenderRateFormat(mappedResults, results);
4288
4289	{
4290		const RenderSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4291		const float						rate		= analysis.renderRateAtRange;
4292
4293		if (rate == std::numeric_limits<float>::infinity())
4294		{
4295			// sample times are 1) invalid or 2) timer resolution too low
4296			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4297		}
4298		else
4299		{
4300			// report transfer rate in millions of MiB/s
4301			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4302		}
4303	}
4304}
4305
4306template <typename SampleType>
4307void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
4308{
4309	dst.resize(src.size());
4310
4311	for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4312		dst[ndx] = src[ndx].result;
4313}
4314
4315class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4316{
4317public:
4318			ReferenceRenderTimeCase		(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4319
4320private:
4321	void	init						(void);
4322	void	runSample					(SampleResult& sample);
4323};
4324
4325ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4326	: RenderCase<RenderReadDuration>	(context, name, description, drawMethod)
4327{
4328}
4329
4330void ReferenceRenderTimeCase::init (void)
4331{
4332	const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4333
4334	// init parent
4335	RenderCase<RenderReadDuration>::init();
4336
4337	// log
4338	m_testCtx.getLog()
4339		<< tcu::TestLog::Message
4340		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4341		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4342		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4343		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4344		<< "Workload sizes are in the range ["
4345			<< getMinWorkloadSize() << ",  "
4346			<< getMaxWorkloadSize() << "] vertices (["
4347			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4348			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4349		<< "Test result is the approximated total processing rate in MiB / s.\n"
4350		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4351		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4352		<< tcu::TestLog::EndMessage;
4353}
4354
4355void ReferenceRenderTimeCase::runSample (SampleResult& sample)
4356{
4357	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4358	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4359	const int				numVertices		= getLayeredGridNumVertices(sample.scene);
4360	const glu::Buffer		arrayBuffer		(m_context.getRenderContext());
4361	const glu::Buffer		indexBuffer		(m_context.getRenderContext());
4362	const glu::Buffer		unrelatedBuffer	(m_context.getRenderContext());
4363	std::vector<tcu::Vec4>	vertexData;
4364	std::vector<deUint32>	indexData;
4365	deUint64				startTime;
4366	deUint64				endTime;
4367
4368	// generate and upload buffers
4369
4370	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4371	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4372	gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4373
4374	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4375	{
4376		generateLayeredGridIndexData(indexData, sample.scene);
4377		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4378		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4379	}
4380
4381	setupVertexAttribs();
4382
4383	// make sure data is uploaded
4384
4385	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4386		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4387	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4388		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4389	else
4390		DE_ASSERT(false);
4391	waitGLResults();
4392
4393	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4394	gl.clear(GL_COLOR_BUFFER_BIT);
4395	waitGLResults();
4396
4397	tcu::warmupCPU();
4398
4399	// Measure both draw and associated readpixels
4400	{
4401		startTime = deGetMicroseconds();
4402
4403		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4404			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4405		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4406			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4407		else
4408			DE_ASSERT(false);
4409
4410		endTime = deGetMicroseconds();
4411
4412		sample.result.duration.renderDuration = endTime - startTime;
4413	}
4414
4415	{
4416		startTime = deGetMicroseconds();
4417		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4418		endTime = deGetMicroseconds();
4419
4420		sample.result.duration.readDuration = endTime - startTime;
4421	}
4422
4423	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4424	sample.result.uploadedDataSize = 0;
4425	sample.result.unrelatedDataSize = 0;
4426	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4427	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4428	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4429}
4430
4431class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4432{
4433public:
4434									UnrelatedUploadRenderTimeCase	(Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);
4435
4436private:
4437	void							init							(void);
4438	void							runSample						(SampleResult& sample);
4439
4440	const UploadMethod				m_unrelatedUploadMethod;
4441};
4442
4443UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
4444	: RenderCase<UnrelatedUploadRenderReadDuration>	(context, name, description, drawMethod)
4445	, m_unrelatedUploadMethod						(unrelatedUploadMethod)
4446{
4447	DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4448}
4449
4450void UnrelatedUploadRenderTimeCase::init (void)
4451{
4452	const char* const	targetFunctionName	= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4453	tcu::MessageBuilder	message				(&m_testCtx.getLog());
4454
4455	// init parent
4456	RenderCase<UnrelatedUploadRenderReadDuration>::init();
4457
4458	// log
4459
4460	message
4461		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4462		<< "Uploading an unrelated buffer just before issuing the rendering command with "
4463			<< ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")		:
4464				(m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")		:
4465				(m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange")	:
4466				((const char*)DE_NULL))
4467			<< ".\n"
4468		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4469		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4470		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4471		<< "Workload sizes are in the range ["
4472			<< getMinWorkloadSize() << ",  "
4473			<< getMaxWorkloadSize() << "] vertices (["
4474			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4475			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4476		<< "Unrelated upload sizes are in the range ["
4477			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4478			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4479		<< "Test result is the approximated total processing rate in MiB / s.\n"
4480		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4481		<< "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4482		<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
4483		<< tcu::TestLog::EndMessage;
4484}
4485
4486void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
4487{
4488	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4489	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4490	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4491	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4492	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4493	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4494	int						unrelatedUploadSize	= -1;
4495	int						renderUploadSize;
4496	std::vector<tcu::Vec4>	vertexData;
4497	std::vector<deUint32>	indexData;
4498	deUint64				startTime;
4499	deUint64				endTime;
4500
4501	// generate and upload buffers
4502
4503	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4504	renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4505
4506	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4507	gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4508
4509	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4510	{
4511		generateLayeredGridIndexData(indexData, sample.scene);
4512		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4513		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4514	}
4515
4516	setupVertexAttribs();
4517
4518	// make sure data is uploaded
4519
4520	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4521		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4522	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4523		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4524	else
4525		DE_ASSERT(false);
4526	waitGLResults();
4527
4528	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4529	gl.clear(GL_COLOR_BUFFER_BIT);
4530	waitGLResults();
4531
4532	tcu::warmupCPU();
4533
4534	// Unrelated upload
4535	if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4536	{
4537		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4538
4539		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4540		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4541	}
4542	else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4543	{
4544		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4545
4546		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4547		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4548		gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
4549	}
4550	else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4551	{
4552		void*			mapPtr;
4553		glw::GLboolean	unmapSuccessful;
4554
4555		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4556
4557		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4558		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4559
4560		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4561		if (!mapPtr)
4562			throw tcu::Exception("MapBufferRange returned NULL");
4563
4564		deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
4565
4566		// if unmapping fails, just try again later
4567		unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
4568		if (!unmapSuccessful)
4569			throw UnmapFailureError();
4570	}
4571	else
4572		DE_ASSERT(false);
4573
4574	DE_ASSERT(unrelatedUploadSize != -1);
4575
4576	// Measure both draw and associated readpixels
4577	{
4578		startTime = deGetMicroseconds();
4579
4580		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4581			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4582		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4583			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4584		else
4585			DE_ASSERT(false);
4586
4587		endTime = deGetMicroseconds();
4588
4589		sample.result.duration.renderDuration = endTime - startTime;
4590	}
4591
4592	{
4593		startTime = deGetMicroseconds();
4594		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4595		endTime = deGetMicroseconds();
4596
4597		sample.result.duration.readDuration = endTime - startTime;
4598	}
4599
4600	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4601	sample.result.uploadedDataSize = renderUploadSize;
4602	sample.result.unrelatedDataSize = unrelatedUploadSize;
4603	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4604	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4605	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4606}
4607
4608class ReferenceReadPixelsTimeCase : public TestCase
4609{
4610public:
4611					ReferenceReadPixelsTimeCase		(Context& context, const char* name, const char* description);
4612
4613private:
4614	void			init							(void);
4615	IterateResult	iterate							(void);
4616	void			logAndSetTestResult				(void);
4617
4618	enum
4619	{
4620		RENDER_AREA_SIZE = 128
4621	};
4622
4623	const int			m_numSamples;
4624	int					m_sampleNdx;
4625	std::vector<int>	m_samples;
4626};
4627
4628ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
4629	: TestCase		(context, tcu::NODETYPE_PERFORMANCE, name, description)
4630	, m_numSamples	(20)
4631	, m_sampleNdx	(0)
4632	, m_samples		(m_numSamples)
4633{
4634}
4635
4636void ReferenceReadPixelsTimeCase::init (void)
4637{
4638	m_testCtx.getLog()
4639		<< tcu::TestLog::Message
4640		<< "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
4641		<< "Test result is the median of the samples in microseconds.\n"
4642		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4643		<< tcu::TestLog::EndMessage;
4644}
4645
4646ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
4647{
4648	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4649	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4650	deUint64				startTime;
4651	deUint64				endTime;
4652
4653	deYield();
4654	tcu::warmupCPU();
4655	deYield();
4656
4657	// "Render" something and wait for it
4658	gl.clearColor(0.0f, 1.0f, m_sampleNdx / float(m_numSamples), 1.0f);
4659	gl.clear(GL_COLOR_BUFFER_BIT);
4660
4661	// wait for results
4662	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4663
4664	// measure time used in readPixels
4665	startTime = deGetMicroseconds();
4666	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4667	endTime = deGetMicroseconds();
4668
4669	m_samples[m_sampleNdx] = (int)(endTime - startTime);
4670
4671	if (++m_sampleNdx < m_numSamples)
4672		return CONTINUE;
4673
4674	logAndSetTestResult();
4675	return STOP;
4676}
4677
4678void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
4679{
4680	// Log sample list
4681	{
4682		m_testCtx.getLog()
4683			<< tcu::TestLog::SampleList("Samples", "Samples")
4684			<< tcu::TestLog::SampleInfo
4685			<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
4686			<< tcu::TestLog::EndSampleInfo;
4687
4688		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
4689			m_testCtx.getLog()
4690				<< tcu::TestLog::Sample
4691				<< m_samples[sampleNdx]
4692				<< tcu::TestLog::EndSample;
4693
4694		m_testCtx.getLog() << tcu::TestLog::EndSampleList;
4695	}
4696
4697	// Log median
4698	{
4699		float median;
4700		float limit60Low;
4701		float limit60Up;
4702
4703		std::sort(m_samples.begin(), m_samples.end());
4704		median		= linearSample(m_samples, 0.5f);
4705		limit60Low	= linearSample(m_samples, 0.2f);
4706		limit60Up	= linearSample(m_samples, 0.8f);
4707
4708		m_testCtx.getLog()
4709			<< tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
4710			<< tcu::TestLog::Message
4711			<< "60 % of samples within range:\n"
4712			<< tcu::TestLog::EndMessage
4713			<< tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
4714			<< tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
4715
4716		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
4717	}
4718}
4719
4720template <typename SampleType>
4721class GenericUploadRenderTimeCase : public RenderCase<SampleType>
4722{
4723public:
4724	typedef typename RenderCase<SampleType>::SampleResult SampleResult;
4725
4726							GenericUploadRenderTimeCase	(Context&				context,
4727														 const char*			name,
4728														 const char*			description,
4729														 DrawMethod				method,
4730														 TargetBuffer			targetBuffer,
4731														 UploadMethod			uploadMethod,
4732														 BufferState			bufferState,
4733														 UploadRange			uploadRange,
4734														 UnrelatedBufferType	unrelatedBufferType);
4735
4736private:
4737	void						init					(void);
4738	void						runSample				(SampleResult& sample);
4739
4740	using RenderCase<SampleType>::RENDER_AREA_SIZE;
4741
4742	const TargetBuffer			m_targetBuffer;
4743	const BufferState			m_bufferState;
4744	const UploadMethod			m_uploadMethod;
4745	const UnrelatedBufferType	m_unrelatedBufferType;
4746	const UploadRange			m_uploadRange;
4747
4748	using RenderCase<SampleType>::m_context;
4749	using RenderCase<SampleType>::m_testCtx;
4750	using RenderCase<SampleType>::m_drawMethod;
4751};
4752
4753template <typename SampleType>
4754GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&				context,
4755																	  const char*			name,
4756																	  const char*			description,
4757																	  DrawMethod			method,
4758																	  TargetBuffer			targetBuffer,
4759																	  UploadMethod			uploadMethod,
4760																	  BufferState			bufferState,
4761																	  UploadRange			uploadRange,
4762																	  UnrelatedBufferType	unrelatedBufferType)
4763	: RenderCase<SampleType>	(context, name, description, method)
4764	, m_targetBuffer			(targetBuffer)
4765	, m_bufferState				(bufferState)
4766	, m_uploadMethod			(uploadMethod)
4767	, m_unrelatedBufferType		(unrelatedBufferType)
4768	, m_uploadRange				(uploadRange)
4769{
4770	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
4771	DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
4772	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
4773	DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
4774	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
4775}
4776
4777template <typename SampleType>
4778void GenericUploadRenderTimeCase<SampleType>::init (void)
4779{
4780	// init parent
4781	RenderCase<SampleType>::init();
4782
4783	// log
4784	{
4785		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4786		const int			perVertexSize			= (m_targetBuffer == TARGETBUFFER_INDEX) ? (sizeof(deUint32)) : (sizeof(tcu::Vec4[2]));
4787		const int			fullMinUploadSize		= RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
4788		const int			fullMaxUploadSize		= RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
4789		const int			minUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
4790		const int			maxUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
4791		const int			minUnrelatedUploadSize	= RenderCase<SampleType>::getMinWorkloadSize() * sizeof(tcu::Vec4[2]);
4792		const int			maxUnrelatedUploadSize	= RenderCase<SampleType>::getMaxWorkloadSize() * sizeof(tcu::Vec4[2]);
4793
4794		m_testCtx.getLog()
4795			<< tcu::TestLog::Message
4796			<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4797			<< "The "
4798				<< ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
4799				<< " buffer "
4800				<< ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
4801				<< "sourced by the rendering command "
4802				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded ") :
4803					(m_uploadRange == UPLOADRANGE_FULL)		? ("are specified ") :
4804					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("are updated (partial upload) ") :
4805					((const char*)DE_NULL))
4806				<< "just before issuing the rendering command.\n"
4807			<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
4808			<< "Buffer "
4809				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded") :
4810					(m_uploadRange == UPLOADRANGE_FULL)		? ("contents are specified") :
4811					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("contents are partially updated") :
4812					((const char*)DE_NULL))
4813				<< " with "
4814				<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
4815				<< " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
4816			<< ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
4817			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
4818			<< RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
4819			<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4820			<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4821			<< "Workload sizes are in the range ["
4822				<< RenderCase<SampleType>::getMinWorkloadSize() << ",  "
4823				<< RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
4824				<< "(["
4825				<< getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
4826				<< getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
4827			<< "Upload sizes are in the range ["
4828				<< getHumanReadableByteSize(minUploadSize) << ","
4829				<< getHumanReadableByteSize(maxUploadSize) << "].\n"
4830			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4831				("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
4832				(""))
4833			<< "Test result is the approximated processing rate in MiB / s.\n"
4834			<< "Note that while upload time is measured, the time used is not included in the results.\n"
4835			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
4836			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4837			<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
4838			<< tcu::TestLog::EndMessage;
4839	}
4840}
4841
4842template <typename SampleType>
4843void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
4844{
4845	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4846	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4847	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4848	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4849	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4850	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4851	deUint64				startTime;
4852	deUint64				endTime;
4853	std::vector<tcu::Vec4>	vertexData;
4854	std::vector<deUint32>	indexData;
4855
4856	// create data
4857
4858	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4859	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4860		generateLayeredGridIndexData(indexData, sample.scene);
4861
4862	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4863	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4864	RenderCase<SampleType>::setupVertexAttribs();
4865
4866	// target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu
4867
4868	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
4869	{
4870		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
4871		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4872	}
4873	else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
4874	{
4875		// do not touch the vertex buffer
4876	}
4877	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
4878	{
4879		// hint that the target buffer will be modified soon
4880		const glw::GLenum vertexDataUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4881		const glw::GLenum indexDataUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4882
4883		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
4884		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
4885		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4886	}
4887	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
4888	{
4889		if (m_targetBuffer == TARGETBUFFER_VERTEX)
4890		{
4891			// make the index buffer present on the gpu
4892			// use another vertex buffer to keep original buffer in unused state
4893			const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
4894
4895			gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
4896			RenderCase<SampleType>::setupVertexAttribs();
4897
4898			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4899			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4900			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4901
4902			// restore original state
4903			gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4904			RenderCase<SampleType>::setupVertexAttribs();
4905		}
4906		else if (m_targetBuffer == TARGETBUFFER_INDEX)
4907		{
4908			// make the vertex buffer present on the gpu
4909			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4910			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4911		}
4912		else
4913			DE_ASSERT(false);
4914	}
4915	else
4916		DE_ASSERT(false);
4917
4918	RenderCase<SampleType>::waitGLResults();
4919	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
4920
4921	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4922	gl.clear(GL_COLOR_BUFFER_BIT);
4923	RenderCase<SampleType>::waitGLResults();
4924
4925	tcu::warmupCPU();
4926
4927	// upload
4928
4929	{
4930		glw::GLenum		target;
4931		glw::GLsizeiptr	size;
4932		glw::GLintptr	offset = 0;
4933		const void*		source;
4934
4935		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
4936		{
4937			target	= GL_ARRAY_BUFFER;
4938			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
4939			source	= &vertexData[0];
4940		}
4941		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
4942		{
4943			target	= GL_ELEMENT_ARRAY_BUFFER;
4944			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
4945			source	= &indexData[0];
4946		}
4947		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4948		{
4949			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4950
4951			target	= GL_ARRAY_BUFFER;
4952			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
4953			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4954			source	= (const deUint8*)&vertexData[0] + offset;
4955		}
4956		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4957		{
4958			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4959
4960			// upload to 25% - 75% range
4961			target	= GL_ELEMENT_ARRAY_BUFFER;
4962			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
4963			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4964			source	= (const deUint8*)&indexData[0] + offset;
4965		}
4966		else
4967		{
4968			DE_ASSERT(false);
4969			return;
4970		}
4971
4972		startTime = deGetMicroseconds();
4973
4974		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
4975			gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
4976		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4977		{
4978			// create buffer storage
4979			if (m_bufferState == BUFFERSTATE_NEW)
4980				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4981			gl.bufferSubData(target, offset, size, source);
4982		}
4983		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4984		{
4985			void*			mapPtr;
4986			glw::GLboolean	unmapSuccessful;
4987
4988			// create buffer storage
4989			if (m_bufferState == BUFFERSTATE_NEW)
4990				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4991
4992			mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4993			if (!mapPtr)
4994				throw tcu::Exception("MapBufferRange returned NULL");
4995
4996			deMemcpy(mapPtr, source, (int)size);
4997
4998			// if unmapping fails, just try again later
4999			unmapSuccessful = gl.unmapBuffer(target);
5000			if (!unmapSuccessful)
5001				throw UnmapFailureError();
5002		}
5003		else
5004			DE_ASSERT(false);
5005
5006		endTime = deGetMicroseconds();
5007
5008		sample.result.uploadedDataSize = (int)size;
5009		sample.result.duration.uploadDuration = endTime - startTime;
5010	}
5011
5012	// unrelated
5013	if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
5014	{
5015		const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5016
5017		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5018		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
5019		// Attibute pointers are not modified, no need restore state
5020
5021		sample.result.unrelatedDataSize = unrelatedUploadSize;
5022	}
5023
5024	// draw
5025	{
5026		startTime = deGetMicroseconds();
5027
5028		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5029			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5030		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5031			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5032		else
5033			DE_ASSERT(false);
5034
5035		endTime = deGetMicroseconds();
5036
5037		sample.result.duration.renderDuration = endTime - startTime;
5038	}
5039
5040	// read
5041	{
5042		startTime = deGetMicroseconds();
5043		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5044		endTime = deGetMicroseconds();
5045
5046		sample.result.duration.readDuration = endTime - startTime;
5047	}
5048
5049	// set results
5050
5051	sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5052
5053	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5054	sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
5055	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5056}
5057
5058class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5059{
5060public:
5061	enum MapFlags
5062	{
5063		MAPFLAG_NONE = 0,
5064		MAPFLAG_INVALIDATE_BUFFER,
5065		MAPFLAG_INVALIDATE_RANGE,
5066
5067		MAPFLAG_LAST
5068	};
5069	enum UploadBufferTarget
5070	{
5071		UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5072		UPLOADBUFFERTARGET_SAME_BUFFER,
5073
5074		UPLOADBUFFERTARGET_LAST
5075	};
5076								BufferInUseRenderTimeCase	(Context&			context,
5077															 const char*		name,
5078															 const char*		description,
5079															 DrawMethod			method,
5080															 MapFlags			mapFlags,
5081															 TargetBuffer		targetBuffer,
5082															 UploadMethod		uploadMethod,
5083															 UploadRange		uploadRange,
5084															 UploadBufferTarget	uploadTarget);
5085
5086private:
5087	void						init						(void);
5088	void						runSample					(SampleResult& sample);
5089
5090	const TargetBuffer			m_targetBuffer;
5091	const UploadMethod			m_uploadMethod;
5092	const UploadRange			m_uploadRange;
5093	const MapFlags				m_mapFlags;
5094	const UploadBufferTarget	m_uploadBufferTarget;
5095};
5096
5097BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context&				context,
5098													  const char*			name,
5099													  const char*			description,
5100													  DrawMethod			method,
5101													  MapFlags				mapFlags,
5102													  TargetBuffer			targetBuffer,
5103													  UploadMethod			uploadMethod,
5104													  UploadRange			uploadRange,
5105													  UploadBufferTarget	uploadTarget)
5106	: RenderCase<RenderUploadRenderReadDuration>	(context, name, description, method)
5107	, m_targetBuffer								(targetBuffer)
5108	, m_uploadMethod								(uploadMethod)
5109	, m_uploadRange									(uploadRange)
5110	, m_mapFlags									(mapFlags)
5111	, m_uploadBufferTarget							(uploadTarget)
5112{
5113	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5114	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5115	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5116	DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5117	DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5118}
5119
5120void BufferInUseRenderTimeCase::init (void)
5121{
5122	RenderCase<RenderUploadRenderReadDuration>::init();
5123
5124	// log
5125	{
5126		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5127		const char* const	uploadFunctionName		= (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
5128		const bool			isReferenceCase			= (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5129		tcu::MessageBuilder	message					(&m_testCtx.getLog());
5130
5131		message	<< "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5132				<< targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5133
5134		if (isReferenceCase)
5135			message << "Rendering:\n"
5136					<< "    before test: create and use buffers B and C\n"
5137					<< "    first draw: render using buffer B\n"
5138					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer C contents\n")	:
5139						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer C contents\n")	:
5140						((const char*)DE_NULL))
5141					<< "    second draw: render using buffer C\n"
5142					<< "    read: readPixels\n";
5143		else
5144			message << "Rendering:\n"
5145					<< "    before test: create and use buffer B\n"
5146					<< "    first draw: render using buffer B\n"
5147					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer B contents\n")	:
5148						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer B contents\n")	:
5149						((const char*)DE_NULL))
5150					<< "    second draw: render using buffer B\n"
5151					<< "    read: readPixels\n";
5152
5153		message	<< "Uploading using " << uploadFunctionName
5154					<< ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")	:
5155						(m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")	:
5156						(m_mapFlags == MAPFLAG_NONE)				? ("")														:
5157						((const char*)DE_NULL))
5158					<< "\n"
5159				<< getNumSamples() << " test samples. Sample order is randomized.\n"
5160				<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5161				<< "Workload sizes are in the range ["
5162					<< getMinWorkloadSize() << ",  "
5163					<< getMaxWorkloadSize() << "] vertices "
5164					<< "(["
5165					<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5166					<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5167				<< "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";
5168
5169		if (isReferenceCase)
5170			message	<< "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
5171		else
5172			message	<< "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5173
5174		message << tcu::TestLog::EndMessage;
5175	}
5176}
5177
5178void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
5179{
5180	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5181	const glu::Buffer		arrayBuffer				(m_context.getRenderContext());
5182	const glu::Buffer		indexBuffer				(m_context.getRenderContext());
5183	const glu::Buffer		alternativeUploadBuffer	(m_context.getRenderContext());
5184	const int				numVertices				= getLayeredGridNumVertices(sample.scene);
5185	tcu::Surface			resultSurface			(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5186	deUint64				startTime;
5187	deUint64				endTime;
5188	std::vector<tcu::Vec4>	vertexData;
5189	std::vector<deUint32>	indexData;
5190
5191	// create data
5192
5193	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5194	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5195		generateLayeredGridIndexData(indexData, sample.scene);
5196
5197	// make buffers used
5198
5199	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5200	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5201	setupVertexAttribs();
5202
5203	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5204	{
5205		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5206		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5207	}
5208	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5209	{
5210		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5211		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5212		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5213	}
5214	else
5215		DE_ASSERT(false);
5216
5217	// another pair of buffers for reference case
5218	if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5219	{
5220		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5221		{
5222			gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5223			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5224
5225			setupVertexAttribs();
5226			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5227		}
5228		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5229		{
5230			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5231			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5232			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5233		}
5234		else
5235			DE_ASSERT(false);
5236
5237		// restore state
5238		gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5239		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5240		setupVertexAttribs();
5241	}
5242
5243	waitGLResults();
5244	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5245
5246	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5247	gl.clear(GL_COLOR_BUFFER_BIT);
5248	waitGLResults();
5249
5250	tcu::warmupCPU();
5251
5252	// first draw
5253	{
5254		startTime = deGetMicroseconds();
5255
5256		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5257			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5258		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5259			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5260		else
5261			DE_ASSERT(false);
5262
5263		endTime = deGetMicroseconds();
5264
5265		sample.result.duration.firstRenderDuration = endTime - startTime;
5266	}
5267
5268	// upload
5269	{
5270		glw::GLenum		target;
5271		glw::GLsizeiptr	size;
5272		glw::GLintptr	offset = 0;
5273		const void*		source;
5274
5275		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5276		{
5277			target	= GL_ARRAY_BUFFER;
5278			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5279			source	= &vertexData[0];
5280		}
5281		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5282		{
5283			target	= GL_ELEMENT_ARRAY_BUFFER;
5284			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
5285			source	= &indexData[0];
5286		}
5287		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5288		{
5289			target	= GL_ARRAY_BUFFER;
5290			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5291			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5292			source	= (const deUint8*)&vertexData[0] + offset;
5293		}
5294		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5295		{
5296			// upload to 25% - 75% range
5297			target	= GL_ELEMENT_ARRAY_BUFFER;
5298			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
5299			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5300			source	= (const deUint8*)&indexData[0] + offset;
5301		}
5302		else
5303		{
5304			DE_ASSERT(false);
5305			return;
5306		}
5307
5308		// reference case? don't modify the buffer in use
5309		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5310			gl.bindBuffer(target, *alternativeUploadBuffer);
5311
5312		startTime = deGetMicroseconds();
5313
5314		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5315			gl.bufferData(target, size, source, GL_STREAM_DRAW);
5316		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5317			gl.bufferSubData(target, offset, size, source);
5318		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5319		{
5320			const int		mapFlags	= (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)	:
5321										  (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)	:
5322										  (-1);
5323			void*			mapPtr;
5324			glw::GLboolean	unmapSuccessful;
5325
5326			mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5327			if (!mapPtr)
5328				throw tcu::Exception("MapBufferRange returned NULL");
5329
5330			deMemcpy(mapPtr, source, (int)size);
5331
5332			// if unmapping fails, just try again later
5333			unmapSuccessful = gl.unmapBuffer(target);
5334			if (!unmapSuccessful)
5335				throw UnmapFailureError();
5336		}
5337		else
5338			DE_ASSERT(false);
5339
5340		endTime = deGetMicroseconds();
5341
5342		sample.result.uploadedDataSize = (int)size;
5343		sample.result.duration.uploadDuration = endTime - startTime;
5344	}
5345
5346	// second draw
5347	{
5348		// Source vertex data from alternative buffer in refernce case
5349		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5350			setupVertexAttribs();
5351
5352		startTime = deGetMicroseconds();
5353
5354		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5355			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5356		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5357			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5358		else
5359			DE_ASSERT(false);
5360
5361		endTime = deGetMicroseconds();
5362
5363		sample.result.duration.secondRenderDuration = endTime - startTime;
5364	}
5365
5366	// read
5367	{
5368		startTime = deGetMicroseconds();
5369		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5370		endTime = deGetMicroseconds();
5371
5372		sample.result.duration.readDuration = endTime - startTime;
5373	}
5374
5375	// set results
5376
5377	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5378
5379	sample.result.duration.renderReadDuration	= sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5380	sample.result.duration.totalDuration		= sample.result.duration.firstRenderDuration +
5381												  sample.result.duration.uploadDuration +
5382												  sample.result.duration.secondRenderDuration +
5383												  sample.result.duration.readDuration;
5384	sample.result.duration.fitResponseDuration	= sample.result.duration.renderReadDuration;
5385}
5386
// Performance case that uploads a buffer, optionally lets a number of frames pass, and
// then measures the time taken by a draw call and a readPixels call that use the buffer.
// The reported result is the number of frames (swaps) after which the draw+read time
// stabilizes (see logAndSetTestResult()).
5387class UploadWaitDrawCase : public RenderPerformanceTestBase
5388{
5389public:
	// Per-sample configuration and upload timestamp.
5390	struct Sample
5391	{
5392		int			numFrames;			// frame index (counted from the upload frame) on which the buffer is drawn
5393		deUint64	uploadCallEndTime;	// deGetMicroseconds() value taken right after the upload call returned
5394	};
	// Per-sample measurements; all values are differences of deGetMicroseconds() readings.
5395	struct Result
5396	{
5397		deUint64	uploadDuration;		// time spent in the upload call(s)
5398		deUint64	renderDuration;		// time spent in the draw call
5399		deUint64	readDuration;		// time spent in readPixels
5400		deUint64	renderReadDuration;	// renderDuration + readDuration
5401
5402		deUint64	timeBeforeUse;		// time between return from upload and start of the draw
5403	};
5404
5405							UploadWaitDrawCase				(Context&		context,
5406															 const char*	name,
5407															 const char*	description,
5408															 DrawMethod		drawMethod,
5409															 TargetBuffer	targetBuffer,
5410															 UploadMethod	uploadMethod,
5411															 BufferState	bufferState);
5412							~UploadWaitDrawCase				(void);
5413
5414private:
5415	void					init							(void);
5416	void					deinit							(void);
5417	IterateResult			iterate							(void);
5418
5419	void					uploadBuffer					(Sample& sample, Result& result);
5420	void					drawFromBuffer					(Sample& sample, Result& result);
5421	void					reuseAndDeleteBuffer			(void);
5422	void					logAndSetTestResult				(void);
5423	void					logSamples						(void);
5424	void					drawMisc						(void);
5425	int						findStabilizationSample			(deUint64 (Result::*target), const char* description);
5426	bool					checkSampleTemporalStability	(deUint64 (Result::*target), const char* description);
5427
	// Test parameters, fixed at construction.
5428	const DrawMethod		m_drawMethod;
5429	const TargetBuffer		m_targetBuffer;
5430	const UploadMethod		m_uploadMethod;
5431	const BufferState		m_bufferState;
5432
5433	const int				m_numSamplesPerSwap;	// number of samples taken for each swap count
5434	const int				m_numMaxSwaps;			// swap counts [0, m_numMaxSwaps] are sampled
5435
5436	int						m_frameNdx;				// frame counter within the current sample, reset when moving to the next sample
5437	int						m_sampleNdx;			// position in m_iterationOrder of the sample being run
5438	int						m_numVertices;			// vertex count of the measured scene, set in init()
5439
5440	std::vector<tcu::Vec4>	m_vertexData;
5441	std::vector<deUint32>	m_indexData;
5442	std::vector<Sample>		m_samples;
5443	std::vector<Result>		m_results;				// parallel to m_samples
5444	std::vector<int>		m_iterationOrder;		// maps run position -> index into m_samples / m_results
5445
	// GL buffer names; 0 means "no buffer".
5446	deUint32				m_vertexBuffer;
5447	deUint32				m_indexBuffer;
5448	deUint32				m_miscBuffer;			// buffer drawn by drawMisc() on frames that are not measured
5449	int						m_numMiscVertices;
5450};
5451
5452UploadWaitDrawCase::UploadWaitDrawCase (Context&		context,
5453										const char*		name,
5454										const char*		description,
5455										DrawMethod		drawMethod,
5456										TargetBuffer	targetBuffer,
5457										UploadMethod	uploadMethod,
5458										BufferState		bufferState)
5459	: RenderPerformanceTestBase	(context, name, description)
5460	, m_drawMethod				(drawMethod)
5461	, m_targetBuffer			(targetBuffer)
5462	, m_uploadMethod			(uploadMethod)
5463	, m_bufferState				(bufferState)
5464	, m_numSamplesPerSwap		(10)
5465	, m_numMaxSwaps				(4)
5466	, m_frameNdx				(0)
5467	, m_sampleNdx				(0)
5468	, m_numVertices				(-1)
5469	, m_vertexBuffer			(0)
5470	, m_indexBuffer				(0)
5471	, m_miscBuffer				(0)
5472	, m_numMiscVertices			(-1)
5473{
5474}
5475
5476UploadWaitDrawCase::~UploadWaitDrawCase (void)
5477{
5478	deinit();
5479}
5480
// Prepares GL state, scene data, static/pre-existing buffers, the misc draw buffer and
// the sample plan, and logs a description of the test.
// Throws tcu::NotSupportedError if the render target is smaller than
// RENDER_AREA_SIZE x RENDER_AREA_SIZE.
5481void UploadWaitDrawCase::init (void)
5482{
5483	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5484	const int				vertexAttribSize		= (int)sizeof(tcu::Vec4) * 2; // color4, position4
5485	const int				vertexIndexSize			= (int)sizeof(deUint32);
	// size of one uploaded element; only used for the "Upload size" log line below
5486	const int				vertexUploadDataSize	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5487
5488	RenderPerformanceTestBase::init();
5489
5490	// requirements
5491
5492	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5493		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5494		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
5495
5496	// gl state
5497
5498	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5499
5500	// enable blending to prevent grid layers from being discarded
5501
5502	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5503	gl.blendEquation(GL_FUNC_ADD);
5504	gl.enable(GL_BLEND);
5505
5506	// scene
5507
5508	{
5509		LayeredGridSpec scene;
5510
5511		// create ~8MB workload with similar characteristics as in the other test
5512		// => makes comparison to other results more straightforward
5513		scene.gridWidth = 93;
5514		scene.gridHeight = 93;
5515		scene.gridLayers = 5;
5516
5517		generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5518		generateLayeredGridIndexData(m_indexData, scene);
5519		m_numVertices = getLayeredGridNumVertices(scene);
5520	}
5521
5522	// buffers
5523
	// BUFFERSTATE_NEW: the measured buffer is created per sample in uploadBuffer(), so only
	// the buffer that is NOT the upload target (if any) is created and warmed up here.
5524	if (m_bufferState == BUFFERSTATE_NEW)
5525	{
5526		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5527		{
5528			// reads from two buffers, prepare the static buffer
5529
5530			if (m_targetBuffer == TARGETBUFFER_VERTEX)
5531			{
5532				// index buffer is static, use another vertex buffer to keep original buffer in unused state
				// NOTE(review): vertexCopyBuffer is a scoped wrapper; presumably its GL buffer is released at the end of this block - confirm against glu::Buffer
5533				const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5534
5535				gl.genBuffers(1, &m_indexBuffer);
5536				gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5537				gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5538				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5539				gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);
5540
				// draw once so that the static index buffer has already been used before measuring
5541				setupVertexAttribs();
5542				gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5543			}
5544			else if (m_targetBuffer == TARGETBUFFER_INDEX)
5545			{
5546				// vertex buffer is static
5547				gl.genBuffers(1, &m_vertexBuffer);
5548				gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5549				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5550
				// no index buffer exists yet in this configuration, so the warm-up draw is non-indexed
5551				setupVertexAttribs();
5552				gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5553			}
5554			else
5555				DE_ASSERT(false);
5556		}
5557	}
	// BUFFERSTATE_EXISTING: all buffers are created, filled and drawn from once here; samples
	// later re-upload into these same buffer objects.
5558	else if (m_bufferState == BUFFERSTATE_EXISTING)
5559	{
		// NOTE(review): both branches of each conditional are GL_STATIC_DRAW, so the usage does not depend on the target buffer
5560		const glw::GLenum vertexUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5561		const glw::GLenum indexUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5562
5563		gl.genBuffers(1, &m_vertexBuffer);
5564		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5565		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);
5566
5567		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5568		{
5569			gl.genBuffers(1, &m_indexBuffer);
5570			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5571			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
5572		}
5573
5574		setupVertexAttribs();
5575
5576		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5577			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5578		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5579			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5580		else
5581			DE_ASSERT(false);
5582	}
5583	else
5584		DE_ASSERT(false);
5585
5586	// misc draw buffer
	// (drawn by drawMisc() on frames where the measured buffer is not drawn)
5587	{
5588		std::vector<tcu::Vec4>	vertexData;
5589		LayeredGridSpec			scene;
5590
5591		// create ~1.5MB workload with similar characteristics
5592		scene.gridWidth = 40;
5593		scene.gridHeight = 40;
5594		scene.gridLayers = 5;
5595
5596		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
5597
5598		gl.genBuffers(1, &m_miscBuffer);
5599		gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5600		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);
5601
5602		m_numMiscVertices = getLayeredGridNumVertices(scene);
5603	}
5604
5605	// iterations
	// m_numSamplesPerSwap samples for each swap count in [0, m_numMaxSwaps]; samples with
	// the same swap count are stored contiguously, the run order is decided separately.
5606	{
5607		m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5608		m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5609
5610		for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
5611		for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
5612		{
5613			const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;
5614
5615			m_samples[index].numFrames = numSwaps;
5616		}
5617
		// NOTE(review): exact ordering scheme is defined by generateTwoPassRandomIterationOrder (not visible here)
5618		m_iterationOrder.resize(m_samples.size());
5619		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
5620	}
5621
5622	// log
5623	m_testCtx.getLog()
5624		<< tcu::TestLog::Message
5625		<< "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
5626		<< "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
5627		<< "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
5628		<< "Uploading using "
5629			<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")																							:
5630				(m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")																							:
5631				(m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")	:
5632				((const char*)DE_NULL))
5633			<< "\n"
5634		<< "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
5635		<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
5636		<< "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
5637		<< "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
5638		<< tcu::TestLog::EndMessage;
5639}
5640
5641void UploadWaitDrawCase::deinit (void)
5642{
5643	RenderPerformanceTestBase::deinit();
5644
5645	if (m_vertexBuffer)
5646	{
5647		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
5648		m_vertexBuffer = 0;
5649	}
5650	if (m_indexBuffer)
5651	{
5652		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
5653		m_indexBuffer = 0;
5654	}
5655	if (m_miscBuffer)
5656	{
5657		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
5658		m_miscBuffer = 0;
5659	}
5660}
5661
5662UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
5663{
5664	const glw::Functions&	gl								= m_context.getRenderContext().getFunctions();
5665	const int				betweenIterationDummyFrameCount = 5; // draw misc between test samples
5666	const int				frameNdx						= m_frameNdx++;
5667	const int				currentSampleNdx				= m_iterationOrder[m_sampleNdx];
5668
5669	// Simulate work for about 8ms
5670	busyWait(8000);
5671
5672	// Dummy rendering during dummy frames
5673	if (frameNdx != m_samples[currentSampleNdx].numFrames)
5674	{
5675		// draw similar from another buffer
5676		drawMisc();
5677	}
5678
5679	if (frameNdx == 0)
5680	{
5681		// upload and start the clock
5682		uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5683	}
5684
5685	if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
5686	{
5687		// draw using the uploaded buffer
5688		drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5689
5690		// re-use buffer for something else to make sure test iteration do not affect each other
5691		if (m_bufferState == BUFFERSTATE_NEW)
5692			reuseAndDeleteBuffer();
5693	}
5694	else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount)
5695	{
5696		// next sample
5697		++m_sampleNdx;
5698		m_frameNdx = 0;
5699	}
5700
5701	GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
5702
5703	if (m_sampleNdx < (int)m_samples.size())
5704		return CONTINUE;
5705
5706	logAndSetTestResult();
5707	return STOP;
5708}
5709
// Uploads the scene's vertex or index data (selected by m_targetBuffer) using the
// configured upload method and records the timing.
//
// sample.uploadCallEndTime receives the timestamp taken right after the upload returned;
// result.uploadDuration receives the duration of the upload call(s) in microseconds.
// Buffer creation/binding and storage pre-allocation happen before the timer is started.
// Throws tcu::Exception if mapBufferRange returns NULL and UnmapFailureError if
// unmapBuffer reports failure.
5710void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
5711{
5712	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
5713	deUint64				startTime;
5714	deUint64				endTime;
5715	glw::GLenum				target;
5716	glw::GLsizeiptr			size;
5717	const void*				source;
5718
5719	// data source
5720
5721	if (m_targetBuffer == TARGETBUFFER_VERTEX)
5722	{
		// the target buffer must not exist yet iff a new buffer is created for each sample
5723		DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5724
5725		target	= GL_ARRAY_BUFFER;
5726		size	= (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
5727		source	= &m_vertexData[0];
5728	}
5729	else if (m_targetBuffer == TARGETBUFFER_INDEX)
5730	{
5731		DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5732
5733		target	= GL_ELEMENT_ARRAY_BUFFER;
5734		size	= (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
5735		source	= &m_indexData[0];
5736	}
5737	else
5738	{
5739		DE_ASSERT(false);
5740		return;
5741	}
5742
5743	// gen buffer
5744
5745	if (m_bufferState == BUFFERSTATE_NEW)
5746	{
5747		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5748		{
5749			gl.genBuffers(1, &m_vertexBuffer);
5750			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5751		}
5752		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5753		{
5754			gl.genBuffers(1, &m_indexBuffer);
5755			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5756		}
5757		else
5758			DE_ASSERT(false);
5759
		// bufferSubData and mapBufferRange need existing storage; allocate it (without data)
		// here so that the allocation is not part of the timed upload
5760		if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
5761			m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5762		{
5763			gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
5764		}
5765	}
5766	else if (m_bufferState == BUFFERSTATE_EXISTING)
5767	{
		// buffer was created and filled in init(); just bind it
5768		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5769			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5770		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5771			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5772		else
5773			DE_ASSERT(false);
5774	}
5775	else
5776		DE_ASSERT(false);
5777
5778	// upload
	// (timed section: everything between the two deGetMicroseconds() calls)
5779
5780	startTime = deGetMicroseconds();
5781
5782	if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5783		gl.bufferData(target, size, source, GL_STATIC_DRAW);
5784	else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5785		gl.bufferSubData(target, 0, size, source);
5786	else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5787	{
5788		void*			mapPtr;
5789		glw::GLboolean	unmapSuccessful;
5790
5791		mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
5792		if (!mapPtr)
5793			throw tcu::Exception("MapBufferRange returned NULL");
5794
5795		deMemcpy(mapPtr, source, (int)size);
5796
5797		// if unmapping fails, just try again later
		// NOTE(review): the retry itself is not visible in this function; the caller is expected to handle UnmapFailureError - confirm
5798		unmapSuccessful = gl.unmapBuffer(target);
5799		if (!unmapSuccessful)
5800			throw UnmapFailureError();
5801	}
5802	else
5803		DE_ASSERT(false);
5804
5805	endTime = deGetMicroseconds();
5806
	// uploadCallEndTime is the reference point for Result::timeBeforeUse in drawFromBuffer()
5807	sample.uploadCallEndTime = endTime;
5808	result.uploadDuration = endTime - startTime;
5809}
5810
// Draws the scene from the uploaded buffer and reads the result back, timing both.
//
// Reads sample.uploadCallEndTime and fills result.timeBeforeUse, result.renderDuration,
// result.readDuration and result.renderReadDuration (all in microseconds).
// Buffer binding and vertex attribute setup are done before the draw timer is started.
5811void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
5812{
5813	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
5814	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5815	deUint64				startTime;
5816	deUint64				endTime;
5817
	// a vertex buffer must always exist; an index buffer must exist exactly when drawing indexed
5818	DE_ASSERT(m_vertexBuffer != 0);
5819	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5820		DE_ASSERT(m_indexBuffer == 0);
5821	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5822		DE_ASSERT(m_indexBuffer != 0);
5823	else
5824		DE_ASSERT(false);
5825
5826	// draw
5827	{
5828		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5829		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5830			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5831
5832		setupVertexAttribs();
5833
5834		// microseconds passed since return from upload call
5835		result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
5836
		// timed section: only the draw call itself
5837		startTime = deGetMicroseconds();
5838
5839		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5840			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5841		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5842			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5843		else
5844			DE_ASSERT(false);
5845
5846		endTime = deGetMicroseconds();
5847
5848		result.renderDuration = endTime - startTime;
5849	}
5850
5851	// read
	// (timed separately from the draw call)
5852	{
5853		startTime = deGetMicroseconds();
5854		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5855		endTime = deGetMicroseconds();
5856
5857		result.readDuration = endTime - startTime;
5858	}
5859
	// combined value is the quantity whose stabilization determines the test result
5860	result.renderReadDuration = result.renderDuration + result.readDuration;
5861}
5862
5863void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
5864{
5865	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5866
5867	if (m_targetBuffer == TARGETBUFFER_INDEX)
5868	{
5869		// respecify and delete index buffer
5870		static const deUint32 indices[3] = {1, 3, 8};
5871
5872		DE_ASSERT(m_indexBuffer != 0);
5873
5874		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
5875		gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
5876		gl.deleteBuffers(1, &m_indexBuffer);
5877		m_indexBuffer = 0;
5878	}
5879	else if (m_targetBuffer == TARGETBUFFER_VERTEX)
5880	{
5881		// respecify and delete vertex buffer
5882		static const tcu::Vec4 coloredTriangle[6] =
5883		{
5884			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
5885			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
5886			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
5887		};
5888
5889		DE_ASSERT(m_vertexBuffer != 0);
5890
5891		gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
5892		gl.drawArrays(GL_TRIANGLES, 0, 3);
5893		gl.deleteBuffers(1, &m_vertexBuffer);
5894		m_vertexBuffer = 0;
5895	}
5896
5897	waitGLResults();
5898}
5899
5900void UploadWaitDrawCase::logAndSetTestResult (void)
5901{
5902	int		uploadStabilization;
5903	int		renderReadStabilization;
5904	int		renderStabilization;
5905	int		readStabilization;
5906	bool	temporallyStable;
5907
5908	{
5909		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
5910		logSamples();
5911	}
5912
5913	{
5914		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
5915
5916		// log stabilization points
5917		renderReadStabilization	= findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
5918		uploadStabilization		= findStabilizationSample(&Result::uploadDuration, "Upload time");
5919		renderStabilization		= findStabilizationSample(&Result::renderDuration, "Draw call time");
5920		readStabilization		= findStabilizationSample(&Result::readDuration, "ReadPixels time");
5921
5922		temporallyStable		= true;
5923		temporallyStable		&= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
5924		temporallyStable		&= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
5925		temporallyStable		&= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
5926		temporallyStable		&= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
5927	}
5928
5929	{
5930		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
5931
5932		// Check result sanily
5933		if (uploadStabilization != 0)
5934			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
5935		if (!temporallyStable)
5936			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;
5937
5938		// render & read
5939		if (renderReadStabilization == -1)
5940			m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5941		else
5942			m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);
5943
5944		// draw call
5945		if (renderStabilization == -1)
5946			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
5947		else
5948			m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);
5949
5950		// readpixels
5951		if (readStabilization == -1)
5952			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5953		else
5954			m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);
5955
5956		// Report renderReadStabilization
5957		if (renderReadStabilization != -1)
5958			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
5959		else
5960			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
5961	}
5962}
5963
5964void UploadWaitDrawCase::logSamples (void)
5965{
5966	// Inverse m_iterationOrder
5967
5968	std::vector<int> runOrder(m_iterationOrder.size());
5969	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
5970		runOrder[m_iterationOrder[ndx]] = ndx;
5971
5972	// Log samples
5973
5974	m_testCtx.getLog()
5975		<< tcu::TestLog::SampleList("Samples", "Samples")
5976		<< tcu::TestLog::SampleInfo
5977		<< tcu::TestLog::ValueInfo("NumSwaps",		"SwapBuffers before use",			"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5978		<< tcu::TestLog::ValueInfo("Delay",			"Time before use",					"us",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
5979		<< tcu::TestLog::ValueInfo("RunOrder",		"Sample run order",					"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5980		<< tcu::TestLog::ValueInfo("DrawReadTime",	"Draw call and ReadPixels time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5981		<< tcu::TestLog::ValueInfo("TotalTime",		"Total time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5982		<< tcu::TestLog::ValueInfo("Upload time",	"Upload time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5983		<< tcu::TestLog::ValueInfo("DrawCallTime",	"Draw call time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5984		<< tcu::TestLog::ValueInfo("ReadTime",		"ReadPixels time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5985		<< tcu::TestLog::EndSampleInfo;
5986
5987	for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5988		m_testCtx.getLog()
5989			<< tcu::TestLog::Sample
5990			<< m_samples[sampleNdx].numFrames
5991			<< (int)m_results[sampleNdx].timeBeforeUse
5992			<< runOrder[sampleNdx]
5993			<< (int)m_results[sampleNdx].renderReadDuration
5994			<< (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
5995			<< (int)m_results[sampleNdx].uploadDuration
5996			<< (int)m_results[sampleNdx].renderDuration
5997			<< (int)m_results[sampleNdx].readDuration
5998			<< tcu::TestLog::EndSample;
5999
6000	m_testCtx.getLog() << tcu::TestLog::EndSampleList;
6001}
6002
6003void UploadWaitDrawCase::drawMisc (void)
6004{
6005	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
6006
6007	gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6008	setupVertexAttribs();
6009	gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
6010}
6011
// Outcome of distributionCompare().
6012struct DistributionCompareResult
6013{
6014	bool	equal;				// true if |z| <= 1.96, i.e. the distributions are accepted as equal
6015	float	standardDeviations;	// the z value: (U - mean of U) / standard deviation of U
6016};
6017
6018template <typename Comparer>
6019static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
6020{
6021	float sum = 0;
6022
6023	for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
6024	{
6025		const deUint64	testSample		= testSamples[sampleNdx];
6026		const int		lowerIndex		= (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6027		const int		upperIndex		= (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6028		const int		lowerRank		= lowerIndex + 1;	// convert zero-indexed to rank
6029		const int		upperRank		= upperIndex;		// convert zero-indexed to rank, upperIndex is last equal + 1
6030		const float		rankMidpoint	= (lowerRank + upperRank) / 2.0f;
6031
6032		sum += rankMidpoint;
6033	}
6034
6035	return sum;
6036}
6037
6038template <typename Comparer>
6039static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
6040{
6041	// Mann�Whitney U test
6042
6043	const int				n1			= (int)orderedObservationsA.size();
6044	const int				n2			= (int)orderedObservationsB.size();
6045	std::vector<deUint64>	allSamples	(n1 + n2);
6046
6047	std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6048	std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6049	std::sort(allSamples.begin(), allSamples.end());
6050
6051	{
6052		const float					R1		= sumOfRanks(orderedObservationsA, allSamples, comparer);
6053
6054		const float					U1		= n1*n2 + n1*(n1 + 1)/2 - R1;
6055		const float					U2		= (n1 * n2) - U1;
6056		const float					U		= de::min(U1, U2);
6057
6058		// \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6059
6060		const float					mU		= n1*n2 / 2.0f;
6061		const float					sigmaU	= deFloatSqrt((n1*n2*(n1+n2+1)) / 12.0f);
6062		const float					z		= (U - mU) / sigmaU;
6063
6064		DistributionCompareResult	result;
6065
6066		result.equal				= (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6067		result.standardDeviations	= z;
6068
6069		return result;
6070	}
6071}
6072
6073template <typename T>
6074struct ThresholdComparer
6075{
6076	float	relativeThreshold;
6077	T		absoluteThreshold;
6078
6079	bool operator() (const T& a, const T& b) const
6080	{
6081		const float diff = de::abs((float)a - (float)b);
6082
6083		// thresholds
6084		if (diff <= (float)absoluteThreshold)
6085			return false;
6086		if (diff <= a*relativeThreshold ||
6087			diff <= b*relativeThreshold)
6088			return false;
6089
6090		// cmp
6091		return a < b;
6092	}
6093};
6094
6095int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6096{
6097	std::vector<std::vector<deUint64> >	sampleObservations(m_numMaxSwaps+1);
6098	ThresholdComparer<deUint64>			comparer;
6099
6100	comparer.relativeThreshold = 0.15f;	// 15%
6101	comparer.absoluteThreshold = 100;	// (us), assumed sampling precision
6102
6103	// get observations and order them
6104
6105	for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6106	{
6107		int insertNdx = 0;
6108
6109		sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6110
6111		for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6112			if (m_samples[ndx].numFrames == swapNdx)
6113				sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6114
6115		DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6116
6117		std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6118	}
6119
6120	// find stabilization point
6121
6122	for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx )
6123	{
6124		// Distribution is equal to all following distributions
6125		for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6126		{
6127			// Stable section ends here?
6128			const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6129			if (!result.equal)
6130			{
6131				// Last two samples are not equal? Samples never stabilized
6132				if (sampleNdx == m_numMaxSwaps-1)
6133				{
6134					m_testCtx.getLog()
6135						<< tcu::TestLog::Message
6136						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6137						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6138						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6139						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6140						<< tcu::TestLog::EndMessage;
6141					return -1;
6142				}
6143				else
6144				{
6145					m_testCtx.getLog()
6146						<< tcu::TestLog::Message
6147						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6148						<< "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n"
6149						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6150						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6151						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6152						<< tcu::TestLog::EndMessage;
6153
6154					return sampleNdx+1;
6155				}
6156			}
6157		}
6158	}
6159
6160	m_testCtx.getLog()
6161		<< tcu::TestLog::Message
6162		<< description << ": All samples seem to have the same distribution"
6163		<< tcu::TestLog::EndMessage;
6164
6165	// all distributions equal
6166	return 0;
6167}
6168
6169bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6170{
6171	// Try to find correlation with sample order and sample times
6172
6173	const int						numDataPoints	= (int)m_iterationOrder.size();
6174	std::vector<tcu::Vec2>			dataPoints		(m_iterationOrder.size());
6175	LineParametersWithConfidence	lineFit;
6176
6177	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6178	{
6179		dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6180		dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6181	}
6182
6183	lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6184
6185	// Difference of more than 25% of the offset along the whole sample range
6186	if (de::abs(lineFit.coefficient) * numDataPoints > de::abs(lineFit.offset) * 0.25f)
6187	{
6188		m_testCtx.getLog()
6189			<< tcu::TestLog::Message
6190			<< description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
6191			<< "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6192			<< tcu::TestLog::EndMessage;
6193
6194		return false;
6195	}
6196	else
6197		return true;
6198}
6199
6200} // anonymous
6201
6202BufferDataUploadTests::BufferDataUploadTests (Context& context)
6203	: TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6204{
6205}
6206
6207BufferDataUploadTests::~BufferDataUploadTests (void)
6208{
6209}
6210
6211void BufferDataUploadTests::init (void)
6212{
6213	static const struct BufferUsage
6214	{
6215		const char* name;
6216		deUint32	usage;
6217		bool		primaryUsage;
6218	} bufferUsages[] =
6219	{
6220		{ "stream_draw",	GL_STREAM_DRAW,		true	},
6221		{ "stream_read",	GL_STREAM_READ,		false	},
6222		{ "stream_copy",	GL_STREAM_COPY,		false	},
6223		{ "static_draw",	GL_STATIC_DRAW,		true	},
6224		{ "static_read",	GL_STATIC_READ,		false	},
6225		{ "static_copy",	GL_STATIC_COPY,		false	},
6226		{ "dynamic_draw",	GL_DYNAMIC_DRAW,	true	},
6227		{ "dynamic_read",	GL_DYNAMIC_READ,	false	},
6228		{ "dynamic_copy",	GL_DYNAMIC_COPY,	false	},
6229	};
6230
6231	tcu::TestCaseGroup* const referenceGroup			= new tcu::TestCaseGroup(m_testCtx, "reference",			"Reference functions");
6232	tcu::TestCaseGroup* const functionCallGroup			= new tcu::TestCaseGroup(m_testCtx, "function_call",		"Function call timing");
6233	tcu::TestCaseGroup* const modifyAfterUseGroup		= new tcu::TestCaseGroup(m_testCtx, "modify_after_use",		"Function call time after buffer has been used");
6234	tcu::TestCaseGroup* const renderAfterUploadGroup	= new tcu::TestCaseGroup(m_testCtx, "render_after_upload",	"Function call time of draw commands after buffer has been modified");
6235
6236	addChild(referenceGroup);
6237	addChild(functionCallGroup);
6238	addChild(modifyAfterUseGroup);
6239	addChild(renderAfterUploadGroup);
6240
6241	// .reference
6242	{
6243		static const struct BufferSizeRange
6244		{
6245			const char* name;
6246			int			minBufferSize;
6247			int			maxBufferSize;
6248			int			numSamples;
6249			bool		largeBuffersCase;
6250		} sizeRanges[] =
6251		{
6252			{ "small_buffers", 0,		1 << 18,	64,		false	}, // !< 0kB - 256kB
6253			{ "large_buffers", 1 << 18,	1 << 24,	32,		true	}, // !< 256kB - 16MB
6254		};
6255
6256		for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6257		{
6258			referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
6259															 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6260															 "Test memcpy performance",
6261															 sizeRanges[bufferSizeRangeNdx].minBufferSize,
6262															 sizeRanges[bufferSizeRangeNdx].maxBufferSize,
6263															 sizeRanges[bufferSizeRangeNdx].numSamples,
6264															 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6265		}
6266	}
6267
6268	// .function_call
6269	{
6270		const int minBufferSize		= 0;		// !< 0kiB
6271		const int maxBufferSize		= 1 << 24;	// !< 16MiB
6272		const int numDataSamples	= 25;
6273		const int numMapSamples		= 25;
6274
6275		tcu::TestCaseGroup* const bufferDataMethodGroup		= new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6276		tcu::TestCaseGroup* const bufferSubDataMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6277		tcu::TestCaseGroup* const mapBufferRangeMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6278
6279		functionCallGroup->addChild(bufferDataMethodGroup);
6280		functionCallGroup->addChild(bufferSubDataMethodGroup);
6281		functionCallGroup->addChild(mapBufferRangeMethodGroup);
6282
6283		// .buffer_data
6284		{
6285			static const struct TargetCase
6286			{
6287				tcu::TestCaseGroup*				group;
6288				BufferDataUploadCase::CaseType	caseType;
6289				bool							allUsages;
6290			} targetCases[] =
6291			{
6292				{ new tcu::TestCaseGroup(m_testCtx, "new_buffer",				"Target new buffer"),							BufferDataUploadCase::CASE_NEW_BUFFER,			true	},
6293				{ new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",		"Target new unspecified buffer"),				BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,	true	},
6294				{ new tcu::TestCaseGroup(m_testCtx, "specified_buffer",			"Target new specified buffer"),					BufferDataUploadCase::CASE_SPECIFIED_BUFFER,	true	},
6295				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer",				"Target buffer that was used in draw"),			BufferDataUploadCase::CASE_USED_BUFFER,			true	},
6296				{ new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",		"Target larger buffer that was used in draw"),	BufferDataUploadCase::CASE_USED_LARGER_BUFFER,	false	},
6297			};
6298
6299			for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6300			{
6301				bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6302
6303				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6304					if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6305						targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
6306																						std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6307																						std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6308																						minBufferSize,
6309																						maxBufferSize,
6310																						numDataSamples,
6311																						bufferUsages[usageNdx].usage,
6312																						targetCases[targetNdx].caseType));
6313			}
6314		}
6315
6316		// .buffer_sub_data
6317		{
6318			static const struct FlagCase
6319			{
6320				tcu::TestCaseGroup*					group;
6321				BufferSubDataUploadCase::CaseType	parentCase;
6322				bool								allUsages;
6323				int									flags;
6324			} flagCases[] =
6325			{
6326				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",					    ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD															},
6327				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6328				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD														},
6329				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6330			};
6331
6332			for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6333			{
6334				bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6335
6336				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6337					if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6338							flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
6339																						   std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6340																						   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6341																						   minBufferSize,
6342																						   maxBufferSize,
6343																						   numDataSamples,
6344																						   bufferUsages[usageNdx].usage,
6345																						   flagCases[flagNdx].parentCase,
6346																						   flagCases[flagNdx].flags));
6347			}
6348		}
6349
6350		// .map_buffer_range
6351		{
6352			static const struct FlagCase
6353			{
6354				const char*	name;
6355				bool		usefulForUnusedBuffers;
6356				bool		allUsages;
6357				int			glFlags;
6358				int			caseFlags;
6359			} flagCases[] =
6360			{
6361				{ "flag_write_full",										true,	true,	GL_MAP_WRITE_BIT,																0																				},
6362				{ "flag_write_partial",										true,	true,	GL_MAP_WRITE_BIT,																MapBufferRangeCase::FLAG_PARTIAL												},
6363				{ "flag_read_write_full",									true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												0																				},
6364				{ "flag_read_write_partial",								true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												MapBufferRangeCase::FLAG_PARTIAL												},
6365				{ "flag_invalidate_range_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									0																				},
6366				{ "flag_invalidate_range_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6367				{ "flag_invalidate_buffer_full",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								0																				},
6368				{ "flag_invalidate_buffer_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								MapBufferRangeCase::FLAG_PARTIAL												},
6369				{ "flag_write_full_manual_invalidate_buffer",				false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_MANUAL_INVALIDATION									},
6370				{ "flag_write_partial_manual_invalidate_buffer",			false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION	},
6371				{ "flag_unsynchronized_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									0																				},
6372				{ "flag_unsynchronized_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6373				{ "flag_unsynchronized_and_invalidate_buffer_full",			true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	0																				},
6374				{ "flag_unsynchronized_and_invalidate_buffer_partial",		true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	MapBufferRangeCase::FLAG_PARTIAL												},
6375			};
6376			static const struct FlushCases
6377			{
6378				const char*	name;
6379				int			glFlags;
6380				int			caseFlags;
6381			} flushCases[] =
6382			{
6383				{ "flag_flush_explicit_map_full",					GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	0												},
6384				{ "flag_flush_explicit_map_partial",				GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_PARTIAL			},
6385				{ "flag_flush_explicit_map_full_flush_in_parts",	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS	},
6386				{ "flag_flush_explicit_map_full_flush_partial",		GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL		},
6387			};
6388			static const struct MapTestGroup
6389			{
6390				int					flags;
6391				bool				unusedBufferCase;
6392				tcu::TestCaseGroup* group;
6393			} groups[] =
6394			{
6395				{ MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,	true,	new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),				},
6396				{ MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,		true,	new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),					},
6397				{ 0,														false,	new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")	},
6398			};
6399
6400			// we OR same flags to both range and flushRange cases, make sure it is legal
6401			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6402			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6403
6404			for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6405			{
6406				tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;
6407
6408				mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6409
6410				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6411				{
6412					if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6413						continue;
6414
6415					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6416					bufferTypeGroup->addChild(bufferUsageGroup);
6417
6418					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6419						if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6420							bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
6421																			  bufferUsages[usageNdx].name,
6422																			  std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6423																			  minBufferSize,
6424																			  maxBufferSize,
6425																			  numMapSamples,
6426																			  bufferUsages[usageNdx].usage,
6427																			  flagCases[caseNdx].glFlags,
6428																			  flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6429				}
6430
6431				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6432				{
6433					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6434					bufferTypeGroup->addChild(bufferUsageGroup);
6435
6436					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6437						if (bufferUsages[usageNdx].primaryUsage)
6438							bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
6439																				   bufferUsages[usageNdx].name,
6440																				   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6441																				   minBufferSize,
6442																				   maxBufferSize,
6443																				   numMapSamples,
6444																				   bufferUsages[usageNdx].usage,
6445																				   flushCases[caseNdx].glFlags,
6446																				   flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6447				}
6448			}
6449		}
6450	}
6451
6452	// .modify_after_use
6453	{
6454		const int minBufferSize	= 0;		// !< 0kiB
6455		const int maxBufferSize	= 1 << 24;	// !< 16MiB
6456
6457		static const struct Usage
6458		{
6459			const char* name;
6460			const char* description;
6461			deUint32	usage;
6462		} usages[] =
6463		{
6464			{ "static_draw",	"Test with GL_STATIC_DRAW",		GL_STATIC_DRAW	},
6465			{ "dynamic_draw",	"Test with GL_DYNAMIC_DRAW",	GL_DYNAMIC_DRAW	},
6466			{ "stream_draw",	"Test with GL_STREAM_DRAW",		GL_STREAM_DRAW },
6467
6468		};
6469
6470		for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6471		{
6472			tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6473			modifyAfterUseGroup->addChild(usageGroup);
6474
6475			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data",							"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6476			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_different_size",			"Respecify buffer contents and size after use",			minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6477			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_repeated",					"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6478
6479			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6480			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial",				"Respecify buffer contents partially use",				minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6481			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full_repeated",		"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
6482			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial_repeated",		"Respecify buffer contents partially upload and use",	minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6483
6484			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT));
6485			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_partial",				"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT));
6486			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_full",				"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6487			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_partial",			"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6488			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6489			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6490			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6491			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6492			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6493			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6494
6495			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6496			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6497		}
6498	}
6499
6500	// .render_after_upload
6501	{
6502		// .reference
6503		{
6504			tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
6505			renderAfterUploadGroup->addChild(renderReferenceGroup);
6506
6507			// .draw
6508			{
6509				tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
6510				renderReferenceGroup->addChild(drawGroup);
6511
6512				// Time consumed by readPixels
6513				drawGroup->addChild(new ReferenceReadPixelsTimeCase	(m_context, "read_pixels",		"Measure time consumed by readPixels() function call"));
6514
6515				// Time consumed by rendering
6516				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_arrays",		"Measure time consumed by drawArrays() function call",		DRAWMETHOD_DRAW_ARRAYS));
6517				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_elements",	"Measure time consumed by drawElements() function call",	DRAWMETHOD_DRAW_ELEMENTS));
6518			}
6519
6520			// .draw_upload_draw
6521			{
6522				static const struct
6523				{
6524					const char*		name;
6525					const char*		description;
6526					DrawMethod		drawMethod;
6527					TargetBuffer	targetBuffer;
6528					bool			partial;
6529				} uploadTargets[] =
6530				{
6531					{
6532						"draw_arrays_upload_vertices",
6533						"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6534						DRAWMETHOD_DRAW_ARRAYS,
6535						TARGETBUFFER_VERTEX,
6536						false
6537					},
6538					{
6539						"draw_arrays_upload_vertices_partial",
6540						"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6541						DRAWMETHOD_DRAW_ARRAYS,
6542						TARGETBUFFER_VERTEX,
6543						true
6544					},
6545					{
6546						"draw_elements_upload_vertices",
6547						"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6548						DRAWMETHOD_DRAW_ELEMENTS,
6549						TARGETBUFFER_VERTEX,
6550						false
6551					},
6552					{
6553						"draw_elements_upload_indices",
6554						"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6555						DRAWMETHOD_DRAW_ELEMENTS,
6556						TARGETBUFFER_INDEX,
6557						false
6558					},
6559					{
6560						"draw_elements_upload_indices_partial",
6561						"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6562						DRAWMETHOD_DRAW_ELEMENTS,
6563						TARGETBUFFER_INDEX,
6564						true
6565					},
6566				};
6567				static const struct
6568				{
6569					const char*							name;
6570					const char*							description;
6571					UploadMethod						uploadMethod;
6572					BufferInUseRenderTimeCase::MapFlags	mapFlags;
6573					bool								supportsPartialUpload;
6574				} uploadMethods[] =
6575				{
6576					{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6577					{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6578					{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6579					{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6580				};
6581
6582				tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
6583				renderReferenceGroup->addChild(drawUploadDrawGroup);
6584
6585				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6586				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6587				{
6588					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6589
6590					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6591						continue;
6592
6593					drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6594																				name.c_str(),
6595																				uploadTargets[uploadTargetNdx].description,
6596																				uploadTargets[uploadTargetNdx].drawMethod,
6597																				uploadMethods[uploadMethodNdx].mapFlags,
6598																				uploadTargets[uploadTargetNdx].targetBuffer,
6599																				uploadMethods[uploadMethodNdx].uploadMethod,
6600																				(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6601																				BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
6602				}
6603			}
6604		}
6605
6606		// .upload_unrelated_and_draw
6607		{
6608			static const struct
6609			{
6610				const char*		name;
6611				const char*		description;
6612				DrawMethod		drawMethod;
6613			} drawMethods[] =
6614			{
6615				{ "draw_arrays",	"drawArrays",	DRAWMETHOD_DRAW_ARRAYS		},
6616				{ "draw_elements",	"drawElements",	DRAWMETHOD_DRAW_ELEMENTS	},
6617			};
6618
6619			static const struct
6620			{
6621				const char*		name;
6622				UploadMethod	uploadMethod;
6623			} uploadMethods[] =
6624			{
6625				{ "buffer_data",		UPLOADMETHOD_BUFFER_DATA		},
6626				{ "buffer_sub_data",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6627				{ "map_buffer_range",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6628			};
6629
6630			tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
6631			renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
6632
6633			for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
6634			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6635			{
6636				const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
6637				const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";
6638
6639				// Time consumed by rendering command after an unrelated upload
6640
6641				uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
6642			}
6643		}
6644
6645		// .upload_and_draw
6646		{
6647			static const struct
6648			{
6649				const char*			name;
6650				const char*			description;
6651				BufferState			bufferState;
6652				UnrelatedBufferType	unrelatedBuffer;
6653				bool				supportsPartialUpload;
6654			} bufferConfigs[] =
6655			{
6656				{ "used_buffer",						"Upload to an used buffer",											BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_NONE,	true	},
6657				{ "new_buffer",							"Upload to a new buffer",											BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_NONE,	false	},
6658				{ "used_buffer_and_unrelated_upload",	"Upload to an used buffer and an unrelated buffer and then draw",	BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_VERTEX,	true	},
6659				{ "new_buffer_and_unrelated_upload",	"Upload to a new buffer and an unrelated buffer and then draw",		BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_VERTEX,	false	},
6660			};
6661
6662			tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
6663			renderAfterUploadGroup->addChild(uploadAndDrawGroup);
6664
6665			// .used_buffer
6666			// .new_buffer
6667			// .used_buffer_and_unrelated_upload
6668			// .new_buffer_and_unrelated_upload
6669			for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
6670			{
6671				static const struct
6672				{
6673					const char*		name;
6674					const char*		description;
6675					DrawMethod		drawMethod;
6676					TargetBuffer	targetBuffer;
6677					bool			partial;
6678				} uploadTargets[] =
6679				{
6680					{
6681						"draw_arrays_upload_vertices",
6682						"Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
6683						DRAWMETHOD_DRAW_ARRAYS,
6684						TARGETBUFFER_VERTEX,
6685						false
6686					},
6687					{
6688						"draw_arrays_upload_vertices_partial",
6689						"Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
6690						DRAWMETHOD_DRAW_ARRAYS,
6691						TARGETBUFFER_VERTEX,
6692						true
6693					},
6694					{
6695						"draw_elements_upload_vertices",
6696						"Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
6697						DRAWMETHOD_DRAW_ELEMENTS,
6698						TARGETBUFFER_VERTEX,
6699						false
6700					},
6701					{
6702						"draw_elements_upload_indices",
6703						"Measure time consumed by index upload, drawElements, and readPixels function calls",
6704						DRAWMETHOD_DRAW_ELEMENTS,
6705						TARGETBUFFER_INDEX,
6706						false
6707					},
6708					{
6709						"draw_elements_upload_indices_partial",
6710						"Measure time consumed by partial index upload, drawElements, and readPixels function calls",
6711						DRAWMETHOD_DRAW_ELEMENTS,
6712						TARGETBUFFER_INDEX,
6713						true
6714					},
6715				};
6716				static const struct
6717				{
6718					const char*		name;
6719					const char*		description;
6720					UploadMethod	uploadMethod;
6721					bool			supportsPartialUpload;
6722				} uploadMethods[] =
6723				{
6724					{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA,		false	},
6725					{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	true	},
6726					{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	true	},
6727				};
6728
6729				tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
6730				uploadAndDrawGroup->addChild(group);
6731
6732				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6733				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6734				{
6735					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6736
6737					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6738						continue;
6739					if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
6740						continue;
6741
6742					// Don't log unrelated buffer information to samples if there is no such buffer
6743
6744					if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
6745					{
6746						typedef UploadRenderReadDuration				SampleType;
6747						typedef GenericUploadRenderTimeCase<SampleType>	TestType;
6748
6749						group->addChild(new TestType(m_context,
6750													 name.c_str(),
6751													 uploadTargets[uploadTargetNdx].description,
6752													 uploadTargets[uploadTargetNdx].drawMethod,
6753													 uploadTargets[uploadTargetNdx].targetBuffer,
6754													 uploadMethods[uploadMethodNdx].uploadMethod,
6755													 bufferConfigs[stateNdx].bufferState,
6756													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6757													 bufferConfigs[stateNdx].unrelatedBuffer));
6758					}
6759					else
6760					{
6761						typedef UploadRenderReadDurationWithUnrelatedUploadSize	SampleType;
6762						typedef GenericUploadRenderTimeCase<SampleType>			TestType;
6763
6764						group->addChild(new TestType(m_context,
6765													 name.c_str(),
6766													 uploadTargets[uploadTargetNdx].description,
6767													 uploadTargets[uploadTargetNdx].drawMethod,
6768													 uploadTargets[uploadTargetNdx].targetBuffer,
6769													 uploadMethods[uploadMethodNdx].uploadMethod,
6770													 bufferConfigs[stateNdx].bufferState,
6771													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6772													 bufferConfigs[stateNdx].unrelatedBuffer));
6773					}
6774				}
6775			}
6776		}
6777
6778		// .draw_modify_draw
		// Registers the "draw_modify_draw" group under renderAfterUploadGroup and fills it with one
		// BufferInUseRenderTimeCase per (upload target, upload method) pair taken from the two tables
		// below. Pairs that combine a partial upload target with a method that does not support
		// partial uploads are skipped, so 16 of the 20 possible combinations are registered.
6779		{
			// Draw method / target buffer combinations to measure. `partial` selects UPLOADRANGE_PARTIAL
			// instead of UPLOADRANGE_FULL when the case is constructed in the loop below. The
			// description of each entry is used verbatim as the test case description.
6780			static const struct
6781			{
6782				const char*		name;
6783				const char*		description;
6784				DrawMethod		drawMethod;
6785				TargetBuffer	targetBuffer;
6786				bool			partial;
6787			} uploadTargets[] =
6788			{
6789				{
6790					"draw_arrays_upload_vertices",
6791					"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6792					DRAWMETHOD_DRAW_ARRAYS,
6793					TARGETBUFFER_VERTEX,
6794					false
6795				},
6796				{
6797					"draw_arrays_upload_vertices_partial",
6798					"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6799					DRAWMETHOD_DRAW_ARRAYS,
6800					TARGETBUFFER_VERTEX,
6801					true
6802				},
6803				{
6804					"draw_elements_upload_vertices",
6805					"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6806					DRAWMETHOD_DRAW_ELEMENTS,
6807					TARGETBUFFER_VERTEX,
6808					false
6809				},
6810				{
6811					"draw_elements_upload_indices",
6812					"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6813					DRAWMETHOD_DRAW_ELEMENTS,
6814					TARGETBUFFER_INDEX,
6815					false
6816				},
6817				{
6818					"draw_elements_upload_indices_partial",
6819					"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6820					DRAWMETHOD_DRAW_ELEMENTS,
6821					TARGETBUFFER_INDEX,
6822					true
6823				},
6824			};
			// Upload methods to pair with each target, together with the map flags handed to the test
			// case. `supportsPartialUpload` gates pairing with the `partial` targets above.
			// Note: the `description` field is not read anywhere in this scope; only name, uploadMethod,
			// mapFlags and supportsPartialUpload are used.
6825			static const struct
6826			{
6827				const char*							name;
6828				const char*							description;
6829				UploadMethod						uploadMethod;
6830				BufferInUseRenderTimeCase::MapFlags	mapFlags;
6831				bool								supportsPartialUpload;
6832			} uploadMethods[] =
6833			{
6834				{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6835				{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6836				{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6837				{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6838			};
6839
			// NOTE(review): the raw pointers passed to addChild() here and below are presumably owned
			// by the parent node afterwards — confirm against tcu::TestNode::addChild.
6840			tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
6841			renderAfterUploadGroup->addChild(drawModifyDrawGroup);
6842
			// Cross product of targets and methods; the single braced body belongs to the inner loop.
6843			for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6844			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6845			{
				// Case name has the form "<target name>_with_<method name>".
6846				const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6847
				// Skip partial-range targets for methods that cannot upload a partial range. The name
				// built above is simply discarded in that case.
6848				if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6849					continue;
6850
				// Every case in this group is created with UPLOADBUFFERTARGET_SAME_BUFFER as the last
				// argument; only draw method, map flags, target buffer, upload method and range vary.
6851				drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6852																			name.c_str(),
6853																			uploadTargets[uploadTargetNdx].description,
6854																			uploadTargets[uploadTargetNdx].drawMethod,
6855																			uploadMethods[uploadMethodNdx].mapFlags,
6856																			uploadTargets[uploadTargetNdx].targetBuffer,
6857																			uploadMethods[uploadMethodNdx].uploadMethod,
6858																			(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6859																			BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
6860			}
6861		}
6862
6863		// .upload_wait_draw
		// Registers the "upload_wait_draw" group under renderAfterUploadGroup. The group gets one
		// sub-group per entry in bufferStates, and each sub-group gets one UploadWaitDrawCase per
		// (upload target, upload method) pair: 2 sub-groups x 3 targets x 3 methods = 18 cases.
6864		{
			// One sub-group is created per buffer state; name and description become the sub-group's
			// name and description, and bufferState is forwarded to every case inside it.
6865			static const struct
6866			{
6867				const char*	name;
6868				const char*	description;
6869				BufferState	bufferState;
6870			} bufferStates[] =
6871			{
6872				{ "new_buffer",		"Uploading to just generated name",	BUFFERSTATE_NEW			},
6873				{ "used_buffer",	"Uploading to a used buffer",		BUFFERSTATE_EXISTING	},
6874			};
			// Draw method / target buffer combinations. The description of each entry is used verbatim
			// as the test case description.
6875			static const struct
6876			{
6877				const char*		name;
6878				const char*		description;
6879				DrawMethod		drawMethod;
6880				TargetBuffer	targetBuffer;
6881			} uploadTargets[] =
6882			{
6883				{ "draw_arrays_vertices",	"Upload vertex data, draw with drawArrays",		DRAWMETHOD_DRAW_ARRAYS,		TARGETBUFFER_VERTEX	},
6884				{ "draw_elements_vertices",	"Upload vertex data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_VERTEX	},
6885				{ "draw_elements_indices",	"Upload index data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_INDEX	},
6886			};
			// Upload methods to pair with each target.
			// Note: the `description` field is not read anywhere in this scope; only name and
			// uploadMethod are used.
6887			static const struct
6888			{
6889				const char*		name;
6890				const char*		description;
6891				UploadMethod	uploadMethod;
6892			} uploadMethods[] =
6893			{
6894				{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA		},
6895				{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6896				{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6897			};
6898
			// Note: the local is called uploadSwapDrawGroup although the group it holds is named
			// "upload_wait_draw".
			// NOTE(review): the raw pointers passed to addChild() here and below are presumably owned
			// by the parent node afterwards — confirm against tcu::TestNode::addChild.
6899			tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
6900			renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
6901
6902			for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
6903			{
				// Sub-group for this buffer state; all target/method combinations are added to it.
6904				tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
6905				uploadSwapDrawGroup->addChild(bufferGroup);
6906
				// Full cross product of targets and methods (no combination is skipped); the single
				// braced body belongs to the inner loop.
6907				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6908				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6909				{
					// Case name has the form "<target name>_with_<method name>".
6910					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6911
6912					bufferGroup->addChild(new UploadWaitDrawCase(m_context,
6913																 name.c_str(),
6914																 uploadTargets[uploadTargetNdx].description,
6915																 uploadTargets[uploadTargetNdx].drawMethod,
6916																 uploadTargets[uploadTargetNdx].targetBuffer,
6917																 uploadMethods[uploadMethodNdx].uploadMethod,
6918																 bufferStates[bufferStateNdx].bufferState));
6919				}
6920			}
6921		}
6922	}
6923}
6924
6925} // Performance
6926} // gles3
6927} // deqp
6928