1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Buffer data upload performance tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es3pBufferDataUploadTests.hpp"
25#include "glsCalibration.hpp"
26#include "tcuTestLog.hpp"
27#include "tcuVectorUtil.hpp"
28#include "tcuSurface.hpp"
29#include "tcuCPUWarmup.hpp"
30#include "tcuRenderTarget.hpp"
31#include "gluRenderContext.hpp"
32#include "gluShaderProgram.hpp"
33#include "gluStrUtil.hpp"
34#include "gluPixelTransfer.hpp"
35#include "gluObjectWrapper.hpp"
36#include "glwFunctions.hpp"
37#include "glwEnums.hpp"
38#include "deClock.h"
39#include "deMath.h"
40#include "deStringUtil.hpp"
41#include "deRandom.hpp"
42#include "deMemory.h"
43#include "deThread.h"
44#include "deMeta.hpp"
45
46#include <algorithm>
47#include <iomanip>
48#include <limits>
49
50namespace deqp
51{
52namespace gles3
53{
54namespace Performance
55{
56namespace
57{
58
59using gls::theilSenSiegelLinearRegression;
60using gls::LineParametersWithConfidence;
61using de::meta::EnableIf;
62using de::meta::Not;
63
// GLSL ES 3.00 shader sources used by the tests in this file.

// Vertex shader that forwards the a_position attribute to gl_Position.
static const char* const s_dummyVertexShader =		"#version 300 es\n"
													"in highp vec4 a_position;\n"
													"void main (void)\n"
													"{\n"
													"	gl_Position = a_position;\n"
													"}\n";

// Fragment shader that writes a constant color (1, 0, 0, 1) to output location 0.
// NOTE(review): the identifier is spelled "Fragnent"; it is left as-is here
// because uses outside this excerpt cannot be checked.
static const char* const s_dummyFragnentShader =	"#version 300 es\n"
													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
													"void main (void)\n"
													"{\n"
													"	dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
													"}\n";

// Vertex shader that forwards a_position and passes a_color on as v_color.
static const char* const s_colorVertexShader =		"#version 300 es\n"
													"in highp vec4 a_position;\n"
													"in highp vec4 a_color;\n"
													"out highp vec4 v_color;\n"
													"void main (void)\n"
													"{\n"
													"	gl_Position = a_position;\n"
													"	v_color = a_color;\n"
													"}\n";

// Fragment shader that writes the interpolated v_color to output location 0.
static const char* const s_colorFragmentShader =	"#version 300 es\n"
													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
													"in mediump vec4 v_color;\n"
													"void main (void)\n"
													"{\n"
													"	dEQP_FragColor = v_color;\n"
													"}\n";
95
// Timing of one sample that consists of a single timed operation.
// fitResponseDuration is interpreted as microseconds by
// calculateBasicStatistics(); totalDuration presumably uses the same unit
// (TODO confirm at the sampling site).
struct SingleOperationDuration
{
	deUint64 totalDuration;
	deUint64 fitResponseDuration; // used for fitting
};
101
// Per-phase timings of one map/write/unmap upload sample that also records
// an allocation time. Each phase gets its own statistics in
// calculateSampleStatistics(). Units are presumably microseconds (TODO confirm).
struct MapBufferRangeDuration
{
	deUint64 mapDuration;
	deUint64 unmapDuration;
	deUint64 writeDuration;
	deUint64 allocDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
112
// Same layout as MapBufferRangeDuration but without an allocation time.
// Units are presumably microseconds (TODO confirm).
struct MapBufferRangeDurationNoAlloc
{
	deUint64 mapDuration;
	deUint64 unmapDuration;
	deUint64 writeDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
122
// Per-phase timings of one map/write/flush/unmap upload sample that also
// records an allocation time. Units are presumably microseconds (TODO confirm).
struct MapBufferRangeFlushDuration
{
	deUint64 mapDuration;
	deUint64 unmapDuration;
	deUint64 writeDuration;
	deUint64 flushDuration;
	deUint64 allocDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
134
// Same layout as MapBufferRangeFlushDuration but without an allocation time.
// Units are presumably microseconds (TODO confirm).
struct MapBufferRangeFlushDurationNoAlloc
{
	deUint64 mapDuration;
	deUint64 unmapDuration;
	deUint64 writeDuration;
	deUint64 flushDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
145
// Timings of one render + read sample. Units are presumably microseconds
// (TODO confirm). How renderReadDuration relates to the separate render and
// read times is decided where the sample is recorded, outside this excerpt.
struct RenderReadDuration
{
	deUint64 renderDuration;
	deUint64 readDuration;
	deUint64 renderReadDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
155
// Same fields as RenderReadDuration, kept as a distinct type so that
// SampleTypeTraits and calculateSampleStatistics() can be selected separately
// for it. Units are presumably microseconds (TODO confirm).
struct UnrelatedUploadRenderReadDuration
{
	deUint64 renderDuration;
	deUint64 readDuration;
	deUint64 renderReadDuration;
	deUint64 totalDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
165
// Timings of one upload + render + read sample. Units are presumably
// microseconds (TODO confirm).
struct UploadRenderReadDuration
{
	deUint64 uploadDuration;
	deUint64 renderDuration;
	deUint64 readDuration;
	deUint64 totalDuration;
	deUint64 renderReadDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
176
// Same fields as UploadRenderReadDuration, kept as a distinct type; its
// SampleTypeTraits specialization sets LOG_UNRELATED_UPLOAD_SIZE to 1.
// Units are presumably microseconds (TODO confirm).
struct UploadRenderReadDurationWithUnrelatedUploadSize
{
	deUint64 uploadDuration;
	deUint64 renderDuration;
	deUint64 readDuration;
	deUint64 totalDuration;
	deUint64 renderReadDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
187
// Timings of one render + upload + render + read sample. Units are presumably
// microseconds (TODO confirm).
struct RenderUploadRenderReadDuration
{
	deUint64 firstRenderDuration;
	deUint64 uploadDuration;
	deUint64 secondRenderDuration;
	deUint64 readDuration;
	deUint64 totalDuration;
	deUint64 renderReadDuration;

	// Response value read by fitLineToSamples() and calculateBasicStatistics().
	deUint64 fitResponseDuration;
};
199
// One upload measurement: buffer-related sizes plus the timing record.
// SampleT is one of the *Duration structs. writtenSize is the predictor (x)
// used by fitLineToSamples() and calculateBasicTransferStatistics().
// Sizes are presumably in bytes (TODO confirm).
template <typename SampleT>
struct UploadSampleResult
{
	typedef SampleT SampleType;

	int			bufferSize;
	int			allocatedSize;
	int			writtenSize;
	SampleType	duration;
};
210
// One render measurement: data sizes, vertex count and the timing record.
// SampleT is one of the *Duration structs. renderDataSize is the predictor (x)
// used by fitLineToSamples() and calculateBasicRenderStatistics().
// Sizes are presumably in bytes (TODO confirm).
template <typename SampleT>
struct RenderSampleResult
{
	typedef SampleT SampleType;

	int			uploadedDataSize;
	int			renderDataSize;
	int			unrelatedDataSize;
	int			numVertices;
	SampleT		duration;
};
222
// Order statistics of one timed quantity. Filled from the sorted durations by
// calculateSingleOperationStatistics() and calculateBasicStatistics(); values
// are the raw deUint64 durations converted to float.
struct SingleOperationStatistics
{
	float minTime;
	float maxTime;
	float medianTime;			// linearSample() at position 0.5
	float min2DecileTime;		// !< minimum value in the 2nd decile
	float max9DecileTime;		// !< maximum value in the 9th decile
};
231
// Statistics for samples with a single timed operation. All members are
// filled by calculateBasicStatistics().
struct SingleCallStatistics
{
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted, recorded as 0 when
	// the predicted duration is below 1.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
245
// Statistics for map/write/unmap upload samples. The per-phase members are
// filled by the calculateSampleStatistics() overloads; note that the overload
// for MapBufferRangeDurationNoAlloc does not assign 'alloc'. The remaining
// members are filled by calculateBasicStatistics().
struct MapCallStatistics
{
	SingleOperationStatistics	map;
	SingleOperationStatistics	unmap;
	SingleOperationStatistics	write;
	SingleOperationStatistics	alloc;
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
263
// Statistics for map/write/flush/unmap upload samples. The per-phase members
// are filled by the calculateSampleStatistics() overloads; note that the
// overload for MapBufferRangeFlushDurationNoAlloc does not assign 'alloc'.
// The remaining members are filled by calculateBasicStatistics().
struct MapFlushCallStatistics
{
	SingleOperationStatistics	map;
	SingleOperationStatistics	unmap;
	SingleOperationStatistics	write;
	SingleOperationStatistics	flush;
	SingleOperationStatistics	alloc;
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
282
// Statistics for render + read samples. 'render', 'read' and 'total' are
// filled by the calculateSampleStatistics() overloads; the remaining members
// are filled by calculateBasicStatistics().
struct RenderReadStatistics
{
	SingleOperationStatistics	render;
	SingleOperationStatistics	read;
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
	SingleOperationStatistics	total;

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
299
// Statistics for upload + render + read samples. 'upload', 'render', 'read'
// and 'total' are filled by the calculateSampleStatistics() overloads; the
// remaining members are filled by calculateBasicStatistics().
struct UploadRenderReadStatistics
{
	SingleOperationStatistics	upload;
	SingleOperationStatistics	render;
	SingleOperationStatistics	read;
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
	SingleOperationStatistics	total;

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
317
// Statistics for render + upload + render + read samples. The per-phase
// members and 'total' are filled by calculateSampleStatistics(); the remaining
// members are filled by calculateBasicStatistics().
struct RenderUploadRenderReadStatistics
{
	SingleOperationStatistics	firstRender;
	SingleOperationStatistics	upload;
	SingleOperationStatistics	secondRender;
	SingleOperationStatistics	read;
	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
	SingleOperationStatistics	total;

	float						medianRate;				// median of (predictor / duration in seconds)
	float						maxDiffTime;			// largest (actual - predicted) duration
	float						maxDiff9DecileTime;		// (actual - predicted) sampled at position 0.9
	float						medianDiffTime;			// (actual - predicted) sampled at position 0.5

	// Relative variants: (actual - predicted) / predicted.
	float						maxRelDiffTime;
	float						max9DecileRelDiffTime;
	float						medianRelDiffTime;
};
336
// Compile-time description of a duration (sample) type. The primary template
// is intentionally empty; each duration struct has an explicit specialization
// providing StatsType and a set of 0/1 enum flags.
template <typename T>
struct SampleTypeTraits
{
};
341
// Traits for SingleOperationDuration: no per-phase statistics. StatsType
// matches the return type of the corresponding calculateSampleStatistics()
// overload. The flags are presumably consumed by logging code outside this
// excerpt (TODO confirm).
template <>
struct SampleTypeTraits<SingleOperationDuration>
{
	typedef SingleCallStatistics StatsType;

	enum { HAS_MAP_STATS		= 0	};
	enum { HAS_UNMAP_STATS		= 0	};
	enum { HAS_WRITE_STATS		= 0	};
	enum { HAS_FLUSH_STATS		= 0	};
	enum { HAS_ALLOC_STATS		= 0	};
	enum { LOG_CONTRIBUTIONS	= 0	};
};
354
// Traits for MapBufferRangeDuration: map, unmap, write and alloc statistics
// are available; there is no flush phase.
template <>
struct SampleTypeTraits<MapBufferRangeDuration>
{
	typedef MapCallStatistics StatsType;

	enum { HAS_MAP_STATS		= 1	};
	enum { HAS_UNMAP_STATS		= 1	};
	enum { HAS_WRITE_STATS		= 1	};
	enum { HAS_FLUSH_STATS		= 0	};
	enum { HAS_ALLOC_STATS		= 1	};
	enum { LOG_CONTRIBUTIONS	= 1	};
};
367
// Traits for MapBufferRangeDurationNoAlloc: map, unmap and write statistics
// are available; alloc is flagged off, matching the calculateSampleStatistics()
// overload that does not compute it.
template <>
struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
{
	typedef MapCallStatistics StatsType;

	enum { HAS_MAP_STATS		= 1	};
	enum { HAS_UNMAP_STATS		= 1	};
	enum { HAS_WRITE_STATS		= 1	};
	enum { HAS_FLUSH_STATS		= 0	};
	enum { HAS_ALLOC_STATS		= 0	};
	enum { LOG_CONTRIBUTIONS	= 1	};
};
380
// Traits for MapBufferRangeFlushDuration: every per-phase statistic (map,
// unmap, write, flush, alloc) is available.
template <>
struct SampleTypeTraits<MapBufferRangeFlushDuration>
{
	typedef MapFlushCallStatistics StatsType;

	enum { HAS_MAP_STATS		= 1	};
	enum { HAS_UNMAP_STATS		= 1	};
	enum { HAS_WRITE_STATS		= 1	};
	enum { HAS_FLUSH_STATS		= 1	};
	enum { HAS_ALLOC_STATS		= 1	};
	enum { LOG_CONTRIBUTIONS	= 1	};
};
393
// Traits for MapBufferRangeFlushDurationNoAlloc: map, unmap, write and flush
// statistics are available; alloc is flagged off, matching the
// calculateSampleStatistics() overload that does not compute it.
template <>
struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
{
	typedef MapFlushCallStatistics StatsType;

	enum { HAS_MAP_STATS		= 1	};
	enum { HAS_UNMAP_STATS		= 1	};
	enum { HAS_WRITE_STATS		= 1	};
	enum { HAS_FLUSH_STATS		= 1	};
	enum { HAS_ALLOC_STATS		= 0	};
	enum { LOG_CONTRIBUTIONS	= 1	};
};
406
// Traits for RenderReadDuration: render, read and total statistics are
// available; there are no upload or first/second render phases.
template <>
struct SampleTypeTraits<RenderReadDuration>
{
	typedef RenderReadStatistics StatsType;

	enum { HAS_RENDER_STATS			= 1	};
	enum { HAS_READ_STATS			= 1	};
	enum { HAS_UPLOAD_STATS			= 0	};
	enum { HAS_TOTAL_STATS			= 1	};
	enum { HAS_FIRST_RENDER_STATS	= 0	};
	enum { HAS_SECOND_RENDER_STATS	= 0	};

	enum { LOG_CONTRIBUTIONS	= 1	};
};
421
// Traits for UnrelatedUploadRenderReadDuration: same flag values as the
// RenderReadDuration specialization.
template <>
struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
{
	typedef RenderReadStatistics StatsType;

	enum { HAS_RENDER_STATS			= 1	};
	enum { HAS_READ_STATS			= 1	};
	enum { HAS_UPLOAD_STATS			= 0	};
	enum { HAS_TOTAL_STATS			= 1	};
	enum { HAS_FIRST_RENDER_STATS	= 0	};
	enum { HAS_SECOND_RENDER_STATS	= 0	};

	enum { LOG_CONTRIBUTIONS	= 1	};
};
436
// Traits for UploadRenderReadDuration: upload, render, read and total
// statistics are available; LOG_UNRELATED_UPLOAD_SIZE is off.
template <>
struct SampleTypeTraits<UploadRenderReadDuration>
{
	typedef UploadRenderReadStatistics StatsType;

	enum { HAS_RENDER_STATS			= 1	};
	enum { HAS_READ_STATS			= 1	};
	enum { HAS_UPLOAD_STATS			= 1	};
	enum { HAS_TOTAL_STATS			= 1	};
	enum { HAS_FIRST_RENDER_STATS	= 0	};
	enum { HAS_SECOND_RENDER_STATS	= 0	};

	enum { LOG_CONTRIBUTIONS			= 1	};
	enum { LOG_UNRELATED_UPLOAD_SIZE	= 0 };
};
452
// Traits for UploadRenderReadDurationWithUnrelatedUploadSize: identical to the
// UploadRenderReadDuration specialization except that
// LOG_UNRELATED_UPLOAD_SIZE is on.
template <>
struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
{
	typedef UploadRenderReadStatistics StatsType;

	enum { HAS_RENDER_STATS			= 1	};
	enum { HAS_READ_STATS			= 1	};
	enum { HAS_UPLOAD_STATS			= 1	};
	enum { HAS_TOTAL_STATS			= 1	};
	enum { HAS_FIRST_RENDER_STATS	= 0	};
	enum { HAS_SECOND_RENDER_STATS	= 0	};

	enum { LOG_CONTRIBUTIONS			= 1	};
	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
};
468
// Traits for RenderUploadRenderReadDuration: the single "render" statistic is
// replaced by separate first/second render statistics; upload, read and total
// statistics are available and LOG_UNRELATED_UPLOAD_SIZE is on.
template <>
struct SampleTypeTraits<RenderUploadRenderReadDuration>
{
	typedef RenderUploadRenderReadStatistics StatsType;

	enum { HAS_RENDER_STATS			= 0	};
	enum { HAS_READ_STATS			= 1	};
	enum { HAS_UPLOAD_STATS			= 1	};
	enum { HAS_TOTAL_STATS			= 1	};
	enum { HAS_FIRST_RENDER_STATS	= 1	};
	enum { HAS_SECOND_RENDER_STATS	= 1	};

	enum { LOG_CONTRIBUTIONS			= 1	};
	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
};
484
// Transfer-rate figures produced by upload sample analysis. The code that
// fills these members is outside this excerpt; the meanings below are inferred
// from the member names (TODO confirm).
struct UploadSampleAnalyzeResult
{
	float transferRateMedian;		// presumably the median per-sample rate
	float transferRateAtRange;		// presumably the rate at the largest tested size
	float transferRateAtInfinity;	// presumably the asymptotic rate of the fitted line
};
491
// Render-rate figures produced by render sample analysis. The code that fills
// these members is outside this excerpt; the meanings below are inferred from
// the member names (TODO confirm).
struct RenderSampleAnalyzeResult
{
	float renderRateMedian;			// presumably the median per-sample rate
	float renderRateAtRange;		// presumably the rate at the largest tested size
	float renderRateAtInfinity;		// presumably the asymptotic rate of the fitted line
};
498
// Payload-free exception type derived from std::exception. Judging by the
// name it signals a failed buffer unmap; the throw site is outside this
// excerpt.
class UnmapFailureError : public std::exception
{
public:
	UnmapFailureError (void)
	{
	}
};
504
505static std::string getHumanReadableByteSize (int numBytes)
506{
507	std::ostringstream buf;
508
509	if (numBytes < 1024)
510		buf << numBytes << " byte(s)";
511	else if (numBytes < 1024 * 1024)
512		buf << de::floatToString(numBytes/1024.0f, 1) << " KiB";
513	else
514		buf << de::floatToString(numBytes/1024.0f/1024.0f, 1) << " MiB";
515
516	return buf.str();
517}
518
519static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
520{
521	// Time used by memcpy is assumed to be asymptotically linear
522
523	// With large numBytes, the probability of context switch or other random
524	// event is high. Apply memcpy in parts and report how much time would
525	// memcpy have used with the median transfer rate.
526
527	// Less than 1MiB, no need to do anything special
528	if (numBytes < 1048576)
529	{
530		deUint64 startTime;
531		deUint64 endTime;
532
533		deYield();
534
535		startTime = deGetMicroseconds();
536		deMemcpy(dst, src, numBytes);
537		endTime = deGetMicroseconds();
538
539		return endTime - startTime;
540	}
541	else
542	{
543		// Do memcpy in multiple parts
544
545		const int	numSections		= 5;
546		const int	sectionAlign	= 16;
547
548		int			sectionStarts[numSections+1];
549		int			sectionLens[numSections];
550		deUint64	sectionTimes[numSections];
551		deUint64	medianTime;
552		deUint64	bestTime		= 0;
553
554		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
555			sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
556		sectionStarts[numSections] = numBytes;
557
558		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
559			sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];
560
561		// Memcpy is usually called after mapbuffer range which may take
562		// a lot of time. To prevent power management from kicking in during
563		// copy, warm up more.
564		{
565			deYield();
566			tcu::warmupCPU();
567			deYield();
568		}
569
570		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
571		{
572			deUint64 startTime;
573			deUint64 endTime;
574
575			startTime = deGetMicroseconds();
576			deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
577			endTime = deGetMicroseconds();
578
579			sectionTimes[sectionNdx] = endTime - startTime;
580
581			if (!bestTime || sectionTimes[sectionNdx] < bestTime)
582				bestTime = sectionTimes[sectionNdx];
583
584			// Detect if write takes 50% longer than it should, and warm up if that happened
585			if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * bestTime)
586			{
587				deYield();
588				tcu::warmupCPU();
589				deYield();
590			}
591		}
592
593		std::sort(sectionTimes, sectionTimes + numSections);
594
595		if ((numSections % 2) == 0)
596			medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
597		else
598			medianTime = sectionTimes[numSections / 2];
599
600		return medianTime*numSections;
601	}
602}
603
604static float dummyCalculation (float initial, int workSize)
605{
606	float	a = initial;
607	int		b = 123;
608
609	for (int ndx = 0; ndx < workSize; ++ndx)
610	{
611		a = deFloatCos(a + (float)b);
612		b = (b + 63) % 107 + de::abs((int)(a*10.0f));
613	}
614
615	return a + (float)b;
616}
617
618static void busyWait (int microseconds)
619{
620	const deUint64	maxSingleWaitTime	= 1000; // 1ms
621	const deUint64	endTime				= deGetMicroseconds() + microseconds;
622	float			dummy				= *tcu::warmupCPUInternal::g_dummy.m_v;
623	int				workSize			= 500;
624
625	// exponentially increase work, cap to 1ms
626	while (deGetMicroseconds() < endTime)
627	{
628		const deUint64	startTime		= deGetMicroseconds();
629		deUint64		totalTime;
630
631		dummy = dummyCalculation(dummy, workSize);
632
633		totalTime = deGetMicroseconds() - startTime;
634
635		if (totalTime >= maxSingleWaitTime)
636			break;
637		else
638			workSize *= 2;
639	}
640
641	// "wait"
642	while (deGetMicroseconds() < endTime)
643		dummy = dummyCalculation(dummy, workSize);
644
645	*tcu::warmupCPUInternal::g_dummy.m_v = dummy;
646}
647
648// Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
649template <typename T>
650static float linearSample (const std::vector<T>& values, float position)
651{
652	DE_ASSERT(position >= 0.0f);
653	DE_ASSERT(position <= 1.0f);
654
655	const float	floatNdx			= ((int)values.size() - 1) * position;
656	const int	lowerNdx			= (int)deFloatFloor(floatNdx);
657	const int	higherNdx			= lowerNdx + 1;
658	const float	interpolationFactor = floatNdx - (float)lowerNdx;
659
660	DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
661	DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
662	DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
663
664	return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
665}
666
667template <typename T>
668SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
669{
670	SingleOperationStatistics	stats;
671	std::vector<deUint64>		values(samples.size());
672
673	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
674		values[ndx] = samples[ndx].duration.*target;
675
676	std::sort(values.begin(), values.end());
677
678	stats.minTime			= (float)values.front();
679	stats.maxTime			= (float)values.back();
680	stats.medianTime		= linearSample(values, 0.5f);
681	stats.min2DecileTime	= linearSample(values, 0.1f);
682	stats.max9DecileTime	= linearSample(values, 0.9f);
683
684	return stats;
685}
686
687template <typename StatisticsType, typename SampleType>
688void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
689{
690	std::vector<deUint64> values(samples.size());
691
692	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
693		values[ndx] = samples[ndx].duration.fitResponseDuration;
694
695	// median rate
696	{
697		std::vector<float> processingRates(samples.size());
698
699		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
700		{
701			const float timeInSeconds = values[ndx] / 1000.0f / 1000.0f;
702			processingRates[ndx] = samples[ndx].*predictor / timeInSeconds;
703		}
704
705		std::sort(processingRates.begin(), processingRates.end());
706
707		stats.medianRate = linearSample(processingRates, 0.5f);
708	}
709
710	// results compared to the approximation
711	{
712		std::vector<float> timeDiffs(samples.size());
713
714		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
715		{
716			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
717			const float actual		= (float)values[ndx];
718			timeDiffs[ndx] = actual - prediction;
719		}
720		std::sort(timeDiffs.begin(), timeDiffs.end());
721
722		stats.maxDiffTime			= timeDiffs.back();
723		stats.maxDiff9DecileTime	= linearSample(timeDiffs, 0.9f);
724		stats.medianDiffTime		= linearSample(timeDiffs, 0.5f);
725	}
726
727	// relative comparison to the approximation
728	{
729		std::vector<float> relativeDiffs(samples.size());
730
731		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
732		{
733			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
734			const float actual		= (float)values[ndx];
735
736			// Ignore cases where we predict negative times, or if
737			// ratio would be (nearly) infinite: ignore if predicted
738			// time is less than 1 microsecond
739			if (prediction < 1.0f)
740				relativeDiffs[ndx] = 0.0f;
741			else
742				relativeDiffs[ndx] = (actual - prediction) / prediction;
743		}
744		std::sort(relativeDiffs.begin(), relativeDiffs.end());
745
746		stats.maxRelDiffTime		= relativeDiffs.back();
747		stats.max9DecileRelDiffTime	= linearSample(relativeDiffs, 0.9f);
748		stats.medianRelDiffTime		= linearSample(relativeDiffs, 0.5f);
749	}
750
751	// values calculated using sorted timings
752
753	std::sort(values.begin(), values.end());
754
755	stats.result.minTime = (float)values.front();
756	stats.result.maxTime = (float)values.back();
757	stats.result.medianTime = linearSample(values, 0.5f);
758	stats.result.min2DecileTime = linearSample(values, 0.1f);
759	stats.result.max9DecileTime = linearSample(values, 0.9f);
760}
761
762template <typename StatisticsType, typename SampleType>
763void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
764{
765	calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
766}
767
768template <typename StatisticsType, typename SampleType>
769void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
770{
771	calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
772}
773
774static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
775{
776	SingleCallStatistics stats;
777
778	calculateBasicTransferStatistics(stats, fit, samples);
779
780	return stats;
781}
782
783static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
784{
785	MapCallStatistics stats;
786
787	calculateBasicTransferStatistics(stats, fit, samples);
788
789	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
790	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
791	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
792	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
793
794	return stats;
795}
796
797static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
798{
799	MapFlushCallStatistics stats;
800
801	calculateBasicTransferStatistics(stats, fit, samples);
802
803	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
804	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
805	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
806	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
807	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
808
809	return stats;
810}
811
812static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
813{
814	MapCallStatistics stats;
815
816	calculateBasicTransferStatistics(stats, fit, samples);
817
818	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
819	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
820	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
821
822	return stats;
823}
824
825static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
826{
827	MapFlushCallStatistics stats;
828
829	calculateBasicTransferStatistics(stats, fit, samples);
830
831	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
832	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
833	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
834	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
835
836	return stats;
837}
838
839static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
840{
841	RenderReadStatistics stats;
842
843	calculateBasicRenderStatistics(stats, fit, samples);
844
845	stats.render	= calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
846	stats.read		= calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
847	stats.total		= calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
848
849	return stats;
850}
851
852static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
853{
854	RenderReadStatistics stats;
855
856	calculateBasicRenderStatistics(stats, fit, samples);
857
858	stats.render	= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
859	stats.read		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
860	stats.total		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
861
862	return stats;
863}
864
865static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
866{
867	UploadRenderReadStatistics stats;
868
869	calculateBasicRenderStatistics(stats, fit, samples);
870
871	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
872	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
873	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
874	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
875
876	return stats;
877}
878
879static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
880{
881	UploadRenderReadStatistics stats;
882
883	calculateBasicRenderStatistics(stats, fit, samples);
884
885	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
886	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
887	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
888	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
889
890	return stats;
891}
892
893static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
894{
895	RenderUploadRenderReadStatistics stats;
896
897	calculateBasicRenderStatistics(stats, fit, samples);
898
899	stats.firstRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
900	stats.upload		= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
901	stats.secondRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
902	stats.read			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
903	stats.total			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
904
905	return stats;
906}
907
908template <typename DurationType>
909static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
910{
911	std::vector<tcu::Vec2> samplePoints;
912
913	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
914	{
915		tcu::Vec2 point;
916
917		point.x() = (float)(samples[sampleNdx].writtenSize);
918		point.y() = (float)(samples[sampleNdx].duration.*target);
919
920		samplePoints.push_back(point);
921	}
922
923	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
924}
925
926template <typename DurationType>
927static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
928{
929	std::vector<tcu::Vec2> samplePoints;
930
931	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
932	{
933		tcu::Vec2 point;
934
935		point.x() = (float)(samples[sampleNdx].renderDataSize);
936		point.y() = (float)(samples[sampleNdx].duration.*target);
937
938		samplePoints.push_back(point);
939	}
940
941	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
942}
943
944template <typename T>
945static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
946{
947	return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
948}
949
950template <typename T>
951static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
952{
953	return fitLineToSamples(samples, 0, (int)samples.size(), target);
954}
955
956static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
957{
958	const float lineAMin		= lineAOffset + lineACoefficient * xmin;
959	const float lineAMax		= lineAOffset + lineACoefficient * xmax;
960	const float lineBMin		= lineBOffset + lineBCoefficient * xmin;
961	const float lineBMax		= lineBOffset + lineBCoefficient * xmax;
962	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
963	const bool	aOverBAtEnd		= (lineAMax > lineBMax);
964
965	if (aOverBAtBegin == aOverBAtEnd)
966	{
967		// lines do not intersect
968
969		const float midpoint	= (xmin + xmax) / 2.0f;
970		const float width		= (xmax - xmin);
971
972		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
973		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
974
975		return width * de::abs(lineAHeight - lineBHeight);
976	}
977	else
978	{
979
980		// lines intersect
981
982		const float approachCoeffient	= de::abs(lineACoefficient - lineBCoefficient);
983		const float epsilon				= 0.0001f;
984		const float leftHeight			= de::abs(lineAMin - lineBMin);
985		const float rightHeight			= de::abs(lineAMax - lineBMax);
986
987		if (approachCoeffient < epsilon)
988			return 0.0f;
989
990		return (0.5f * leftHeight * (leftHeight / approachCoeffient)) + (0.5f * rightHeight * (rightHeight / approachCoeffient));
991	}
992}
993
994template <typename T>
995static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
996{
997	// Compare the fitted line of first half of the samples to the fitted line of
998	// the second half of the samples. Calculate a AABB that fully contains every
999	// sample's x component and both fit lines in this range. Calculate the ratio
1000	// of the area between the lines and the AABB.
1001
1002	const float				epsilon				= 1.e-6f;
1003	const int				midPoint			= (int)samples.size() / 2;
1004	const LineParametersWithConfidence	startApproximation	= fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1005	const LineParametersWithConfidence	endApproximation	= fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1006
1007	const float				aabbMinX			= (float)(samples.front().*predictor);
1008	const float				aabbMinY			= de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
1009	const float				aabbMaxX			= (float)(samples.back().*predictor);
1010	const float				aabbMaxY			= de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);
1011
1012	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1013	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
1014	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1015
1016	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1017}
1018
1019template <typename DurationType>
1020static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
1021{
1022	return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1023}
1024
1025template <typename DurationType>
1026static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
1027{
1028	return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1029}
1030
1031template <typename T>
1032static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
1033{
1034	// Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1035	// Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1036	// contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1037	// the lines and the AABB.
1038
1039	const float				epsilon				= 1.e-6f;
1040	const LineParametersWithConfidence	evenApproximation	= fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1041	const LineParametersWithConfidence	oddApproximation	= fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1042
1043	const float				aabbMinX			= (float)(samples.front().*predictor);
1044	const float				aabbMinY			= de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
1045	const float				aabbMaxX			= (float)(samples.back().*predictor);
1046	const float				aabbMaxY			= de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);
1047
1048	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1049	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
1050	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1051
1052	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1053}
1054
1055template <typename DurationType>
1056static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
1057{
1058	return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1059}
1060
1061template <typename DurationType>
1062static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
1063{
1064	return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1065}
1066
1067template <typename DurationType>
1068static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
1069{
1070	minBufferSize = 0;
1071	maxBufferSize = 0;
1072
1073	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1074	{
1075		DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1076
1077		if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1078			minBufferSize = samples[sampleNdx].allocatedSize;
1079		if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1080			maxBufferSize = samples[sampleNdx].allocatedSize;
1081	}
1082
1083	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1084	{
1085		const float bucketNdxFloat	= (samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * numBuckets;
1086		const int bucketNdx			= de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
1087
1088		buckets[bucketNdx].push_back(samples[sampleNdx]);
1089	}
1090}
1091
1092template <typename SampleType>
1093static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1094{
1095	log	<< tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1096		<< tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1097		<< tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime)
1098		<< tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime)
1099		<< tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1100}
1101
1102template <typename SampleType>
1103static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1104{
1105	log	<< tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1106		<< tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1107		<< tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1108		<< tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1109		<< tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1110}
1111
1112template <typename SampleType>
1113static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1114{
1115	log	<< tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1116		<< tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1117		<< tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1118		<< tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1119		<< tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1120}
1121
1122template <typename SampleType>
1123static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1124{
1125	log	<< tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1126		<< tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1127		<< tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1128		<< tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1129		<< tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1130}
1131
1132template <typename SampleType>
1133static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1134{
1135	log	<< tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1136		<< tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1137		<< tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1138		<< tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1139		<< tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1140}
1141
1142template <typename SampleType>
1143static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1144{
1145	DE_UNREF(log);
1146	DE_UNREF(stats);
1147}
1148
1149template <typename SampleType>
1150static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1151{
1152	DE_UNREF(log);
1153	DE_UNREF(stats);
1154}
1155
1156template <typename SampleType>
1157static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1158{
1159	DE_UNREF(log);
1160	DE_UNREF(stats);
1161}
1162
1163template <typename SampleType>
1164static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1165{
1166	DE_UNREF(log);
1167	DE_UNREF(stats);
1168}
1169
1170template <typename SampleType>
1171static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1172{
1173	DE_UNREF(log);
1174	DE_UNREF(stats);
1175}
1176
1177template <typename SampleType>
1178static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1179{
1180	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1181	log	<< tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1182		<< tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1183		<< tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1184}
1185
1186template <typename SampleType>
1187static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1188{
1189	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1190	log	<< tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1191		<< tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1192		<< tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1193}
1194
1195template <typename SampleType>
1196static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1197{
1198	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1199	log	<< tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1200		<< tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1201		<< tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1202}
1203
1204template <typename SampleType>
1205static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1206{
1207	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1208	log	<< tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1209		<< tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1210		<< tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1211}
1212
1213template <typename SampleType>
1214static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1215{
1216	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1217	log	<< tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1218		<< tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1219		<< tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1220}
1221
1222template <typename SampleType>
1223static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1224{
1225	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1226	log	<< tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1227		<< tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1228		<< tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
1229}
1230
1231template <typename SampleType>
1232static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1233{
1234	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1235	log	<< tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1236		<< tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1237		<< tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1238}
1239
1240template <typename SampleType>
1241static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1242{
1243	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1244	log	<< tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1245		<< tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1246		<< tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
1247}
1248
1249template <typename SampleType>
1250static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1251{
1252	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1253	log	<< tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1254		<< tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1255		<< tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1256}
1257
1258template <typename SampleType>
1259static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1260{
1261	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration);
1262	log	<< tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1263		<< tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1264		<< tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
1265}
1266
1267template <typename SampleType>
1268static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1269{
1270	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration);
1271	log	<< tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1272		<< tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1273		<< tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
1274}
1275
1276template <typename SampleType>
1277static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1278{
1279	DE_UNREF(log);
1280	DE_UNREF(samples);
1281	DE_UNREF(stats);
1282}
1283
1284template <typename SampleType>
1285static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1286{
1287	DE_UNREF(log);
1288	DE_UNREF(samples);
1289	DE_UNREF(stats);
1290}
1291
1292template <typename SampleType>
1293static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1294{
1295	DE_UNREF(log);
1296	DE_UNREF(samples);
1297	DE_UNREF(stats);
1298}
1299
1300template <typename SampleType>
1301static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1302{
1303	DE_UNREF(log);
1304	DE_UNREF(samples);
1305	DE_UNREF(stats);
1306}
1307
1308template <typename SampleType>
1309static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1310{
1311	DE_UNREF(log);
1312	DE_UNREF(samples);
1313	DE_UNREF(stats);
1314}
1315
1316template <typename SampleType>
1317static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1318{
1319	DE_UNREF(log);
1320	DE_UNREF(samples);
1321	DE_UNREF(stats);
1322}
1323
1324template <typename SampleType>
1325static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1326{
1327	DE_UNREF(log);
1328	DE_UNREF(samples);
1329	DE_UNREF(stats);
1330}
1331
1332template <typename SampleType>
1333static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1334{
1335	DE_UNREF(log);
1336	DE_UNREF(samples);
1337	DE_UNREF(stats);
1338}
1339
1340template <typename SampleType>
1341static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1342{
1343	DE_UNREF(log);
1344	DE_UNREF(samples);
1345	DE_UNREF(stats);
1346}
1347
1348template <typename SampleType>
1349static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1350{
1351	DE_UNREF(log);
1352	DE_UNREF(samples);
1353	DE_UNREF(stats);
1354}
1355
1356template <typename SampleType>
1357static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1358{
1359	DE_UNREF(log);
1360	DE_UNREF(samples);
1361	DE_UNREF(stats);
1362}
1363
1364void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
1365{
1366	log << tcu::TestLog::SampleList("Samples", "Samples")
1367		<< tcu::TestLog::SampleInfo
1368		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1369		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1370		<< tcu::TestLog::ValueInfo("UploadTime",		"Upload time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1371		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1372		<< tcu::TestLog::EndSampleInfo;
1373
1374	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1375	{
1376		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1377		log	<< tcu::TestLog::Sample
1378			<< samples[sampleNdx].writtenSize
1379			<< samples[sampleNdx].bufferSize
1380			<< (int)samples[sampleNdx].duration.totalDuration
1381			<< fitResidual
1382			<< tcu::TestLog::EndSample;
1383	}
1384
1385	log << tcu::TestLog::EndSampleList;
1386}
1387
1388void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
1389{
1390	log << tcu::TestLog::SampleList("Samples", "Samples")
1391		<< tcu::TestLog::SampleInfo
1392		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1393		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1394		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1395		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1396		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1397		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1398		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1399		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1400		<< tcu::TestLog::EndSampleInfo;
1401
1402	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1403	{
1404		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1405		log	<< tcu::TestLog::Sample
1406			<< samples[sampleNdx].writtenSize
1407			<< samples[sampleNdx].bufferSize
1408			<< (int)samples[sampleNdx].duration.totalDuration
1409			<< (int)samples[sampleNdx].duration.allocDuration
1410			<< (int)samples[sampleNdx].duration.mapDuration
1411			<< (int)samples[sampleNdx].duration.unmapDuration
1412			<< (int)samples[sampleNdx].duration.writeDuration
1413			<< fitResidual
1414			<< tcu::TestLog::EndSample;
1415	}
1416
1417	log << tcu::TestLog::EndSampleList;
1418}
1419
1420void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
1421{
1422	log << tcu::TestLog::SampleList("Samples", "Samples")
1423		<< tcu::TestLog::SampleInfo
1424		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1425		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1426		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1427		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1428		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1429		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1430		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1431		<< tcu::TestLog::EndSampleInfo;
1432
1433	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1434	{
1435		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1436		log	<< tcu::TestLog::Sample
1437			<< samples[sampleNdx].writtenSize
1438			<< samples[sampleNdx].bufferSize
1439			<< (int)samples[sampleNdx].duration.totalDuration
1440			<< (int)samples[sampleNdx].duration.mapDuration
1441			<< (int)samples[sampleNdx].duration.unmapDuration
1442			<< (int)samples[sampleNdx].duration.writeDuration
1443			<< fitResidual
1444			<< tcu::TestLog::EndSample;
1445	}
1446
1447	log << tcu::TestLog::EndSampleList;
1448}
1449
1450void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
1451{
1452	log << tcu::TestLog::SampleList("Samples", "Samples")
1453		<< tcu::TestLog::SampleInfo
1454		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1455		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1456		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1457		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1458		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1459		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1460		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1461		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1462		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1463		<< tcu::TestLog::EndSampleInfo;
1464
1465	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1466	{
1467		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1468		log	<< tcu::TestLog::Sample
1469			<< samples[sampleNdx].writtenSize
1470			<< samples[sampleNdx].bufferSize
1471			<< (int)samples[sampleNdx].duration.totalDuration
1472			<< (int)samples[sampleNdx].duration.allocDuration
1473			<< (int)samples[sampleNdx].duration.mapDuration
1474			<< (int)samples[sampleNdx].duration.unmapDuration
1475			<< (int)samples[sampleNdx].duration.writeDuration
1476			<< (int)samples[sampleNdx].duration.flushDuration
1477			<< fitResidual
1478			<< tcu::TestLog::EndSample;
1479	}
1480
1481	log << tcu::TestLog::EndSampleList;
1482}
1483
1484void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
1485{
1486	log << tcu::TestLog::SampleList("Samples", "Samples")
1487		<< tcu::TestLog::SampleInfo
1488		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1489		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1490		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1491		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1492		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1493		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1494		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1495		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1496		<< tcu::TestLog::EndSampleInfo;
1497
1498	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1499	{
1500		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1501		log	<< tcu::TestLog::Sample
1502			<< samples[sampleNdx].writtenSize
1503			<< samples[sampleNdx].bufferSize
1504			<< (int)samples[sampleNdx].duration.totalDuration
1505			<< (int)samples[sampleNdx].duration.mapDuration
1506			<< (int)samples[sampleNdx].duration.unmapDuration
1507			<< (int)samples[sampleNdx].duration.writeDuration
1508			<< (int)samples[sampleNdx].duration.flushDuration
1509			<< fitResidual
1510			<< tcu::TestLog::EndSample;
1511	}
1512
1513	log << tcu::TestLog::EndSampleList;
1514}
1515
1516void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
1517{
1518	log << tcu::TestLog::SampleList("Samples", "Samples")
1519		<< tcu::TestLog::SampleInfo
1520		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",		"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1521		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",	"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1522		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1523		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1524		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1525		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1526		<< tcu::TestLog::EndSampleInfo;
1527
1528	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1529	{
1530		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1531		log	<< tcu::TestLog::Sample
1532			<< samples[sampleNdx].renderDataSize
1533			<< samples[sampleNdx].numVertices
1534			<< (int)samples[sampleNdx].duration.renderReadDuration
1535			<< (int)samples[sampleNdx].duration.renderDuration
1536			<< (int)samples[sampleNdx].duration.readDuration
1537			<< fitResidual
1538			<< tcu::TestLog::EndSample;
1539	}
1540
1541	log << tcu::TestLog::EndSampleList;
1542}
1543
1544void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
1545{
1546	log << tcu::TestLog::SampleList("Samples", "Samples")
1547		<< tcu::TestLog::SampleInfo
1548		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1549		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",		"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1550		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",	"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1551		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1552		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1553		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1554		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1555		<< tcu::TestLog::EndSampleInfo;
1556
1557	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1558	{
1559		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1560		log	<< tcu::TestLog::Sample
1561			<< samples[sampleNdx].renderDataSize
1562			<< samples[sampleNdx].numVertices
1563			<< samples[sampleNdx].unrelatedDataSize
1564			<< (int)samples[sampleNdx].duration.renderReadDuration
1565			<< (int)samples[sampleNdx].duration.renderDuration
1566			<< (int)samples[sampleNdx].duration.readDuration
1567			<< fitResidual
1568			<< tcu::TestLog::EndSample;
1569	}
1570
1571	log << tcu::TestLog::EndSampleList;
1572}
1573
1574void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
1575{
1576	log << tcu::TestLog::SampleList("Samples", "Samples")
1577		<< tcu::TestLog::SampleInfo
1578		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1579		<< tcu::TestLog::ValueInfo("UploadSize",		"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1580		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1581		<< tcu::TestLog::ValueInfo("DrawReadTime",		"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1582		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1583		<< tcu::TestLog::ValueInfo("Upload time",		"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1584		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1585		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1586		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1587		<< tcu::TestLog::EndSampleInfo;
1588
1589	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1590	{
1591		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1592		log	<< tcu::TestLog::Sample
1593			<< samples[sampleNdx].renderDataSize
1594			<< samples[sampleNdx].uploadedDataSize
1595			<< samples[sampleNdx].numVertices
1596			<< (int)samples[sampleNdx].duration.renderReadDuration
1597			<< (int)samples[sampleNdx].duration.totalDuration
1598			<< (int)samples[sampleNdx].duration.uploadDuration
1599			<< (int)samples[sampleNdx].duration.renderDuration
1600			<< (int)samples[sampleNdx].duration.readDuration
1601			<< fitResidual
1602			<< tcu::TestLog::EndSample;
1603	}
1604
1605	log << tcu::TestLog::EndSampleList;
1606}
1607
1608void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
1609{
1610	log << tcu::TestLog::SampleList("Samples", "Samples")
1611		<< tcu::TestLog::SampleInfo
1612		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1613		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1614		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1615		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1616		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1617		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1618		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1619		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1620		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1621		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1622		<< tcu::TestLog::EndSampleInfo;
1623
1624	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1625	{
1626		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1627		log	<< tcu::TestLog::Sample
1628			<< samples[sampleNdx].renderDataSize
1629			<< samples[sampleNdx].uploadedDataSize
1630			<< samples[sampleNdx].numVertices
1631			<< samples[sampleNdx].unrelatedDataSize
1632			<< (int)samples[sampleNdx].duration.renderReadDuration
1633			<< (int)samples[sampleNdx].duration.totalDuration
1634			<< (int)samples[sampleNdx].duration.uploadDuration
1635			<< (int)samples[sampleNdx].duration.renderDuration
1636			<< (int)samples[sampleNdx].duration.readDuration
1637			<< fitResidual
1638			<< tcu::TestLog::EndSample;
1639	}
1640
1641	log << tcu::TestLog::EndSampleList;
1642}
1643
1644void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
1645{
1646	log << tcu::TestLog::SampleList("Samples", "Samples")
1647		<< tcu::TestLog::SampleInfo
1648		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1649		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1650		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",					"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1651		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Second draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1652		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1653		<< tcu::TestLog::ValueInfo("FirstDrawCallTime",		"First draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1654		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1655		<< tcu::TestLog::ValueInfo("SecondDrawCallTime",	"Second draw call time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1656		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1657		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1658		<< tcu::TestLog::EndSampleInfo;
1659
1660	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1661	{
1662		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1663		log	<< tcu::TestLog::Sample
1664			<< samples[sampleNdx].renderDataSize
1665			<< samples[sampleNdx].uploadedDataSize
1666			<< samples[sampleNdx].numVertices
1667			<< (int)samples[sampleNdx].duration.renderReadDuration
1668			<< (int)samples[sampleNdx].duration.totalDuration
1669			<< (int)samples[sampleNdx].duration.firstRenderDuration
1670			<< (int)samples[sampleNdx].duration.uploadDuration
1671			<< (int)samples[sampleNdx].duration.secondRenderDuration
1672			<< (int)samples[sampleNdx].duration.readDuration
1673			<< fitResidual
1674			<< tcu::TestLog::EndSample;
1675	}
1676
1677	log << tcu::TestLog::EndSampleList;
1678}
1679
1680template <typename SampleType>
1681static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
1682{
1683	// Assume data is linear with some outliers, fit a line
1684	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1685	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1686	float													approximatedTransferRate;
1687	float													approximatedTransferRateNoConstant;
1688
1689	// Output raw samples
1690	{
1691		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1692		logSampleList(log, theilSenFitting, samples);
1693	}
1694
1695	// Calculate results for different ranges
1696	if (logBucketPerformance)
1697	{
1698		const int										numBuckets				= 4;
1699		int												minBufferSize			= 0;
1700		int												maxBufferSize			= 0;
1701		std::vector<UploadSampleResult<SampleType> >	buckets[numBuckets];
1702
1703		bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
1704
1705		for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
1706		{
1707			if (buckets[bucketNdx].empty())
1708				continue;
1709
1710			// Print a nice result summary
1711
1712			const int												bucketRangeMin	= minBufferSize + (int)(( bucketNdx    / (float)numBuckets) * (maxBufferSize - minBufferSize));
1713			const int												bucketRangeMax	= minBufferSize + (int)(((bucketNdx+1) / (float)numBuckets) * (maxBufferSize - minBufferSize));
1714			const typename SampleTypeTraits<SampleType>::StatsType	stats			= calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
1715			const tcu::ScopedLogSection								section			(log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
1716
1717			logMapRangeStats<SampleType>(log, stats);
1718			logUnmapStats<SampleType>(log, stats);
1719			logWriteStats<SampleType>(log, stats);
1720			logFlushStats<SampleType>(log, stats);
1721			logAllocStats<SampleType>(log, stats);
1722
1723			log	<< tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
1724				<< tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
1725				<< tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
1726				<< tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
1727				<< tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
1728				<< tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
1729				<< tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
1730				<< tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
1731				<< tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
1732				<< tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
1733				<< tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
1734				<< tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
1735		}
1736	}
1737
1738	// Contributions
1739	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1740	{
1741		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1742
1743		logMapContribution(log, samples, resultStats);
1744		logUnmapContribution(log, samples, resultStats);
1745		logWriteContribution(log, samples, resultStats);
1746		logFlushContribution(log, samples, resultStats);
1747		logAllocContribution(log, samples, resultStats);
1748	}
1749
1750	// Print results
1751	{
1752		const tcu::ScopedLogSection	section(log, "Results", "Results");
1753
1754		const int	medianBufferSize					= (samples.front().bufferSize + samples.back().bufferSize) / 2;
1755		const float	approximatedTransferTime			= (theilSenFitting.offset + theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1756		const float	approximatedTransferTimeNoConstant	= (theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1757		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1758		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1759
1760		approximatedTransferRateNoConstant				= medianBufferSize / approximatedTransferTimeNoConstant;
1761		approximatedTransferRate						= medianBufferSize / approximatedTransferTime;
1762
1763		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1764			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1765			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1766			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1767			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1768			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1769			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1770			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1771			<< tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
1772			<< tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
1773			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1774			<< tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1775	}
1776
1777	// return approximated transfer rate
1778	{
1779		UploadSampleAnalyzeResult result;
1780
1781		result.transferRateMedian = resultStats.medianRate;
1782		result.transferRateAtRange = approximatedTransferRate;
1783		result.transferRateAtInfinity = approximatedTransferRateNoConstant;
1784
1785		return result;
1786	}
1787}
1788
1789template <typename SampleType>
1790static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
1791{
1792	// Assume data is linear with some outliers, fit a line
1793	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1794	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1795	float													approximatedProcessingRate;
1796	float													approximatedProcessingRateNoConstant;
1797
1798	// output raw samples
1799	{
1800		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1801		logSampleList(log, theilSenFitting, samples);
1802	}
1803
1804	// Contributions
1805	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1806	{
1807		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1808
1809		logFirstRenderContribution(log, samples, resultStats);
1810		logUploadContribution(log, samples, resultStats);
1811		logRenderContribution(log, samples, resultStats);
1812		logSecondRenderContribution(log, samples, resultStats);
1813		logReadContribution(log, samples, resultStats);
1814		logTotalContribution(log, samples, resultStats);
1815	}
1816
1817	// print results
1818	{
1819		const tcu::ScopedLogSection	section(log, "Results", "Results");
1820
1821		const int	medianDataSize						= (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
1822		const float	approximatedRenderTime				= (theilSenFitting.offset + theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1823		const float	approximatedRenderTimeNoConstant	= (theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1824		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1825		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1826
1827		approximatedProcessingRateNoConstant			= medianDataSize / approximatedRenderTimeNoConstant;
1828		approximatedProcessingRate						= medianDataSize / approximatedRenderTime;
1829
1830		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1831			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1832			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1833			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1834			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1835			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1836			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1837			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1838			<< tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
1839			<< tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
1840			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1841			<< tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1842	}
1843
1844	// return approximated render rate
1845	{
1846		RenderSampleAnalyzeResult result;
1847
1848		result.renderRateMedian		= resultStats.medianRate;
1849		result.renderRateAtRange	= approximatedProcessingRate;
1850		result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
1851
1852		return result;
1853	}
1854	return RenderSampleAnalyzeResult();
1855}
1856
1857static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
1858{
1859	de::Random	rnd			(0xabc);
1860	const int	midPoint	= (numSamples+1) / 2;		// !< ceil(m_numSamples / 2)
1861
1862	DE_ASSERT((int)iterationOrder.size() == numSamples);
1863
1864	// Two "passes" over range, randomize order in both passes
1865	// This allows to us detect if iterations are not independent
1866	// (first run and later run samples differ significantly?)
1867
1868	for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
1869		iterationOrder[sampleNdx] = sampleNdx * 2;
1870	for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
1871		iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
1872
1873	for (int ndx = 0; ndx < midPoint; ++ndx)
1874		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
1875	for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
1876		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]);
1877}
1878
1879template <typename SampleType>
1880class BasicBufferCase : public TestCase
1881{
1882public:
1883
1884	enum Flags
1885	{
1886		FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
1887	};
1888							BasicBufferCase		(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
1889							~BasicBufferCase	(void);
1890
1891	virtual void			init				(void);
1892	virtual void			deinit				(void);
1893
1894protected:
1895	IterateResult			iterate				(void);
1896
1897	virtual bool			runSample			(int iteration, UploadSampleResult<SampleType>& sample) = 0;
1898	virtual void			logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results) = 0;
1899
1900	void					disableGLWarmup		(void);
1901	void					waitGLResults		(void);
1902
1903	enum
1904	{
1905		DUMMY_RENDER_AREA_SIZE = 32
1906	};
1907
1908	glu::ShaderProgram*		m_dummyProgram;
1909	deInt32					m_dummyProgramPosLoc;
1910	deUint32				m_bufferID;
1911
1912	const int				m_numSamples;
1913	const int				m_bufferSizeMin;
1914	const int				m_bufferSizeMax;
1915	const bool				m_allocateLargerBuffer;
1916
1917private:
1918	int						m_iteration;
1919	std::vector<int>		m_iterationOrder;
1920	std::vector<UploadSampleResult<SampleType> > m_results;
1921
1922	bool					m_useGL;
1923	int						m_bufferRandomizerTimer;
1924};
1925
1926template <typename SampleType>
1927BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
1928	: TestCase					(context, tcu::NODETYPE_PERFORMANCE, name, desc)
1929	, m_dummyProgram			(DE_NULL)
1930	, m_dummyProgramPosLoc		(-1)
1931	, m_bufferID				(0)
1932	, m_numSamples				(numSamples)
1933	, m_bufferSizeMin			(bufferSizeMin)
1934	, m_bufferSizeMax			(bufferSizeMax)
1935	, m_allocateLargerBuffer	((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
1936	, m_iteration				(0)
1937	, m_iterationOrder			(numSamples)
1938	, m_results					(numSamples)
1939	, m_useGL					(true)
1940	, m_bufferRandomizerTimer	(0)
1941{
1942	// "randomize" iteration order. Deterministic, patternless
1943	generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
1944
1945	// choose buffer sizes
1946	for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
1947	{
1948		const int rawBufferSize			= (int)deFloatFloor(bufferSizeMin + (bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / m_numSamples));
1949		const int bufferSize			= deAlign32(rawBufferSize, 16);
1950		const int allocatedBufferSize	= deAlign32((m_allocateLargerBuffer) ? ((int)(bufferSize * 1.5f)) : (bufferSize), 16);
1951
1952		m_results[sampleNdx].bufferSize		= bufferSize;
1953		m_results[sampleNdx].allocatedSize	= allocatedBufferSize;
1954		m_results[sampleNdx].writtenSize	= -1;
1955	}
1956}
1957
1958template <typename SampleType>
1959BasicBufferCase<SampleType>::~BasicBufferCase (void)
1960{
1961	deinit();
1962}
1963
1964template <typename SampleType>
1965void BasicBufferCase<SampleType>::init (void)
1966{
1967	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1968
1969	if (!m_useGL)
1970		return;
1971
1972	// \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE
1973
1974	// dummy shader
1975
1976	m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader));
1977	if (!m_dummyProgram->isOk())
1978	{
1979		m_testCtx.getLog() << *m_dummyProgram;
1980		throw tcu::TestError("failed to build shader program");
1981	}
1982
1983	m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");
1984	if (m_dummyProgramPosLoc == -1)
1985		throw tcu::TestError("a_position location was -1");
1986}
1987
1988template <typename SampleType>
1989void BasicBufferCase<SampleType>::deinit (void)
1990{
1991	if (m_bufferID)
1992	{
1993		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
1994		m_bufferID = 0;
1995	}
1996
1997	delete m_dummyProgram;
1998	m_dummyProgram = DE_NULL;
1999}
2000
2001template <typename SampleType>
2002TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
2003{
2004	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2005	static bool				buffersWarmedUp	= false;
2006
2007	static const deUint32	usages[] =
2008	{
2009		GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
2010		GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
2011		GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2012	};
2013
	// Allocate some randomly sized buffers and remove them to
	// make sure the first samples, too, have some buffers removed
	// just before their allocation. This is only needed by the
	// first test.
2018
2019	if (m_useGL && !buffersWarmedUp)
2020	{
2021		const int					numRandomBuffers				= 6;
2022		const int					numRepeats						= 10;
2023		const int					maxBufferSize					= 16777216;
2024		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2025		de::Random					rnd								(0x1234);
2026		deUint32					bufferIDs[numRandomBuffers]		= {0};
2027
2028		gl.useProgram(m_dummyProgram->getProgram());
2029		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2030		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2031
2032		for (int ndx = 0; ndx < numRepeats; ++ndx)
2033		{
2034			// Create buffer and maybe draw from it
2035			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2036			{
2037				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2038				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2039
2040				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2041				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2042				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2043
2044				if (rnd.getBool())
2045				{
2046					gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2047					gl.drawArrays(GL_POINTS, 0, 1);
2048					gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2049				}
2050			}
2051
2052			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2053				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2054
2055			waitGLResults();
2056			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2057
2058			m_testCtx.touchWatchdog();
2059		}
2060
2061		buffersWarmedUp = true;
2062		return CONTINUE;
2063	}
2064	else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2065	{
		// Do some random buffer operations every now and then
		// to make sure the previous test iterations won't affect
		// following test runs.
2069
2070		const int					numRandomBuffers				= 3;
2071		const int					maxBufferSize					= 16777216;
2072		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2073		de::Random					rnd								(0x1234 + 0xabc * m_bufferRandomizerTimer);
2074
2075		// BufferData
2076		{
2077			deUint32 bufferIDs[numRandomBuffers] = {0};
2078
2079			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2080			{
2081				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2082				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2083
2084				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2085				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2086				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2087			}
2088
2089			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2090				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2091		}
2092
2093		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2094
2095		// Do some memory mappings
2096		{
2097			deUint32 bufferIDs[numRandomBuffers] = {0};
2098
2099			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2100			{
2101				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2102				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2103				void*			ptr;
2104
2105				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2106				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2107				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2108
2109				gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2110				gl.drawArrays(GL_POINTS, 0, 1);
2111				gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2112
2113				if (rnd.getBool())
2114					waitGLResults();
2115
2116				ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2117				if (ptr)
2118				{
2119					medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2120					gl.unmapBuffer(GL_ARRAY_BUFFER);
2121				}
2122			}
2123
2124			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2125				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2126
2127			waitGLResults();
2128		}
2129
2130		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2131		return CONTINUE;
2132	}
2133	else
2134	{
2135		const int	currentIteration	= m_iteration;
2136		const int	sampleNdx			= m_iterationOrder[currentIteration];
2137		const bool	sampleRunSuccessful	= runSample(currentIteration, m_results[sampleNdx]);
2138
2139		GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2140
2141		// Retry failed samples
2142		if (!sampleRunSuccessful)
2143			return CONTINUE;
2144
2145		if (++m_iteration >= m_numSamples)
2146		{
2147			logAndSetTestResult(m_results);
2148			return STOP;
2149		}
2150		else
2151			return CONTINUE;
2152	}
2153}
2154
2155template <typename SampleType>
2156void BasicBufferCase<SampleType>::disableGLWarmup (void)
2157{
2158	m_useGL = false;
2159}
2160
2161template <typename SampleType>
2162void BasicBufferCase<SampleType>::waitGLResults (void)
2163{
2164	tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2165	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
2166}
2167
2168template <typename SampleType>
2169class BasicUploadCase : public BasicBufferCase<SampleType>
2170{
2171public:
2172	enum CaseType
2173	{
2174		CASE_NO_BUFFERS = 0,
2175		CASE_NEW_BUFFER,
2176		CASE_UNSPECIFIED_BUFFER,
2177		CASE_SPECIFIED_BUFFER,
2178		CASE_USED_BUFFER,
2179		CASE_USED_LARGER_BUFFER,
2180
2181		CASE_LAST
2182	};
2183
2184	enum CaseFlags
2185	{
2186		FLAG_DONT_LOG_BUFFER_INFO				= 0x01,
2187		FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT	= 0x02,
2188	};
2189
2190	enum ResultType
2191	{
2192		RESULT_MEDIAN_TRANSFER_RATE = 0,
2193		RESULT_ASYMPTOTIC_TRANSFER_RATE,
2194	};
2195
2196						BasicUploadCase		(Context& context,
2197											 const char* name,
2198											 const char* desc,
2199											 int bufferSizeMin,
2200											 int bufferSizeMax,
2201											 int numSamples,
2202											 deUint32 bufferUsage,
2203											 CaseType caseType,
2204											 ResultType resultType,
2205											 int flags = 0);
2206
2207						~BasicUploadCase	(void);
2208
2209	virtual void		init				(void);
2210	virtual void		deinit				(void);
2211
2212private:
2213	bool				runSample			(int iteration, UploadSampleResult<SampleType>& sample);
2214	void				createBuffer		(int bufferSize, int iteration);
2215	void				deleteBuffer		(int bufferSize);
2216	void				useBuffer			(int bufferSize);
2217
2218	virtual void		testBufferUpload	(UploadSampleResult<SampleType>& result, int writeSize) = 0;
2219	void				logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results);
2220
2221	deUint32			m_dummyBufferID;
2222
2223protected:
2224	const CaseType		m_caseType;
2225	const ResultType	m_resultType;
2226	const deUint32		m_bufferUsage;
2227	const bool			m_logBufferInfo;
2228	const bool			m_bufferUnspecifiedContent;
2229	std::vector<deUint8> m_zeroData;
2230
2231	using BasicBufferCase<SampleType>::m_testCtx;
2232	using BasicBufferCase<SampleType>::m_context;
2233
2234	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
2235	using BasicBufferCase<SampleType>::m_dummyProgram;
2236	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
2237	using BasicBufferCase<SampleType>::m_bufferID;
2238	using BasicBufferCase<SampleType>::m_numSamples;
2239	using BasicBufferCase<SampleType>::m_bufferSizeMin;
2240	using BasicBufferCase<SampleType>::m_bufferSizeMax;
2241	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2242};
2243
2244template <typename SampleType>
2245BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
2246	: BasicBufferCase<SampleType>	(context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2247	, m_dummyBufferID				(0)
2248	, m_caseType					(caseType)
2249	, m_resultType					(resultType)
2250	, m_bufferUsage					(bufferUsage)
2251	, m_logBufferInfo				((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2252	, m_bufferUnspecifiedContent	((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2253	, m_zeroData					()
2254{
2255	DE_ASSERT(m_caseType < CASE_LAST);
2256}
2257
2258template <typename SampleType>
2259BasicUploadCase<SampleType>::~BasicUploadCase (void)
2260{
2261	deinit();
2262}
2263
2264template <typename SampleType>
2265void BasicUploadCase<SampleType>::init (void)
2266{
2267	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2268
2269	BasicBufferCase<SampleType>::init();
2270
2271	// zero buffer as upload source
2272	m_zeroData.resize(m_bufferSizeMax, 0x00);
2273
2274	// dummy buffer
2275
2276	gl.genBuffers(1, &m_dummyBufferID);
2277	GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2278
2279	// log basic info
2280
2281	m_testCtx.getLog()
2282		<< tcu::TestLog::Message
2283		<< "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
2284		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
2285		<< tcu::TestLog::EndMessage;
2286
2287	if (m_logBufferInfo)
2288	{
2289		switch (m_caseType)
2290		{
2291			case CASE_NO_BUFFERS:
2292				break;
2293
2294			case CASE_NEW_BUFFER:
2295				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage;
2296				break;
2297
2298			case CASE_UNSPECIFIED_BUFFER:
2299				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage;
2300				break;
2301
2302			case CASE_SPECIFIED_BUFFER:
2303				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage;
2304				break;
2305
2306			case CASE_USED_BUFFER:
2307				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage;
2308				break;
2309
2310			case CASE_USED_LARGER_BUFFER:
2311				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage;
2312				break;
2313
2314			default:
2315				DE_ASSERT(false);
2316				break;
2317		}
2318	}
2319
2320	if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2321		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
2322	else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2323		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
2324	else
2325		DE_ASSERT(false);
2326}
2327
2328template <typename SampleType>
2329void BasicUploadCase<SampleType>::deinit (void)
2330{
2331	if (m_dummyBufferID)
2332	{
2333		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID);
2334		m_dummyBufferID = 0;
2335	}
2336
2337	m_zeroData.clear();
2338
2339	BasicBufferCase<SampleType>::deinit();
2340}
2341
2342template <typename SampleType>
2343bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
2344{
2345	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2346	const int				allocatedBufferSize	= sample.allocatedSize;
2347	const int				bufferSize			= sample.bufferSize;
2348
2349	if (m_caseType != CASE_NO_BUFFERS)
2350		createBuffer(iteration, allocatedBufferSize);
2351
2352	// warmup CPU before the test to make sure the power management governor
2353	// keeps us in the "high performance" mode
2354	{
2355		deYield();
2356		tcu::warmupCPU();
2357		deYield();
2358	}
2359
2360	testBufferUpload(sample, bufferSize);
2361	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2362
2363	if (m_caseType != CASE_NO_BUFFERS)
2364		deleteBuffer(bufferSize);
2365
2366	return true;
2367}
2368
2369template <typename SampleType>
2370void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
2371{
2372	DE_ASSERT(!m_bufferID);
2373	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2374
2375	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2376
2377	// create buffer
2378
2379	if (m_caseType == CASE_NO_BUFFERS)
2380		return;
2381
2382	// create empty buffer
2383
2384	gl.genBuffers(1, &m_bufferID);
2385	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2386	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2387
2388	if (m_caseType == CASE_NEW_BUFFER)
2389	{
2390		// upload something else first, this should reduce noise in samples
2391
2392		de::Random					rng				(0xbadc * iteration);
2393		const int					sizeDelta		= rng.getInt(0, 2097140);
2394		const int					dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated
2395		const std::vector<deUint8>	dummyData		(dummyUploadSize, 0x20);
2396
2397		gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
2398		gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage);
2399
2400		// make sure upload won't interfere with the test
2401		useBuffer(dummyUploadSize);
2402
2403		// don't kill the buffer so that the following upload cannot potentially reuse the buffer
2404
2405		return;
2406	}
2407
2408	// specify it
2409
2410	if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2411		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2412	else
2413	{
2414		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2415		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2416	}
2417
2418	if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2419		return;
2420
2421	// use it and make sure it is uploaded
2422
2423	useBuffer(bufferSize);
2424	DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2425}
2426
2427template <typename SampleType>
2428void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
2429{
2430	DE_ASSERT(m_bufferID);
2431	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2432
2433	// render from the buffer to make sure it actually made it to the gpu. This is to
2434	// make sure that if the upload actually happens later or is happening right now in
2435	// the background, it will not interfere with further test runs
2436
2437	// if buffer contains unspecified content, sourcing data from it results in undefined
2438	// results, possibly including program termination. Specify all data to prevent such
2439	// case from happening
2440
2441	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2442
2443	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2444
2445	if (m_bufferUnspecifiedContent)
2446	{
2447		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2448		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2449
2450		GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2451	}
2452
2453	useBuffer(bufferSize);
2454
2455	gl.deleteBuffers(1, &m_bufferID);
2456	m_bufferID = 0;
2457}
2458
2459template <typename SampleType>
2460void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
2461{
2462	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2463
2464	gl.useProgram(m_dummyProgram->getProgram());
2465
2466	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2467	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2468	gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2469
2470	// use whole buffer to make sure buffer is uploaded by drawing first and last
2471	DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2472	gl.drawArrays(GL_POINTS, 0, 1);
2473	gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2474
2475	BasicBufferCase<SampleType>::waitGLResults();
2476}
2477
2478template <typename SampleType>
2479void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
2480{
2481	const UploadSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), results, true);
2482
2483	// with small buffers, report the median transfer rate of the samples
2484	// with large buffers, report the expected preformance of infinitely large buffers
2485	const float						rate		= (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);
2486
2487	if (rate == std::numeric_limits<float>::infinity())
2488	{
2489		// sample times are 1) invalid or 2) timer resolution too low
2490		// report speed 0 bytes / s since real value cannot be determined
2491		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2492	}
2493	else
2494	{
2495		// report transfer rate in MB / s
2496		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2497	}
2498}
2499
2500class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2501{
2502public:
2503				ReferenceMemcpyCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
2504				~ReferenceMemcpyCase	(void);
2505
2506	void		init					(void);
2507	void		deinit					(void);
2508private:
2509	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2510
2511	std::vector<deUint8> m_dstBuf;
2512};
2513
2514ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
2515	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2516	, m_dstBuf									()
2517{
2518	disableGLWarmup();
2519}
2520
2521ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
2522{
2523}
2524
2525void ReferenceMemcpyCase::init (void)
2526{
2527	// Describe what the test tries to do
2528	m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2529
2530	m_dstBuf.resize(m_bufferSizeMax, 0x00);
2531
2532	BasicUploadCase<SingleOperationDuration>::init();
2533}
2534
2535void ReferenceMemcpyCase::deinit (void)
2536{
2537	m_dstBuf.clear();
2538	BasicUploadCase<SingleOperationDuration>::deinit();
2539}
2540
2541void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2542{
2543	// write
2544	result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2545	result.duration.fitResponseDuration = result.duration.totalDuration;
2546
2547	result.writtenSize = bufferSize;
2548}
2549
2550class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2551{
2552public:
2553				BufferDataUploadCase	(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
2554				~BufferDataUploadCase	(void);
2555
2556	void		init					(void);
2557private:
2558	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2559};
2560
2561BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
2562	: BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE)
2563{
2564}
2565
2566BufferDataUploadCase::~BufferDataUploadCase (void)
2567{
2568}
2569
2570void BufferDataUploadCase::init (void)
2571{
2572	// Describe what the test tries to do
2573	m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2574
2575	BasicUploadCase<SingleOperationDuration>::init();
2576}
2577
2578void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2579{
2580	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2581
2582	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2583
2584	// upload
2585	{
2586		deUint64 startTime;
2587		deUint64 endTime;
2588
2589		startTime = deGetMicroseconds();
2590		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2591		endTime = deGetMicroseconds();
2592
2593		result.duration.totalDuration = endTime - startTime;
2594		result.duration.fitResponseDuration = result.duration.totalDuration;
2595		result.writtenSize = bufferSize;
2596	}
2597}
2598
2599class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2600{
2601public:
2602	enum Flags
2603	{
2604		FLAG_FULL_UPLOAD			= 0x01,
2605		FLAG_PARTIAL_UPLOAD			= 0x02,
2606		FLAG_INVALIDATE_BEFORE_USE	= 0x04,
2607	};
2608
2609				BufferSubDataUploadCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
2610				~BufferSubDataUploadCase	(void);
2611
2612	void		init						(void);
2613private:
2614	void		testBufferUpload			(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2615
2616	const bool	m_fullUpload;
2617	const bool	m_invalidateBeforeUse;
2618};
2619
2620BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
2621	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE)
2622	, m_fullUpload								((flags & FLAG_FULL_UPLOAD) != 0)
2623	, m_invalidateBeforeUse						((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
2624{
2625	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
2626	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
2627}
2628
2629BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
2630{
2631}
2632
2633void BufferSubDataUploadCase::init (void)
2634{
2635	// Describe what the test tries to do
2636	m_testCtx.getLog()
2637		<< tcu::TestLog::Message
2638		<< "Testing glBufferSubData() function call performance. "
2639		<< ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. "))
2640		<< ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n"
2641		<< tcu::TestLog::EndMessage;
2642
2643	BasicUploadCase<SingleOperationDuration>::init();
2644}
2645
2646void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2647{
2648	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2649
2650	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2651
2652	// "invalidate", upload null
2653	if (m_invalidateBeforeUse)
2654		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2655
2656	// upload
2657	{
2658		deUint64 startTime;
2659		deUint64 endTime;
2660
2661		startTime = deGetMicroseconds();
2662
2663		if (m_fullUpload)
2664			gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
2665		else
2666		{
2667			// upload to buffer center
2668			gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
2669		}
2670
2671		endTime = deGetMicroseconds();
2672
2673		result.duration.totalDuration = endTime - startTime;
2674		result.duration.fitResponseDuration = result.duration.totalDuration;
2675
2676		if (m_fullUpload)
2677			result.writtenSize = bufferSize;
2678		else
2679			result.writtenSize = bufferSize / 2;
2680	}
2681}
2682
2683class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
2684{
2685public:
2686	enum Flags
2687	{
2688		FLAG_PARTIAL						= 0x01,
2689		FLAG_MANUAL_INVALIDATION			= 0x02,
2690		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2691		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2692	};
2693
2694					MapBufferRangeCase			(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2695					~MapBufferRangeCase			(void);
2696
2697	void			init						(void);
2698private:
2699	static CaseType getBaseCaseType				(int caseFlags);
2700	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2701
2702	void			testBufferUpload			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2703	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2704
2705	const bool		m_manualInvalidation;
2706	const bool		m_fullUpload;
2707	const bool		m_useUnusedUnspecifiedBuffer;
2708	const bool		m_useUnusedSpecifiedBuffer;
2709	const deUint32	m_mapFlags;
2710	int				m_unmapFailures;
2711};
2712
2713MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2714	: BasicUploadCase<MapBufferRangeDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2715	, m_manualInvalidation						((caseFlags&FLAG_MANUAL_INVALIDATION) != 0)
2716	, m_fullUpload								((caseFlags&FLAG_PARTIAL) == 0)
2717	, m_useUnusedUnspecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2718	, m_useUnusedSpecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2719	, m_mapFlags								(mapFlags)
2720	, m_unmapFailures							(0)
2721{
2722	DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
2723	DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
2724}
2725
2726MapBufferRangeCase::~MapBufferRangeCase (void)
2727{
2728}
2729
2730void MapBufferRangeCase::init (void)
2731{
2732	// Describe what the test tries to do
2733	m_testCtx.getLog()
2734		<< tcu::TestLog::Message
2735		<< "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
2736		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2737		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2738		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2739		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2740		<< ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
2741		<< "Map bits:\n"
2742		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2743		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2744		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2745		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2746		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2747		<< tcu::TestLog::EndMessage;
2748
2749	BasicUploadCase<MapBufferRangeDuration>::init();
2750}
2751
2752MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
2753{
2754	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2755		return CASE_USED_BUFFER;
2756	else
2757		return CASE_NEW_BUFFER;
2758}
2759
2760int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2761{
2762	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2763
2764	// If buffer contains unspecified data when it is sourced (i.e drawn)
2765	// results are undefined, and system errors may occur. Signal parent
2766	// class to take this into account
2767	if (caseFlags & FLAG_PARTIAL)
2768	{
2769		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2770			(caseFlags & FLAG_MANUAL_INVALIDATION) != 0				||
2771			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2772		{
2773			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2774		}
2775	}
2776
2777	return flags;
2778}
2779
2780void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2781{
2782	const int unmapFailureThreshold = 4;
2783
2784	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
2785	{
2786		try
2787		{
2788			attemptBufferMap(result, bufferSize);
2789			return;
2790		}
2791		catch (UnmapFailureError&)
2792		{
2793		}
2794	}
2795
2796	throw tcu::TestError("Unmapping failures exceeded limit");
2797}
2798
2799void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2800{
2801	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2802
2803	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2804
2805	if (m_fullUpload)
2806		result.writtenSize = bufferSize;
2807	else
2808		result.writtenSize = bufferSize / 2;
2809
2810	// Create unused buffer
2811
2812	if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
2813	{
2814		deUint64 startTime;
2815		deUint64 endTime;
2816
2817		// "invalidate" or allocate, upload null
2818		startTime = deGetMicroseconds();
2819		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2820		endTime = deGetMicroseconds();
2821
2822		result.duration.allocDuration = endTime - startTime;
2823	}
2824	else if (m_useUnusedSpecifiedBuffer)
2825	{
2826		deUint64 startTime;
2827		deUint64 endTime;
2828
2829		// Specify buffer contents
2830		startTime = deGetMicroseconds();
2831		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2832		endTime = deGetMicroseconds();
2833
2834		result.duration.allocDuration = endTime - startTime;
2835	}
2836	else
2837	{
2838		// No alloc, no time
2839		result.duration.allocDuration = 0;
2840	}
2841
2842	// upload
2843	{
2844		void* mapPtr;
2845
2846		// Map
2847		{
2848			deUint64 startTime;
2849			deUint64 endTime;
2850
2851			startTime = deGetMicroseconds();
2852			if (m_fullUpload)
2853				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
2854			else
2855			{
2856				// upload to buffer center
2857				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
2858			}
2859			endTime = deGetMicroseconds();
2860
2861			if (!mapPtr)
2862				throw tcu::Exception("MapBufferRange returned NULL");
2863
2864			result.duration.mapDuration = endTime - startTime;
2865		}
2866
2867		// Write
2868		{
2869			result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
2870		}
2871
2872		// Unmap
2873		{
2874			deUint64		startTime;
2875			deUint64		endTime;
2876			glw::GLboolean	unmapSuccessful;
2877
2878			startTime = deGetMicroseconds();
2879			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
2880			endTime = deGetMicroseconds();
2881
2882			// if unmapping fails, just try again later
2883			if (!unmapSuccessful)
2884				throw UnmapFailureError();
2885
2886			result.duration.unmapDuration = endTime - startTime;
2887		}
2888
2889		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
2890		result.duration.fitResponseDuration = result.duration.totalDuration;
2891	}
2892}
2893
2894class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
2895{
2896public:
2897	enum Flags
2898	{
2899		FLAG_PARTIAL						= 0x01,
2900		FLAG_FLUSH_IN_PARTS					= 0x02,
2901		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2902		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2903		FLAG_FLUSH_PARTIAL					= 0x10,
2904	};
2905
2906					MapBufferRangeFlushCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2907					~MapBufferRangeFlushCase	(void);
2908
2909	void			init						(void);
2910private:
2911	static CaseType getBaseCaseType				(int caseFlags);
2912	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2913
2914	void			testBufferUpload			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2915	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2916
2917	const bool		m_fullUpload;
2918	const bool		m_flushInParts;
2919	const bool		m_flushPartial;
2920	const bool		m_useUnusedUnspecifiedBuffer;
2921	const bool		m_useUnusedSpecifiedBuffer;
2922	const deUint32	m_mapFlags;
2923	int				m_unmapFailures;
2924};
2925
2926MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2927	: BasicUploadCase<MapBufferRangeFlushDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2928	, m_fullUpload									((caseFlags&FLAG_PARTIAL) == 0)
2929	, m_flushInParts								((caseFlags&FLAG_FLUSH_IN_PARTS) != 0)
2930	, m_flushPartial								((caseFlags&FLAG_FLUSH_PARTIAL) != 0)
2931	, m_useUnusedUnspecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2932	, m_useUnusedSpecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2933	, m_mapFlags									(mapFlags)
2934	, m_unmapFailures								(0)
2935{
2936	DE_ASSERT(!(m_flushPartial && m_flushInParts));
2937	DE_ASSERT(!(m_flushPartial && !m_fullUpload));
2938}
2939
2940MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
2941{
2942}
2943
2944void MapBufferRangeFlushCase::init (void)
2945{
2946	// Describe what the test tries to do
2947	m_testCtx.getLog()
2948		<< tcu::TestLog::Message
2949		<< "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
2950		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2951		<< ((m_flushInParts) ?
2952			("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
2953			(m_flushPartial) ?
2954				("Half of the buffer range is flushed.") :
2955				("The whole mapped range is flushed in one flush call.")) << "\n"
2956		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2957		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2958		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2959		<< "Map bits:\n"
2960		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2961		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2962		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2963		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2964		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2965		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
2966		<< tcu::TestLog::EndMessage;
2967
2968	BasicUploadCase<MapBufferRangeFlushDuration>::init();
2969}
2970
2971MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
2972{
2973	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2974		return CASE_USED_BUFFER;
2975	else
2976		return CASE_NEW_BUFFER;
2977}
2978
2979int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2980{
2981	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2982
2983	// If buffer contains unspecified data when it is sourced (i.e drawn)
2984	// results are undefined, and system errors may occur. Signal parent
2985	// class to take this into account
2986	if (caseFlags & FLAG_PARTIAL)
2987	{
2988		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2989			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0	||
2990			(caseFlags & FLAG_FLUSH_PARTIAL) != 0)
2991		{
2992			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2993		}
2994	}
2995
2996	return flags;
2997}
2998
2999void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3000{
3001	const int unmapFailureThreshold = 4;
3002
3003	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3004	{
3005		try
3006		{
3007			attemptBufferMap(result, bufferSize);
3008			return;
3009		}
3010		catch (UnmapFailureError&)
3011		{
3012		}
3013	}
3014
3015	throw tcu::TestError("Unmapping failures exceeded limit");
3016}
3017
3018void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3019{
3020	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
3021	const int				mappedSize	= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3022
3023	if (m_fullUpload && !m_flushPartial)
3024		result.writtenSize = bufferSize;
3025	else
3026		result.writtenSize = bufferSize / 2;
3027
3028	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3029
3030	// Create unused buffer
3031
3032	if (m_useUnusedUnspecifiedBuffer)
3033	{
3034		deUint64 startTime;
3035		deUint64 endTime;
3036
3037		// Don't specify contents
3038		startTime = deGetMicroseconds();
3039		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3040		endTime = deGetMicroseconds();
3041
3042		result.duration.allocDuration = endTime - startTime;
3043	}
3044	else if (m_useUnusedSpecifiedBuffer)
3045	{
3046		deUint64 startTime;
3047		deUint64 endTime;
3048
3049		// Specify buffer contents
3050		startTime = deGetMicroseconds();
3051		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3052		endTime = deGetMicroseconds();
3053
3054		result.duration.allocDuration = endTime - startTime;
3055	}
3056	else
3057	{
3058		// No alloc, no time
3059		result.duration.allocDuration = 0;
3060	}
3061
3062	// upload
3063	{
3064		void* mapPtr;
3065
3066		// Map
3067		{
3068			deUint64 startTime;
3069			deUint64 endTime;
3070
3071			startTime = deGetMicroseconds();
3072			if (m_fullUpload)
3073				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3074			else
3075			{
3076				// upload to buffer center
3077				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3078			}
3079			endTime = deGetMicroseconds();
3080
3081			if (!mapPtr)
3082				throw tcu::Exception("MapBufferRange returned NULL");
3083
3084			result.duration.mapDuration = endTime - startTime;
3085		}
3086
3087		// Write
3088		{
3089			if (!m_flushPartial)
3090				result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3091			else
3092				result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3093		}
3094
3095		// Flush
3096		{
3097			deUint64	startTime;
3098			deUint64	endTime;
3099
3100			startTime = deGetMicroseconds();
3101
3102			if (m_flushPartial)
3103				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
3104			else if (!m_flushInParts)
3105				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3106			else
3107			{
3108				const int p1 = 0;
3109				const int p2 = mappedSize / 3;
3110				const int p3 = mappedSize / 2;
3111				const int p4 = mappedSize * 2 / 4;
3112				const int p5 = mappedSize;
3113
3114				// flush in mixed order
3115				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,	p3-p2);
3116				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,	p2-p1);
3117				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,	p5-p4);
3118				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,	p4-p3);
3119			}
3120
3121			endTime = deGetMicroseconds();
3122
3123			result.duration.flushDuration = endTime - startTime;
3124		}
3125
3126		// Unmap
3127		{
3128			deUint64		startTime;
3129			deUint64		endTime;
3130			glw::GLboolean	unmapSuccessful;
3131
3132			startTime = deGetMicroseconds();
3133			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3134			endTime = deGetMicroseconds();
3135
3136			// if unmapping fails, just try again later
3137			if (!unmapSuccessful)
3138				throw UnmapFailureError();
3139
3140			result.duration.unmapDuration = endTime - startTime;
3141		}
3142
3143		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
3144		result.duration.fitResponseDuration = result.duration.totalDuration;
3145	}
3146}
3147
3148template <typename SampleType>
3149class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3150{
3151public:
3152						ModifyAfterBasicCase	(Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
3153						~ModifyAfterBasicCase	(void);
3154
3155	void				init					(void);
3156	void				deinit					(void);
3157
3158protected:
3159	void				drawBufferRange			(int begin, int end);
3160
3161private:
3162	enum
3163	{
3164		NUM_SAMPLES = 20,
3165	};
3166
3167
3168	bool				runSample				(int iteration, UploadSampleResult<SampleType>& sample);
3169	bool				prepareAndRunTest		(int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
3170	void				logAndSetTestResult		(const std::vector<UploadSampleResult<SampleType> >& results);
3171
3172	virtual void		testWithBufferSize		(UploadSampleResult<SampleType>& result, int bufferSize) = 0;
3173
3174	int					m_unmappingErrors;
3175
3176protected:
3177	const bool			m_bufferUnspecifiedAfterTest;
3178	const deUint32		m_bufferUsage;
3179	std::vector<deUint8> m_zeroData;
3180
3181	using BasicBufferCase<SampleType>::m_testCtx;
3182	using BasicBufferCase<SampleType>::m_context;
3183
3184	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
3185	using BasicBufferCase<SampleType>::m_dummyProgram;
3186	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
3187	using BasicBufferCase<SampleType>::m_bufferID;
3188	using BasicBufferCase<SampleType>::m_numSamples;
3189	using BasicBufferCase<SampleType>::m_bufferSizeMin;
3190	using BasicBufferCase<SampleType>::m_bufferSizeMax;
3191	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3192};
3193
3194template <typename SampleType>
3195ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
3196	: BasicBufferCase<SampleType>	(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3197	, m_unmappingErrors				(0)
3198	, m_bufferUnspecifiedAfterTest	(bufferUnspecifiedAfterTest)
3199	, m_bufferUsage					(usage)
3200	, m_zeroData					()
3201{
3202}
3203
3204template <typename SampleType>
3205ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
3206{
3207	BasicBufferCase<SampleType>::deinit();
3208}
3209
3210template <typename SampleType>
3211void ModifyAfterBasicCase<SampleType>::init (void)
3212{
3213	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3214
3215	// init parent
3216
3217	BasicBufferCase<SampleType>::init();
3218
3219	// upload source
3220	m_zeroData.resize(m_bufferSizeMax, 0x00);
3221
3222	// log basic info
3223
3224	m_testCtx.getLog()
3225		<< tcu::TestLog::Message
3226		<< "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
3227		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
3228		<< tcu::TestLog::EndMessage;
3229
3230	// log which transfer rate is the test result and buffer info
3231
3232	m_testCtx.getLog()
3233		<< tcu::TestLog::Message
3234		<< "Test result is the median transfer rate of the test samples.\n"
3235		<< "Buffer usage = " << glu::getUsageName(m_bufferUsage)
3236		<< tcu::TestLog::EndMessage;
3237
3238	// Set state for drawing so that we don't have to change these during the iteration
3239	{
3240		gl.useProgram(m_dummyProgram->getProgram());
3241		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
3242		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
3243	}
3244}
3245
3246template <typename SampleType>
3247void ModifyAfterBasicCase<SampleType>::deinit (void)
3248{
3249	m_zeroData.clear();
3250
3251	BasicBufferCase<SampleType>::deinit();
3252}
3253
3254template <typename SampleType>
3255void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
3256{
3257	DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3258	DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3259
3260	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3261
3262	// use given range
3263	gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3264	gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3265}
3266
3267template <typename SampleType>
3268bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
3269{
3270	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
3271	const int				bufferSize			= sample.bufferSize;
3272	bool					testOk;
3273
3274	testOk = prepareAndRunTest(iteration, sample, bufferSize);
3275	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3276
3277	if (!testOk)
3278	{
3279		const int unmapFailureThreshold = 4;
3280
3281		// only unmapping error can cause iteration failure
3282		if (++m_unmappingErrors >= unmapFailureThreshold)
3283			throw tcu::TestError("Too many unmapping errors, cannot continue.");
3284
3285		// just try again
3286		return false;
3287	}
3288
3289	return true;
3290}
3291
3292template <typename SampleType>
3293bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
3294{
3295	DE_UNREF(iteration);
3296
3297	DE_ASSERT(!m_bufferID);
3298	DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4
3299
3300	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
3301	bool						testRunOk		= true;
3302	bool						unmappingFailed	= false;
3303
3304	// Upload initial buffer to the GPU...
3305	gl.genBuffers(1, &m_bufferID);
3306	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3307	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3308
3309	// ...use it...
3310	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3311	drawBufferRange(0, bufferSize);
3312
3313	// ..and make sure it is uploaded
3314	BasicBufferCase<SampleType>::waitGLResults();
3315
3316	// warmup CPU before the test to make sure the power management governor
3317	// keeps us in the "high performance" mode
3318	{
3319		deYield();
3320		tcu::warmupCPU();
3321		deYield();
3322	}
3323
3324	// test
3325	try
3326	{
3327		// buffer is uploaded to the GPU. Draw from it.
3328		drawBufferRange(0, bufferSize);
3329
3330		// and test upload
3331		testWithBufferSize(result, bufferSize);
3332	}
3333	catch (UnmapFailureError&)
3334	{
3335		testRunOk = false;
3336		unmappingFailed = true;
3337	}
3338
3339	// clean up: make sure buffer is not in upload queue and delete it
3340
3341	// sourcing unspecified data causes undefined results, possibly program termination
3342	if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3343		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3344
3345	drawBufferRange(0, bufferSize);
3346	BasicBufferCase<SampleType>::waitGLResults();
3347
3348	gl.deleteBuffers(1, &m_bufferID);
3349	m_bufferID = 0;
3350
3351	return testRunOk;
3352}
3353
3354template <typename SampleType>
3355void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
3356{
3357	const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3358
3359	// Return median transfer rate of the samples
3360
3361	if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3362	{
3363		// sample times are 1) invalid or 2) timer resolution too low
3364		// report speed 0 bytes / s since real value cannot be determined
3365		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3366	}
3367	else
3368	{
3369		// report transfer rate in MB / s
3370		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3371	}
3372}
3373
3374class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3375{
3376public:
3377
3378	enum CaseFlags
3379	{
3380		FLAG_RESPECIFY_SIZE		= 0x1,
3381		FLAG_UPLOAD_REPEATED	= 0x2,
3382	};
3383
3384					ModifyAfterWithBufferDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3385					~ModifyAfterWithBufferDataCase	(void);
3386
3387	void			init							(void);
3388	void			deinit							(void);
3389private:
3390	void			testWithBufferSize				(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3391
3392	enum
3393	{
3394		NUM_REPEATS = 2
3395	};
3396
3397	const bool		m_respecifySize;
3398	const bool		m_repeatedUpload;
3399	const float		m_sizeDifferenceFactor;
3400};
3401
3402ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3403	: ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3404	, m_respecifySize								((flags & FLAG_RESPECIFY_SIZE) != 0)
3405	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3406	, m_sizeDifferenceFactor						(1.3f)
3407{
3408	DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3409}
3410
3411ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
3412{
3413	deinit();
3414}
3415
3416void ModifyAfterWithBufferDataCase::init (void)
3417{
3418	// Log the purpose of the test
3419
3420	if (m_repeatedUpload)
3421		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3422	else
3423		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3424
3425	m_testCtx.getLog()
3426		<< tcu::TestLog::Message
3427		<< ((m_respecifySize) ?
3428			("Buffer size is increased and contents are modified with BufferData().\n") :
3429			("Buffer contents are modified with BufferData().\n"))
3430		<< tcu::TestLog::EndMessage;
3431
3432	// init parent
3433	ModifyAfterBasicCase<SingleOperationDuration>::init();
3434
3435	// make sure our zeroBuffer is large enough
3436	if (m_respecifySize)
3437	{
3438		const int largerBufferSize = deAlign32((int)(m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
3439		m_zeroData.resize(largerBufferSize, 0x00);
3440	}
3441}
3442
3443void ModifyAfterWithBufferDataCase::deinit (void)
3444{
3445	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3446}
3447
3448void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3449{
3450	// always draw the same amount to make compares between cases sensible
3451	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3452	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3453
3454	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3455	const int					largerBufferSize	= deAlign32((int)(bufferSize * m_sizeDifferenceFactor), 4*4);
3456	const int					newBufferSize		= (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3457	deUint64					startTime;
3458	deUint64					endTime;
3459
3460	// repeat upload-draw
3461	if (m_repeatedUpload)
3462	{
3463		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3464		{
3465			gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3466			drawBufferRange(drawStart, drawEnd);
3467		}
3468	}
3469
3470	// test upload
3471	startTime = deGetMicroseconds();
3472	gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3473	endTime = deGetMicroseconds();
3474
3475	result.duration.totalDuration = endTime - startTime;
3476	result.duration.fitResponseDuration = result.duration.totalDuration;
3477	result.writtenSize = newBufferSize;
3478}
3479
3480class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3481{
3482public:
3483
3484	enum CaseFlags
3485	{
3486		FLAG_PARTIAL			= 0x1,
3487		FLAG_UPLOAD_REPEATED	= 0x2,
3488	};
3489
3490					ModifyAfterWithBufferSubDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3491					~ModifyAfterWithBufferSubDataCase	(void);
3492
3493	void			init								(void);
3494	void			deinit								(void);
3495private:
3496	void			testWithBufferSize					(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3497
3498	enum
3499	{
3500		NUM_REPEATS = 2
3501	};
3502
3503	const bool		m_partialUpload;
3504	const bool		m_repeatedUpload;
3505};
3506
3507ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3508	: ModifyAfterBasicCase<SingleOperationDuration>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3509	, m_partialUpload								((flags & FLAG_PARTIAL) != 0)
3510	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3511{
3512}
3513
3514ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
3515{
3516	deinit();
3517}
3518
3519void ModifyAfterWithBufferSubDataCase::init (void)
3520{
3521	// Log the purpose of the test
3522
3523	if (m_repeatedUpload)
3524		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3525	else
3526		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3527
3528	m_testCtx.getLog()
3529		<< tcu::TestLog::Message
3530		<< ((m_partialUpload) ?
3531			("Half of the buffer contents are modified.\n") :
3532			("Buffer contents are fully respecified.\n"))
3533		<< tcu::TestLog::EndMessage;
3534
3535	ModifyAfterBasicCase<SingleOperationDuration>::init();
3536}
3537
3538void ModifyAfterWithBufferSubDataCase::deinit (void)
3539{
3540	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3541}
3542
3543void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3544{
3545	// always draw the same amount to make compares between cases sensible
3546	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3547	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3548
3549	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3550	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3551	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3552	deUint64					startTime;
3553	deUint64					endTime;
3554
3555	// make upload-draw stream
3556	if (m_repeatedUpload)
3557	{
3558		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3559		{
3560			gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3561			drawBufferRange(drawStart, drawEnd);
3562		}
3563	}
3564
3565	// test upload
3566	startTime = deGetMicroseconds();
3567	gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3568	endTime = deGetMicroseconds();
3569
3570	result.duration.totalDuration = endTime - startTime;
3571	result.duration.fitResponseDuration = result.duration.totalDuration;
3572	result.writtenSize = subdataSize;
3573}
3574
3575class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
3576{
3577public:
3578
3579	enum CaseFlags
3580	{
3581		FLAG_PARTIAL = 0x1,
3582	};
3583
3584					ModifyAfterWithMapBufferRangeCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3585					~ModifyAfterWithMapBufferRangeCase	(void);
3586
3587	void			init								(void);
3588	void			deinit								(void);
3589private:
3590	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3591	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);
3592
3593	const bool		m_partialUpload;
3594	const deUint32	m_mapFlags;
3595};
3596
3597ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3598	: ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3599	, m_partialUpload										((flags & FLAG_PARTIAL) != 0)
3600	, m_mapFlags											(glMapFlags)
3601{
3602}
3603
3604ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
3605{
3606	deinit();
3607}
3608
3609void ModifyAfterWithMapBufferRangeCase::init (void)
3610{
3611	// Log the purpose of the test
3612
3613	m_testCtx.getLog()
3614		<< tcu::TestLog::Message
3615		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3616		<< ((m_partialUpload) ?
3617			("Half of the buffer is mapped.\n") :
3618			("Whole buffer is mapped.\n"))
3619		<< "Map bits:\n"
3620		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3621		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3622		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3623		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3624		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3625		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3626		<< tcu::TestLog::EndMessage;
3627
3628	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
3629}
3630
3631void ModifyAfterWithMapBufferRangeCase::deinit (void)
3632{
3633	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
3634}
3635
3636bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3637{
3638	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3639		return true;
3640
3641	return false;
3642}
3643
3644void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
3645{
3646	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3647	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3648	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3649	void*						mapPtr;
3650
3651	// map
3652	{
3653		deUint64 startTime;
3654		deUint64 endTime;
3655
3656		startTime = deGetMicroseconds();
3657		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3658		endTime = deGetMicroseconds();
3659
3660		if (!mapPtr)
3661			throw tcu::TestError("mapBufferRange returned null");
3662
3663		result.duration.mapDuration = endTime - startTime;
3664	}
3665
3666	// write
3667	{
3668		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3669	}
3670
3671	// unmap
3672	{
3673		deUint64		startTime;
3674		deUint64		endTime;
3675		glw::GLboolean	unmapSucceeded;
3676
3677		startTime = deGetMicroseconds();
3678		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3679		endTime = deGetMicroseconds();
3680
3681		if (unmapSucceeded != GL_TRUE)
3682			throw UnmapFailureError();
3683
3684		result.duration.unmapDuration = endTime - startTime;
3685	}
3686
3687	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
3688	result.duration.fitResponseDuration = result.duration.totalDuration;
3689	result.writtenSize = subdataSize;
3690}
3691
3692class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
3693{
3694public:
3695
3696	enum CaseFlags
3697	{
3698		FLAG_PARTIAL = 0x1,
3699	};
3700
3701					ModifyAfterWithMapBufferFlushCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3702					~ModifyAfterWithMapBufferFlushCase	(void);
3703
3704	void			init								(void);
3705	void			deinit								(void);
3706private:
3707	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3708	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);
3709
3710	const bool		m_partialUpload;
3711	const deUint32	m_mapFlags;
3712};
3713
3714ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3715	: ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3716	, m_partialUpload											((flags & FLAG_PARTIAL) != 0)
3717	, m_mapFlags												(glMapFlags)
3718{
3719}
3720
3721ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
3722{
3723	deinit();
3724}
3725
3726void ModifyAfterWithMapBufferFlushCase::init (void)
3727{
3728	// Log the purpose of the test
3729
3730	m_testCtx.getLog()
3731		<< tcu::TestLog::Message
3732		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3733		<< ((m_partialUpload) ?
3734			("Half of the buffer is mapped.\n") :
3735			("Whole buffer is mapped.\n"))
3736		<< "Map bits:\n"
3737		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3738		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3739		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3740		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3741		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3742		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3743		<< tcu::TestLog::EndMessage;
3744
3745	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
3746}
3747
3748void ModifyAfterWithMapBufferFlushCase::deinit (void)
3749{
3750	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
3751}
3752
3753bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3754{
3755	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3756		return true;
3757
3758	return false;
3759}
3760
3761void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
3762{
3763	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3764	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3765	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3766	void*						mapPtr;
3767
3768	// map
3769	{
3770		deUint64 startTime;
3771		deUint64 endTime;
3772
3773		startTime = deGetMicroseconds();
3774		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3775		endTime = deGetMicroseconds();
3776
3777		if (!mapPtr)
3778			throw tcu::TestError("mapBufferRange returned null");
3779
3780		result.duration.mapDuration = endTime - startTime;
3781	}
3782
3783	// write
3784	{
3785		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3786	}
3787
3788	// flush
3789	{
3790		deUint64 startTime;
3791		deUint64 endTime;
3792
3793		startTime = deGetMicroseconds();
3794		gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
3795		endTime = deGetMicroseconds();
3796
3797		result.duration.flushDuration = endTime - startTime;
3798	}
3799
3800	// unmap
3801	{
3802		deUint64		startTime;
3803		deUint64		endTime;
3804		glw::GLboolean	unmapSucceeded;
3805
3806		startTime = deGetMicroseconds();
3807		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3808		endTime = deGetMicroseconds();
3809
3810		if (unmapSucceeded != GL_TRUE)
3811			throw UnmapFailureError();
3812
3813		result.duration.unmapDuration = endTime - startTime;
3814	}
3815
3816	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
3817	result.duration.fitResponseDuration = result.duration.totalDuration;
3818	result.writtenSize = subdataSize;
3819}
3820
// Draw call used to issue a workload (drawArrays or drawElements).
// DRAWMETHOD_LAST equals the number of real enumerators.
enum DrawMethod
{
	DRAWMETHOD_DRAW_ARRAYS		= 0,
	DRAWMETHOD_DRAW_ELEMENTS	= 1,

	DRAWMETHOD_LAST				= 2
};
3828
// Buffer selector with vertex and index alternatives.
// TARGETBUFFER_LAST equals the number of real enumerators.
enum TargetBuffer
{
	TARGETBUFFER_VERTEX	= 0,
	TARGETBUFFER_INDEX	= 1,

	TARGETBUFFER_LAST	= 2
};
3836
// Buffer state selector with "new" and "existing" alternatives.
// BUFFERSTATE_LAST equals the number of real enumerators.
enum BufferState
{
	BUFFERSTATE_NEW			= 0,
	BUFFERSTATE_EXISTING	= 1,

	BUFFERSTATE_LAST		= 2
};
3844
// GL entry point used for an upload: bufferData, bufferSubData or
// mapBufferRange. UPLOADMETHOD_LAST equals the number of real enumerators.
enum UploadMethod
{
	UPLOADMETHOD_BUFFER_DATA		= 0,
	UPLOADMETHOD_BUFFER_SUB_DATA	= 1,
	UPLOADMETHOD_MAP_BUFFER_RANGE	= 2,

	UPLOADMETHOD_LAST				= 3
};
3853
// Selector for an unrelated buffer: none or a vertex buffer.
// UNRELATEDBUFFERTYPE_LAST equals the number of real enumerators.
enum UnrelatedBufferType
{
	UNRELATEDBUFFERTYPE_NONE	= 0,
	UNRELATEDBUFFERTYPE_VERTEX	= 1,

	UNRELATEDBUFFERTYPE_LAST	= 2
};
3861
// Upload range selector: full or partial.
// UPLOADRANGE_LAST equals the number of real enumerators.
enum UploadRange
{
	UPLOADRANGE_FULL	= 0,
	UPLOADRANGE_PARTIAL	= 1,

	UPLOADRANGE_LAST	= 2
};
3869
// Dimensions of a layered grid workload. Each cell of each layer is
// generated as six vertices (two triangles).
struct LayeredGridSpec
{
	int gridWidth;		//!< Number of cells along X.
	int gridHeight;		//!< Number of cells along Y.
	int gridLayers;		//!< Number of layers generated for every (x, y) cell.
};
3876
3877static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
3878{
3879	return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
3880}
3881
3882static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
3883{
3884	// interleave color & vertex data
3885	const tcu::Vec4 green	(0.0f, 1.0f, 0.0f, 0.7f);
3886	const tcu::Vec4 yellow	(1.0f, 1.0f, 0.0f, 0.8f);
3887
3888	vertexData.resize(getLayeredGridNumVertices(scene) * 2);
3889
3890	for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
3891	for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
3892	for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
3893	{
3894		const tcu::Vec4	color		= (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
3895		const float		cellLeft	= (float(cellX  ) / scene.gridWidth  - 0.5f) * 2.0f;
3896		const float		cellRight	= (float(cellX+1) / scene.gridWidth  - 0.5f) * 2.0f;
3897		const float		cellTop		= (float(cellY+1) / scene.gridHeight - 0.5f) * 2.0f;
3898		const float		cellBottom	= (float(cellY  ) / scene.gridHeight - 0.5f) * 2.0f;
3899
3900		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  0] = color;
3901		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3902
3903		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  2] = color;
3904		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
3905
3906		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  4] = color;
3907		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3908
3909		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  6] = color;
3910		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3911
3912		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  8] = color;
3913		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3914
3915		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color;
3916		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
3917	}
3918}
3919
3920static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
3921{
3922	indexData.resize(getLayeredGridNumVertices(scene) * 2);
3923
3924	for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
3925		indexData[ndx] = ndx;
3926}
3927
3928class RenderPerformanceTestBase : public TestCase
3929{
3930public:
3931							RenderPerformanceTestBase	(Context& context, const char* name, const char* description);
3932							~RenderPerformanceTestBase	(void);
3933
3934protected:
3935	void					init						(void);
3936	void					deinit						(void);
3937
3938	void					waitGLResults				(void) const;
3939	void					setupVertexAttribs			(void) const;
3940
3941	enum
3942	{
3943		RENDER_AREA_SIZE = 128
3944	};
3945
3946private:
3947	glu::ShaderProgram*		m_renderProgram;
3948	int						m_colorLoc;
3949	int						m_positionLoc;
3950};
3951
3952RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
3953	: TestCase			(context, tcu::NODETYPE_PERFORMANCE, name, description)
3954	, m_renderProgram	(DE_NULL)
3955	, m_colorLoc		(0)
3956	, m_positionLoc		(0)
3957{
3958}
3959
3960RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
3961{
3962	deinit();
3963}
3964
3965void RenderPerformanceTestBase::init (void)
3966{
3967	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3968
3969	m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));
3970	if (!m_renderProgram->isOk())
3971	{
3972		m_testCtx.getLog() << *m_renderProgram;
3973		throw tcu::TestError("could not build program");
3974	}
3975
3976	m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
3977	m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
3978
3979	if (m_colorLoc == -1)
3980		throw tcu::TestError("Location of attribute a_color was -1");
3981	if (m_positionLoc == -1)
3982		throw tcu::TestError("Location of attribute a_position was -1");
3983}
3984
3985void RenderPerformanceTestBase::deinit (void)
3986{
3987	delete m_renderProgram;
3988	m_renderProgram = DE_NULL;
3989}
3990
3991void RenderPerformanceTestBase::setupVertexAttribs (void) const
3992{
3993	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3994
3995	// buffers are bound
3996
3997	gl.enableVertexAttribArray(m_colorLoc);
3998	gl.enableVertexAttribArray(m_positionLoc);
3999
4000	gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0);
4001	gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1);
4002
4003	gl.useProgram(m_renderProgram->getProgram());
4004
4005	GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4006}
4007
4008void RenderPerformanceTestBase::waitGLResults (void) const
4009{
4010	tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4011	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
4012}
4013
4014template <typename SampleType>
4015class RenderCase : public RenderPerformanceTestBase
4016{
4017public:
4018									RenderCase						(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4019									~RenderCase						(void);
4020
4021protected:
4022	void							init							(void);
4023	void							deinit							(void);
4024
4025private:
4026	IterateResult					iterate							(void);
4027
4028protected:
4029	struct SampleResult
4030	{
4031		LayeredGridSpec					scene;
4032		RenderSampleResult<SampleType>	result;
4033	};
4034
4035	int								getMinWorkloadSize				(void) const;
4036	int								getMaxWorkloadSize				(void) const;
4037	int								getMinWorkloadDataSize			(void) const;
4038	int								getMaxWorkloadDataSize			(void) const;
4039	int								getVertexDataSize				(void) const;
4040	int								getNumSamples					(void) const;
4041	void							uploadScene						(const LayeredGridSpec& scene);
4042
4043	virtual void					runSample						(SampleResult& sample) = 0;
4044	virtual void					logAndSetTestResult				(const std::vector<SampleResult>& results);
4045
4046	void							mapResultsToRenderRateFormat	(std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;
4047
4048	const DrawMethod				m_drawMethod;
4049
4050private:
4051	glw::GLuint						m_attributeBufferID;
4052	glw::GLuint						m_indexBufferID;
4053	int								m_iterationNdx;
4054	std::vector<int>				m_iterationOrder;
4055	std::vector<SampleResult>		m_results;
4056	int								m_numUnmapFailures;
4057};
4058
4059template <typename SampleType>
4060RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4061	: RenderPerformanceTestBase	(context, name, description)
4062	, m_drawMethod				(drawMethod)
4063	, m_attributeBufferID		(0)
4064	, m_indexBufferID			(0)
4065	, m_iterationNdx			(0)
4066	, m_numUnmapFailures		(0)
4067{
4068	DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4069}
4070
4071template <typename SampleType>
4072RenderCase<SampleType>::~RenderCase (void)
4073{
4074	deinit();
4075}
4076
4077template <typename SampleType>
4078void RenderCase<SampleType>::init (void)
4079{
4080	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4081
4082	RenderPerformanceTestBase::init();
4083
4084	// requirements
4085
4086	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4087		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4088		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
4089
4090	// gl state
4091
4092	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4093
4094	// enable bleding to prevent grid layers from being discarded
4095	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4096	gl.blendEquation(GL_FUNC_ADD);
4097	gl.enable(GL_BLEND);
4098
4099	// generate iterations
4100
4101	{
4102		const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
4103
4104		for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4105		{
4106			m_results.push_back(SampleResult());
4107
4108			m_results.back().scene.gridHeight = gridSizes[gridNdx];
4109			m_results.back().scene.gridWidth = gridSizes[gridNdx];
4110			m_results.back().scene.gridLayers = 5;
4111
4112			m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4113
4114			// test cases set these, initialize to dummy values
4115			m_results.back().result.renderDataSize = -1;
4116			m_results.back().result.uploadedDataSize = -1;
4117			m_results.back().result.unrelatedDataSize = -1;
4118		}
4119	}
4120
4121	// randomize iteration order
4122	{
4123		m_iterationOrder.resize(m_results.size());
4124		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4125	}
4126}
4127
4128template <typename SampleType>
4129void RenderCase<SampleType>::deinit (void)
4130{
4131	RenderPerformanceTestBase::deinit();
4132
4133	if (m_attributeBufferID)
4134	{
4135		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4136		m_attributeBufferID = 0;
4137	}
4138
4139	if (m_indexBufferID)
4140	{
4141		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4142		m_indexBufferID = 0;
4143	}
4144}
4145
4146template <typename SampleType>
4147typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
4148{
4149	const int		unmapFailureThreshold	= 3;
4150	const int		currentIteration		= m_iterationNdx;
4151	const int		currentConfigNdx		= m_iterationOrder[currentIteration];
4152	SampleResult&	currentSample			= m_results[currentConfigNdx];
4153
4154	try
4155	{
4156		runSample(currentSample);
4157		++m_iterationNdx;
4158	}
4159	catch (const UnmapFailureError& ex)
4160	{
4161		DE_UNREF(ex);
4162		++m_numUnmapFailures;
4163	}
4164
4165	if (m_numUnmapFailures > unmapFailureThreshold)
4166		throw tcu::TestError("Got too many unmap errors");
4167
4168	if (m_iterationNdx < (int)m_iterationOrder.size())
4169		return CONTINUE;
4170
4171	logAndSetTestResult(m_results);
4172	return STOP;
4173}
4174
4175template <typename SampleType>
4176int RenderCase<SampleType>::getMinWorkloadSize (void) const
4177{
4178	int result = getLayeredGridNumVertices(m_results[0].scene);
4179
4180	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4181	{
4182		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4183		result = de::min(result, workloadSize);
4184	}
4185
4186	return result;
4187}
4188
4189template <typename SampleType>
4190int RenderCase<SampleType>::getMaxWorkloadSize (void) const
4191{
4192	int result = getLayeredGridNumVertices(m_results[0].scene);
4193
4194	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4195	{
4196		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4197		result = de::max(result, workloadSize);
4198	}
4199
4200	return result;
4201}
4202
4203template <typename SampleType>
4204int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
4205{
4206	return getMinWorkloadSize() * getVertexDataSize();
4207}
4208
4209template <typename SampleType>
4210int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
4211{
4212	return getMaxWorkloadSize() * getVertexDataSize();
4213}
4214
4215template <typename SampleType>
4216int RenderCase<SampleType>::getVertexDataSize (void) const
4217{
4218	const int numVectors	= 2;
4219	const int vec4Size		= 4 * sizeof(float);
4220
4221	return numVectors * vec4Size;
4222}
4223
4224template <typename SampleType>
4225int RenderCase<SampleType>::getNumSamples (void) const
4226{
4227	return (int)m_results.size();
4228}
4229
4230template <typename SampleType>
4231void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
4232{
4233	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4234
4235	// vertex buffer
4236	{
4237		std::vector<tcu::Vec4> vertexData;
4238
4239		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4240
4241		if (m_attributeBufferID == 0)
4242			gl.genBuffers(1, &m_attributeBufferID);
4243		gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4244		gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4245	}
4246
4247	// index buffer
4248	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4249	{
4250		std::vector<deUint32> indexData;
4251
4252		generateLayeredGridIndexData(indexData, scene);
4253
4254		if (m_indexBufferID == 0)
4255			gl.genBuffers(1, &m_indexBufferID);
4256		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4257		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4258	}
4259
4260	GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4261}
4262
4263template <typename SampleType>
4264void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
4265{
4266	std::vector<RenderSampleResult<SampleType> > mappedResults;
4267
4268	mapResultsToRenderRateFormat(mappedResults, results);
4269
4270	{
4271		const RenderSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4272		const float						rate		= analysis.renderRateAtRange;
4273
4274		if (rate == std::numeric_limits<float>::infinity())
4275		{
4276			// sample times are 1) invalid or 2) timer resolution too low
4277			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4278		}
4279		else
4280		{
4281			// report transfer rate in millions of MiB/s
4282			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4283		}
4284	}
4285}
4286
4287template <typename SampleType>
4288void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
4289{
4290	dst.resize(src.size());
4291
4292	for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4293		dst[ndx] = src[ndx].result;
4294}
4295
4296class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4297{
4298public:
4299			ReferenceRenderTimeCase		(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4300
4301private:
4302	void	init						(void);
4303	void	runSample					(SampleResult& sample);
4304};
4305
4306ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4307	: RenderCase<RenderReadDuration>	(context, name, description, drawMethod)
4308{
4309}
4310
4311void ReferenceRenderTimeCase::init (void)
4312{
4313	const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4314
4315	// init parent
4316	RenderCase<RenderReadDuration>::init();
4317
4318	// log
4319	m_testCtx.getLog()
4320		<< tcu::TestLog::Message
4321		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4322		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4323		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4324		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4325		<< "Workload sizes are in the range ["
4326			<< getMinWorkloadSize() << ",  "
4327			<< getMaxWorkloadSize() << "] vertices (["
4328			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4329			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4330		<< "Test result is the approximated total processing rate in MiB / s.\n"
4331		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4332		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4333		<< tcu::TestLog::EndMessage;
4334}
4335
4336void ReferenceRenderTimeCase::runSample (SampleResult& sample)
4337{
4338	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4339	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4340	const int				numVertices		= getLayeredGridNumVertices(sample.scene);
4341	const glu::Buffer		arrayBuffer		(m_context.getRenderContext());
4342	const glu::Buffer		indexBuffer		(m_context.getRenderContext());
4343	std::vector<tcu::Vec4>	vertexData;
4344	std::vector<deUint32>	indexData;
4345	deUint64				startTime;
4346	deUint64				endTime;
4347
4348	// generate and upload buffers
4349
4350	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4351	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4352	gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4353
4354	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4355	{
4356		generateLayeredGridIndexData(indexData, sample.scene);
4357		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4358		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4359	}
4360
4361	setupVertexAttribs();
4362
4363	// make sure data is uploaded
4364
4365	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4366		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4367	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4368		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4369	else
4370		DE_ASSERT(false);
4371	waitGLResults();
4372
4373	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4374	gl.clear(GL_COLOR_BUFFER_BIT);
4375	waitGLResults();
4376
4377	tcu::warmupCPU();
4378
4379	// Measure both draw and associated readpixels
4380	{
4381		startTime = deGetMicroseconds();
4382
4383		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4384			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4385		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4386			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4387		else
4388			DE_ASSERT(false);
4389
4390		endTime = deGetMicroseconds();
4391
4392		sample.result.duration.renderDuration = endTime - startTime;
4393	}
4394
4395	{
4396		startTime = deGetMicroseconds();
4397		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4398		endTime = deGetMicroseconds();
4399
4400		sample.result.duration.readDuration = endTime - startTime;
4401	}
4402
4403	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4404	sample.result.uploadedDataSize = 0;
4405	sample.result.unrelatedDataSize = 0;
4406	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4407	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4408	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4409}
4410
4411class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4412{
4413public:
4414									UnrelatedUploadRenderTimeCase	(Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);
4415
4416private:
4417	void							init							(void);
4418	void							runSample						(SampleResult& sample);
4419
4420	const UploadMethod				m_unrelatedUploadMethod;
4421};
4422
4423UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
4424	: RenderCase<UnrelatedUploadRenderReadDuration>	(context, name, description, drawMethod)
4425	, m_unrelatedUploadMethod						(unrelatedUploadMethod)
4426{
4427	DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4428}
4429
4430void UnrelatedUploadRenderTimeCase::init (void)
4431{
4432	const char* const	targetFunctionName	= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4433	tcu::MessageBuilder	message				(&m_testCtx.getLog());
4434
4435	// init parent
4436	RenderCase<UnrelatedUploadRenderReadDuration>::init();
4437
4438	// log
4439
4440	message
4441		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4442		<< "Uploading an unrelated buffer just before issuing the rendering command with "
4443			<< ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")		:
4444				(m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")		:
4445				(m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange")	:
4446				((const char*)DE_NULL))
4447			<< ".\n"
4448		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4449		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4450		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4451		<< "Workload sizes are in the range ["
4452			<< getMinWorkloadSize() << ",  "
4453			<< getMaxWorkloadSize() << "] vertices (["
4454			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4455			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4456		<< "Unrelated upload sizes are in the range ["
4457			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4458			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4459		<< "Test result is the approximated total processing rate in MiB / s.\n"
4460		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4461		<< "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4462		<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
4463		<< tcu::TestLog::EndMessage;
4464}
4465
4466void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
4467{
4468	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4469	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4470	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4471	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4472	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4473	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4474	int						unrelatedUploadSize	= -1;
4475	int						renderUploadSize;
4476	std::vector<tcu::Vec4>	vertexData;
4477	std::vector<deUint32>	indexData;
4478	deUint64				startTime;
4479	deUint64				endTime;
4480
4481	// generate and upload buffers
4482
4483	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4484	renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4485
4486	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4487	gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4488
4489	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4490	{
4491		generateLayeredGridIndexData(indexData, sample.scene);
4492		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4493		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4494	}
4495
4496	setupVertexAttribs();
4497
4498	// make sure data is uploaded
4499
4500	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4501		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4502	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4503		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4504	else
4505		DE_ASSERT(false);
4506	waitGLResults();
4507
4508	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4509	gl.clear(GL_COLOR_BUFFER_BIT);
4510	waitGLResults();
4511
4512	tcu::warmupCPU();
4513
4514	// Unrelated upload
4515	if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4516	{
4517		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4518
4519		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4520		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4521	}
4522	else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4523	{
4524		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4525
4526		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4527		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4528		gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
4529	}
4530	else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4531	{
4532		void*			mapPtr;
4533		glw::GLboolean	unmapSuccessful;
4534
4535		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4536
4537		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4538		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4539
4540		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4541		if (!mapPtr)
4542			throw tcu::Exception("MapBufferRange returned NULL");
4543
4544		deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
4545
4546		// if unmapping fails, just try again later
4547		unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
4548		if (!unmapSuccessful)
4549			throw UnmapFailureError();
4550	}
4551	else
4552		DE_ASSERT(false);
4553
4554	DE_ASSERT(unrelatedUploadSize != -1);
4555
4556	// Measure both draw and associated readpixels
4557	{
4558		startTime = deGetMicroseconds();
4559
4560		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4561			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4562		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4563			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4564		else
4565			DE_ASSERT(false);
4566
4567		endTime = deGetMicroseconds();
4568
4569		sample.result.duration.renderDuration = endTime - startTime;
4570	}
4571
4572	{
4573		startTime = deGetMicroseconds();
4574		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4575		endTime = deGetMicroseconds();
4576
4577		sample.result.duration.readDuration = endTime - startTime;
4578	}
4579
4580	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4581	sample.result.uploadedDataSize = renderUploadSize;
4582	sample.result.unrelatedDataSize = unrelatedUploadSize;
4583	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4584	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4585	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4586}
4587
4588class ReferenceReadPixelsTimeCase : public TestCase
4589{
4590public:
4591					ReferenceReadPixelsTimeCase		(Context& context, const char* name, const char* description);
4592
4593private:
4594	void			init							(void);
4595	IterateResult	iterate							(void);
4596	void			logAndSetTestResult				(void);
4597
4598	enum
4599	{
4600		RENDER_AREA_SIZE = 128
4601	};
4602
4603	const int			m_numSamples;
4604	int					m_sampleNdx;
4605	std::vector<int>	m_samples;
4606};
4607
4608ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
4609	: TestCase		(context, tcu::NODETYPE_PERFORMANCE, name, description)
4610	, m_numSamples	(20)
4611	, m_sampleNdx	(0)
4612	, m_samples		(m_numSamples)
4613{
4614}
4615
4616void ReferenceReadPixelsTimeCase::init (void)
4617{
4618	m_testCtx.getLog()
4619		<< tcu::TestLog::Message
4620		<< "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
4621		<< "Test result is the median of the samples in microseconds.\n"
4622		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4623		<< tcu::TestLog::EndMessage;
4624}
4625
4626ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
4627{
4628	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4629	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4630	deUint64				startTime;
4631	deUint64				endTime;
4632
4633	deYield();
4634	tcu::warmupCPU();
4635	deYield();
4636
4637	// "Render" something and wait for it
4638	gl.clearColor(0.0f, 1.0f, m_sampleNdx / float(m_numSamples), 1.0f);
4639	gl.clear(GL_COLOR_BUFFER_BIT);
4640
4641	// wait for results
4642	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4643
4644	// measure time used in readPixels
4645	startTime = deGetMicroseconds();
4646	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4647	endTime = deGetMicroseconds();
4648
4649	m_samples[m_sampleNdx] = (int)(endTime - startTime);
4650
4651	if (++m_sampleNdx < m_numSamples)
4652		return CONTINUE;
4653
4654	logAndSetTestResult();
4655	return STOP;
4656}
4657
4658void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
4659{
4660	// Log sample list
4661	{
4662		m_testCtx.getLog()
4663			<< tcu::TestLog::SampleList("Samples", "Samples")
4664			<< tcu::TestLog::SampleInfo
4665			<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
4666			<< tcu::TestLog::EndSampleInfo;
4667
4668		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
4669			m_testCtx.getLog()
4670				<< tcu::TestLog::Sample
4671				<< m_samples[sampleNdx]
4672				<< tcu::TestLog::EndSample;
4673
4674		m_testCtx.getLog() << tcu::TestLog::EndSampleList;
4675	}
4676
4677	// Log median
4678	{
4679		float median;
4680		float limit60Low;
4681		float limit60Up;
4682
4683		std::sort(m_samples.begin(), m_samples.end());
4684		median		= linearSample(m_samples, 0.5f);
4685		limit60Low	= linearSample(m_samples, 0.2f);
4686		limit60Up	= linearSample(m_samples, 0.8f);
4687
4688		m_testCtx.getLog()
4689			<< tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
4690			<< tcu::TestLog::Message
4691			<< "60 % of samples within range:\n"
4692			<< tcu::TestLog::EndMessage
4693			<< tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
4694			<< tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
4695
4696		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
4697	}
4698}
4699
// Render-time case in which the buffer sourced by the draw call (vertex attribute
// or index buffer, selected by m_targetBuffer) is uploaded just before the draw
// call is issued. The upload time is measured, but the fitted response is the
// draw + readPixels time (see runSample()).
template <typename SampleType>
class GenericUploadRenderTimeCase : public RenderCase<SampleType>
{
public:
	typedef typename RenderCase<SampleType>::SampleResult SampleResult;

							GenericUploadRenderTimeCase	(Context&				context,
														 const char*			name,
														 const char*			description,
														 DrawMethod				method,
														 TargetBuffer			targetBuffer,
														 UploadMethod			uploadMethod,
														 BufferState			bufferState,
														 UploadRange			uploadRange,
														 UnrelatedBufferType	unrelatedBufferType);

private:
	void						init					(void);
	void						runSample				(SampleResult& sample);

	// Members of the dependent base class must be named explicitly to be usable unqualified
	using RenderCase<SampleType>::RENDER_AREA_SIZE;

	const TargetBuffer			m_targetBuffer;			//!< Which buffer is uploaded before the draw (vertex or index)
	const BufferState			m_bufferState;			//!< Whether the target buffer has been used in rendering before the upload
	const UploadMethod			m_uploadMethod;			//!< bufferData, bufferSubData or mapBufferRange
	const UnrelatedBufferType	m_unrelatedBufferType;	//!< Whether an additional, unrelated buffer is uploaded before the draw
	const UploadRange			m_uploadRange;			//!< Full or partial upload of the target buffer

	using RenderCase<SampleType>::m_context;
	using RenderCase<SampleType>::m_testCtx;
	using RenderCase<SampleType>::m_drawMethod;
};
4732
// Stores the case configuration. The initializer list follows the member
// declaration order of the class. All enum arguments are asserted to be valid
// (below their respective *_LAST sentinel).
template <typename SampleType>
GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&				context,
																	  const char*			name,
																	  const char*			description,
																	  DrawMethod			method,
																	  TargetBuffer			targetBuffer,
																	  UploadMethod			uploadMethod,
																	  BufferState			bufferState,
																	  UploadRange			uploadRange,
																	  UnrelatedBufferType	unrelatedBufferType)
	: RenderCase<SampleType>	(context, name, description, method)
	, m_targetBuffer			(targetBuffer)
	, m_bufferState				(bufferState)
	, m_uploadMethod			(uploadMethod)
	, m_unrelatedBufferType		(unrelatedBufferType)
	, m_uploadRange				(uploadRange)
{
	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
	DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
	DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
}
4756
4757template <typename SampleType>
4758void GenericUploadRenderTimeCase<SampleType>::init (void)
4759{
4760	// init parent
4761	RenderCase<SampleType>::init();
4762
4763	// log
4764	{
4765		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4766		const int			perVertexSize			= (m_targetBuffer == TARGETBUFFER_INDEX) ? (sizeof(deUint32)) : (sizeof(tcu::Vec4[2]));
4767		const int			fullMinUploadSize		= RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
4768		const int			fullMaxUploadSize		= RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
4769		const int			minUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
4770		const int			maxUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
4771		const int			minUnrelatedUploadSize	= RenderCase<SampleType>::getMinWorkloadSize() * sizeof(tcu::Vec4[2]);
4772		const int			maxUnrelatedUploadSize	= RenderCase<SampleType>::getMaxWorkloadSize() * sizeof(tcu::Vec4[2]);
4773
4774		m_testCtx.getLog()
4775			<< tcu::TestLog::Message
4776			<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4777			<< "The "
4778				<< ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
4779				<< " buffer "
4780				<< ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
4781				<< "sourced by the rendering command "
4782				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded ") :
4783					(m_uploadRange == UPLOADRANGE_FULL)		? ("are specified ") :
4784					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("are updated (partial upload) ") :
4785					((const char*)DE_NULL))
4786				<< "just before issuing the rendering command.\n"
4787			<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
4788			<< "Buffer "
4789				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded") :
4790					(m_uploadRange == UPLOADRANGE_FULL)		? ("contents are specified") :
4791					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("contents are partially updated") :
4792					((const char*)DE_NULL))
4793				<< " with "
4794				<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
4795				<< " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
4796			<< ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
4797			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
4798			<< RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
4799			<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4800			<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4801			<< "Workload sizes are in the range ["
4802				<< RenderCase<SampleType>::getMinWorkloadSize() << ",  "
4803				<< RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
4804				<< "(["
4805				<< getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
4806				<< getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
4807			<< "Upload sizes are in the range ["
4808				<< getHumanReadableByteSize(minUploadSize) << ","
4809				<< getHumanReadableByteSize(maxUploadSize) << "].\n"
4810			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4811				("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
4812				(""))
4813			<< "Test result is the approximated processing rate in MiB / s.\n"
4814			<< "Note that while upload time is measured, the time used is not included in the results.\n"
4815			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
4816			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4817			<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
4818			<< tcu::TestLog::EndMessage;
4819	}
4820}
4821
// Measures one sample. Prepares the buffers according to m_drawMethod,
// m_bufferState and m_targetBuffer, uploads the target buffer data (timed),
// optionally uploads an unrelated buffer (not timed), issues the draw call
// (timed) and reads back the pixels (timed). Results are written to
// sample.result.
//
// Throws tcu::Exception if mapBufferRange returns NULL and UnmapFailureError
// if unmapBuffer reports failure.
template <typename SampleType>
void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
{
	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
	deUint64				startTime;
	deUint64				endTime;
	std::vector<tcu::Vec4>	vertexData;
	std::vector<deUint32>	indexData;

	// create data

	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
		generateLayeredGridIndexData(indexData, sample.scene);

	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
	RenderCase<SampleType>::setupVertexAttribs();

	// target should be an existing buffer? Draw from it once to make sure it exists on the gpu

	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
	{
		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
	}
	else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
	{
		// do not touch the vertex buffer
	}
	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
	{
		// hint that the target buffer will be modified soon
		const glw::GLenum vertexDataUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
		const glw::GLenum indexDataUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);

		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
	}
	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
	{
		// Only the non-target buffer is made present; the target buffer is left untouched
		if (m_targetBuffer == TARGETBUFFER_VERTEX)
		{
			// make the index buffer present on the gpu
			// use another vertex buffer to keep original buffer in unused state
			const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());

			gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
			RenderCase<SampleType>::setupVertexAttribs();

			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);

			// restore original state
			gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
			RenderCase<SampleType>::setupVertexAttribs();
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX)
		{
			// make the vertex buffer present on the gpu
			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
		}
		else
			DE_ASSERT(false);
	}
	else
		DE_ASSERT(false);

	// Finish the preparation work and start the measured part from a cleared framebuffer
	RenderCase<SampleType>::waitGLResults();
	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
	gl.clear(GL_COLOR_BUFFER_BIT);
	RenderCase<SampleType>::waitGLResults();

	tcu::warmupCPU();

	// upload

	{
		glw::GLenum		target;
		glw::GLsizeiptr	size;
		glw::GLintptr	offset = 0;
		const void*		source;

		// Select the bind point, byte range and source pointer of the upload
		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
		{
			target	= GL_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
			source	= &vertexData[0];
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
		{
			target	= GL_ELEMENT_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
			source	= &indexData[0];
		}
		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
		{
			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

			// size is half of the data and offset is half of the size, both aligned to 4 bytes
			target	= GL_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
			source	= (const deUint8*)&vertexData[0] + offset;
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
		{
			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

			// upload to 25% - 75% range
			target	= GL_ELEMENT_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
			source	= (const deUint8*)&indexData[0] + offset;
		}
		else
		{
			DE_ASSERT(false);
			return;
		}

		// Timed region: everything between here and endTime counts as upload time
		startTime = deGetMicroseconds();

		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
			gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
		{
			// create buffer storage
			if (m_bufferState == BUFFERSTATE_NEW)
				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
			gl.bufferSubData(target, offset, size, source);
		}
		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
		{
			void*			mapPtr;
			glw::GLboolean	unmapSuccessful;

			// create buffer storage
			if (m_bufferState == BUFFERSTATE_NEW)
				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);

			mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
			if (!mapPtr)
				throw tcu::Exception("MapBufferRange returned NULL");

			deMemcpy(mapPtr, source, (int)size);

			// if unmapping fails, just try again later
			unmapSuccessful = gl.unmapBuffer(target);
			if (!unmapSuccessful)
				throw UnmapFailureError();
		}
		else
			DE_ASSERT(false);

		endTime = deGetMicroseconds();

		sample.result.uploadedDataSize = (int)size;
		sample.result.duration.uploadDuration = endTime - startTime;
	}

	// unrelated
	if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
	{
		// Upload the full vertex data to a buffer that is not used by the draw call. Not timed.
		const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
		// Attribute pointers are not modified, no need restore state

		sample.result.unrelatedDataSize = unrelatedUploadSize;
	}

	// draw
	{
		startTime = deGetMicroseconds();

		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
		else
			DE_ASSERT(false);

		endTime = deGetMicroseconds();

		sample.result.duration.renderDuration = endTime - startTime;
	}

	// read
	{
		startTime = deGetMicroseconds();
		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
		endTime = deGetMicroseconds();

		sample.result.duration.readDuration = endTime - startTime;
	}

	// set results

	sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;

	// The fitted response is draw + read; the upload time only contributes to the total
	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
	sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}
5037
// Render-time case with the sequence: draw, buffer upload, draw, readPixels.
// The upload either modifies the buffer used by the first draw
// (UPLOADBUFFERTARGET_SAME_BUFFER) or a separate buffer that is then used by
// the second draw (UPLOADBUFFERTARGET_DIFFERENT_BUFFER, the reference case).
class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
{
public:
	// Extra flags combined with MAP_WRITE_BIT when uploading with mapBufferRange
	enum MapFlags
	{
		MAPFLAG_NONE = 0,
		MAPFLAG_INVALIDATE_BUFFER,
		MAPFLAG_INVALIDATE_RANGE,

		MAPFLAG_LAST
	};
	// Which buffer the upload between the two draws is directed to
	enum UploadBufferTarget
	{
		UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
		UPLOADBUFFERTARGET_SAME_BUFFER,

		UPLOADBUFFERTARGET_LAST
	};
								BufferInUseRenderTimeCase	(Context&			context,
															 const char*		name,
															 const char*		description,
															 DrawMethod			method,
															 MapFlags			mapFlags,
															 TargetBuffer		targetBuffer,
															 UploadMethod		uploadMethod,
															 UploadRange		uploadRange,
															 UploadBufferTarget	uploadTarget);

private:
	void						init						(void);
	void						runSample					(SampleResult& sample);

	const TargetBuffer			m_targetBuffer;			//!< Which buffer is uploaded (vertex or index)
	const UploadMethod			m_uploadMethod;			//!< bufferData, bufferSubData or mapBufferRange
	const UploadRange			m_uploadRange;			//!< Full or partial upload
	const MapFlags				m_mapFlags;				//!< Only read when uploading with mapBufferRange
	const UploadBufferTarget	m_uploadBufferTarget;	//!< Same buffer as the first draw, or a different one (reference case)
};
5076
// Stores the case configuration. The initializer list follows the member
// declaration order of the class. All enum arguments are asserted to be valid
// (below their respective *_LAST sentinel).
BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context&				context,
													  const char*			name,
													  const char*			description,
													  DrawMethod			method,
													  MapFlags				mapFlags,
													  TargetBuffer			targetBuffer,
													  UploadMethod			uploadMethod,
													  UploadRange			uploadRange,
													  UploadBufferTarget	uploadTarget)
	: RenderCase<RenderUploadRenderReadDuration>	(context, name, description, method)
	, m_targetBuffer								(targetBuffer)
	, m_uploadMethod								(uploadMethod)
	, m_uploadRange									(uploadRange)
	, m_mapFlags									(mapFlags)
	, m_uploadBufferTarget							(uploadTarget)
{
	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
	DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
	DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
}
5099
5100void BufferInUseRenderTimeCase::init (void)
5101{
5102	RenderCase<RenderUploadRenderReadDuration>::init();
5103
5104	// log
5105	{
5106		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5107		const char* const	uploadFunctionName		= (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
5108		const bool			isReferenceCase			= (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5109		tcu::MessageBuilder	message					(&m_testCtx.getLog());
5110
5111		message	<< "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5112				<< targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5113
5114		if (isReferenceCase)
5115			message << "Rendering:\n"
5116					<< "    before test: create and use buffers B and C\n"
5117					<< "    first draw: render using buffer B\n"
5118					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer C contents\n")	:
5119						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer C contents\n")	:
5120						((const char*)DE_NULL))
5121					<< "    second draw: render using buffer C\n"
5122					<< "    read: readPixels\n";
5123		else
5124			message << "Rendering:\n"
5125					<< "    before test: create and use buffer B\n"
5126					<< "    first draw: render using buffer B\n"
5127					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer B contents\n")	:
5128						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer B contents\n")	:
5129						((const char*)DE_NULL))
5130					<< "    second draw: render using buffer B\n"
5131					<< "    read: readPixels\n";
5132
5133		message	<< "Uploading using " << uploadFunctionName
5134					<< ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")	:
5135						(m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")	:
5136						(m_mapFlags == MAPFLAG_NONE)				? ("")														:
5137						((const char*)DE_NULL))
5138					<< "\n"
5139				<< getNumSamples() << " test samples. Sample order is randomized.\n"
5140				<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5141				<< "Workload sizes are in the range ["
5142					<< getMinWorkloadSize() << ",  "
5143					<< getMaxWorkloadSize() << "] vertices "
5144					<< "(["
5145					<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5146					<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5147				<< "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";
5148
5149		if (isReferenceCase)
5150			message	<< "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
5151		else
5152			message	<< "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5153
5154		message << tcu::TestLog::EndMessage;
5155	}
5156}
5157
// Measures one sample of the sequence: first draw (timed), buffer upload
// (timed), second draw (timed) and readPixels (timed). In the reference case
// (UPLOADBUFFERTARGET_DIFFERENT_BUFFER) the upload is directed to
// alternativeUploadBuffer instead of the buffer used by the first draw.
// Results are written to sample.result.
//
// Throws tcu::Exception if mapBufferRange returns NULL and UnmapFailureError
// if unmapBuffer reports failure.
void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
{
	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
	const glu::Buffer		arrayBuffer				(m_context.getRenderContext());
	const glu::Buffer		indexBuffer				(m_context.getRenderContext());
	const glu::Buffer		alternativeUploadBuffer	(m_context.getRenderContext());
	const int				numVertices				= getLayeredGridNumVertices(sample.scene);
	tcu::Surface			resultSurface			(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
	deUint64				startTime;
	deUint64				endTime;
	std::vector<tcu::Vec4>	vertexData;
	std::vector<deUint32>	indexData;

	// create data

	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
		generateLayeredGridIndexData(indexData, sample.scene);

	// make buffers used

	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
	setupVertexAttribs();

	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
	{
		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
	}
	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
	{
		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
	}
	else
		DE_ASSERT(false);

	// another pair of buffers for reference case
	if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
	{
		// Fill the alternative buffer and draw from it once so that it has also been used
		if (m_targetBuffer == TARGETBUFFER_VERTEX)
		{
			gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);

			setupVertexAttribs();
			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX)
		{
			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
		}
		else
			DE_ASSERT(false);

		// restore state
		gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
		setupVertexAttribs();
	}

	// Finish the preparation work and start the measured part from a cleared framebuffer
	waitGLResults();
	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
	gl.clear(GL_COLOR_BUFFER_BIT);
	waitGLResults();

	tcu::warmupCPU();

	// first draw
	{
		startTime = deGetMicroseconds();

		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
		else
			DE_ASSERT(false);

		endTime = deGetMicroseconds();

		sample.result.duration.firstRenderDuration = endTime - startTime;
	}

	// upload
	{
		glw::GLenum		target;
		glw::GLsizeiptr	size;
		glw::GLintptr	offset = 0;
		const void*		source;

		// Select the bind point, byte range and source pointer of the upload
		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
		{
			target	= GL_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
			source	= &vertexData[0];
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
		{
			target	= GL_ELEMENT_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
			source	= &indexData[0];
		}
		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
		{
			// size is half of the data and offset is half of the size, both aligned to 4 bytes
			target	= GL_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
			source	= (const deUint8*)&vertexData[0] + offset;
		}
		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
		{
			// upload to 25% - 75% range
			target	= GL_ELEMENT_ARRAY_BUFFER;
			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
			source	= (const deUint8*)&indexData[0] + offset;
		}
		else
		{
			DE_ASSERT(false);
			return;
		}

		// reference case? don't modify the buffer in use
		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
			gl.bindBuffer(target, *alternativeUploadBuffer);

		// Timed region: everything between here and endTime counts as upload time
		startTime = deGetMicroseconds();

		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
			gl.bufferData(target, size, source, GL_STREAM_DRAW);
		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
			gl.bufferSubData(target, offset, size, source);
		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
		{
			// Any other m_mapFlags value (including MAPFLAG_NONE) yields -1 as the access flags
			const int		mapFlags	= (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)	:
										  (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)	:
										  (-1);
			void*			mapPtr;
			glw::GLboolean	unmapSuccessful;

			mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
			if (!mapPtr)
				throw tcu::Exception("MapBufferRange returned NULL");

			deMemcpy(mapPtr, source, (int)size);

			// if unmapping fails, just try again later
			unmapSuccessful = gl.unmapBuffer(target);
			if (!unmapSuccessful)
				throw UnmapFailureError();
		}
		else
			DE_ASSERT(false);

		endTime = deGetMicroseconds();

		sample.result.uploadedDataSize = (int)size;
		sample.result.duration.uploadDuration = endTime - startTime;
	}

	// second draw
	{
		// Source vertex data from alternative buffer in reference case
		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
			setupVertexAttribs();

		startTime = deGetMicroseconds();

		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
		else
			DE_ASSERT(false);

		endTime = deGetMicroseconds();

		sample.result.duration.secondRenderDuration = endTime - startTime;
	}

	// read
	{
		startTime = deGetMicroseconds();
		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
		endTime = deGetMicroseconds();

		sample.result.duration.readDuration = endTime - startTime;
	}

	// set results

	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;

	// The fitted response is second draw + read; first draw and upload only contribute to the total
	sample.result.duration.renderReadDuration	= sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
	sample.result.duration.totalDuration		= sample.result.duration.firstRenderDuration +
												  sample.result.duration.uploadDuration +
												  sample.result.duration.secondRenderDuration +
												  sample.result.duration.readDuration;
	sample.result.duration.fitResponseDuration	= sample.result.duration.renderReadDuration;
}
5366
// Performance case built on RenderPerformanceTestBase and configured by a draw
// method, target buffer, upload method and buffer state.
// NOTE(review): judging by the names, the case uploads a buffer, waits a number
// of frames and then draws from it - confirm against iterate().
class UploadWaitDrawCase : public RenderPerformanceTestBase
{
public:
	// Per-sample input / bookkeeping
	struct Sample
	{
		int			numFrames;			// presumably the number of frames between upload and draw - TODO confirm
		deUint64	uploadCallEndTime;	// presumably a timestamp taken when the upload call returned - TODO confirm
	};
	// Per-sample measurement results
	struct Result
	{
		deUint64	uploadDuration;
		deUint64	renderDuration;
		deUint64	readDuration;
		deUint64	renderReadDuration;

		deUint64	timeBeforeUse;		// presumably time between the upload and the first use of the buffer - TODO confirm
	};

							UploadWaitDrawCase				(Context&		context,
															 const char*	name,
															 const char*	description,
															 DrawMethod		drawMethod,
															 TargetBuffer	targetBuffer,
															 UploadMethod	uploadMethod,
															 BufferState	bufferState);
							~UploadWaitDrawCase				(void);

private:
	void					init							(void);
	void					deinit							(void);
	IterateResult			iterate							(void);

	void					uploadBuffer					(Sample& sample, Result& result);
	void					drawFromBuffer					(Sample& sample, Result& result);
	void					reuseAndDeleteBuffer			(void);
	void					logAndSetTestResult				(void);
	void					logSamples						(void);
	void					drawMisc						(void);
	// The following two take a pointer to the Result member to analyze
	int						findStabilizationSample			(deUint64 (Result::*target), const char* description);
	bool					checkSampleTemporalStability	(deUint64 (Result::*target), const char* description);

	const DrawMethod		m_drawMethod;
	const TargetBuffer		m_targetBuffer;
	const UploadMethod		m_uploadMethod;
	const BufferState		m_bufferState;

	const int				m_numSamplesPerSwap;	// initialized to 10 by the constructor
	const int				m_numMaxSwaps;			// initialized to 4 by the constructor

	int						m_frameNdx;				// starts at 0
	int						m_sampleNdx;			// starts at 0
	int						m_numVertices;			// starts at -1

	std::vector<tcu::Vec4>	m_vertexData;
	std::vector<deUint32>	m_indexData;
	std::vector<Sample>		m_samples;
	std::vector<Result>		m_results;
	std::vector<int>		m_iterationOrder;

	deUint32				m_vertexBuffer;			// starts at 0; presumably a GL buffer object name - TODO confirm
	deUint32				m_indexBuffer;
	deUint32				m_miscBuffer;
	int						m_numMiscVertices;
};
5431
5432UploadWaitDrawCase::UploadWaitDrawCase (Context&		context,
5433										const char*		name,
5434										const char*		description,
5435										DrawMethod		drawMethod,
5436										TargetBuffer	targetBuffer,
5437										UploadMethod	uploadMethod,
5438										BufferState		bufferState)
5439	: RenderPerformanceTestBase	(context, name, description)
5440	, m_drawMethod				(drawMethod)
5441	, m_targetBuffer			(targetBuffer)
5442	, m_uploadMethod			(uploadMethod)
5443	, m_bufferState				(bufferState)
5444	, m_numSamplesPerSwap		(10)
5445	, m_numMaxSwaps				(4)
5446	, m_frameNdx				(0)
5447	, m_sampleNdx				(0)
5448	, m_numVertices				(-1)
5449	, m_vertexBuffer			(0)
5450	, m_indexBuffer				(0)
5451	, m_miscBuffer				(0)
5452	, m_numMiscVertices			(-1)
5453{
5454}
5455
5456UploadWaitDrawCase::~UploadWaitDrawCase (void)
5457{
5458	deinit();
5459}
5460
5461void UploadWaitDrawCase::init (void)
5462{
5463	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5464	const int				vertexAttribSize		= (int)sizeof(tcu::Vec4) * 2; // color4, position4
5465	const int				vertexIndexSize			= (int)sizeof(deUint32);
5466	const int				vertexUploadDataSize	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5467
5468	RenderPerformanceTestBase::init();
5469
5470	// requirements
5471
5472	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5473		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5474		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
5475
5476	// gl state
5477
5478	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5479
5480	// enable bleding to prevent grid layers from being discarded
5481
5482	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5483	gl.blendEquation(GL_FUNC_ADD);
5484	gl.enable(GL_BLEND);
5485
5486	// scene
5487
5488	{
5489		LayeredGridSpec scene;
5490
5491		// create ~8MB workload with similar characteristics as in the other test
5492		// => makes comparison to other results more straightforward
5493		scene.gridWidth = 93;
5494		scene.gridHeight = 93;
5495		scene.gridLayers = 5;
5496
5497		generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5498		generateLayeredGridIndexData(m_indexData, scene);
5499		m_numVertices = getLayeredGridNumVertices(scene);
5500	}
5501
5502	// buffers
5503
5504	if (m_bufferState == BUFFERSTATE_NEW)
5505	{
5506		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5507		{
5508			// reads from two buffers, prepare the static buffer
5509
5510			if (m_targetBuffer == TARGETBUFFER_VERTEX)
5511			{
5512				// index buffer is static, use another vertex buffer to keep original buffer in unused state
5513				const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5514
5515				gl.genBuffers(1, &m_indexBuffer);
5516				gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5517				gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5518				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5519				gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);
5520
5521				setupVertexAttribs();
5522				gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5523			}
5524			else if (m_targetBuffer == TARGETBUFFER_INDEX)
5525			{
5526				// vertex buffer is static
5527				gl.genBuffers(1, &m_vertexBuffer);
5528				gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5529				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5530
5531				setupVertexAttribs();
5532				gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5533			}
5534			else
5535				DE_ASSERT(false);
5536		}
5537	}
5538	else if (m_bufferState == BUFFERSTATE_EXISTING)
5539	{
5540		const glw::GLenum vertexUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5541		const glw::GLenum indexUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5542
5543		gl.genBuffers(1, &m_vertexBuffer);
5544		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5545		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);
5546
5547		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5548		{
5549			gl.genBuffers(1, &m_indexBuffer);
5550			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5551			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
5552		}
5553
5554		setupVertexAttribs();
5555
5556		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5557			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5558		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5559			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5560		else
5561			DE_ASSERT(false);
5562	}
5563	else
5564		DE_ASSERT(false);
5565
5566	// misc draw buffer
5567	{
5568		std::vector<tcu::Vec4>	vertexData;
5569		LayeredGridSpec			scene;
5570
5571		// create ~1.5MB workload with similar characteristics
5572		scene.gridWidth = 40;
5573		scene.gridHeight = 40;
5574		scene.gridLayers = 5;
5575
5576		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
5577
5578		gl.genBuffers(1, &m_miscBuffer);
5579		gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5580		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);
5581
5582		m_numMiscVertices = getLayeredGridNumVertices(scene);
5583	}
5584
5585	// iterations
5586	{
5587		m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5588		m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5589
5590		for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
5591		for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
5592		{
5593			const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;
5594
5595			m_samples[index].numFrames = numSwaps;
5596		}
5597
5598		m_iterationOrder.resize(m_samples.size());
5599		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
5600	}
5601
5602	// log
5603	m_testCtx.getLog()
5604		<< tcu::TestLog::Message
5605		<< "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
5606		<< "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
5607		<< "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
5608		<< "Uploading using "
5609			<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")																							:
5610				(m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")																							:
5611				(m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")	:
5612				((const char*)DE_NULL))
5613			<< "\n"
5614		<< "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
5615		<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
5616		<< "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
5617		<< "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
5618		<< tcu::TestLog::EndMessage;
5619}
5620
5621void UploadWaitDrawCase::deinit (void)
5622{
5623	RenderPerformanceTestBase::deinit();
5624
5625	if (m_vertexBuffer)
5626	{
5627		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
5628		m_vertexBuffer = 0;
5629	}
5630	if (m_indexBuffer)
5631	{
5632		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
5633		m_indexBuffer = 0;
5634	}
5635	if (m_miscBuffer)
5636	{
5637		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
5638		m_miscBuffer = 0;
5639	}
5640}
5641
5642UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
5643{
5644	const glw::Functions&	gl								= m_context.getRenderContext().getFunctions();
5645	const int				betweenIterationDummyFrameCount = 5; // draw misc between test samples
5646	const int				frameNdx						= m_frameNdx++;
5647	const int				currentSampleNdx				= m_iterationOrder[m_sampleNdx];
5648
5649	// Simulate work for about 8ms
5650	busyWait(8000);
5651
5652	// Dummy rendering during dummy frames
5653	if (frameNdx != m_samples[currentSampleNdx].numFrames)
5654	{
5655		// draw similar from another buffer
5656		drawMisc();
5657	}
5658
5659	if (frameNdx == 0)
5660	{
5661		// upload and start the clock
5662		uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5663	}
5664
5665	if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
5666	{
5667		// draw using the uploaded buffer
5668		drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5669
5670		// re-use buffer for something else to make sure test iteration do not affect each other
5671		if (m_bufferState == BUFFERSTATE_NEW)
5672			reuseAndDeleteBuffer();
5673	}
5674	else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount)
5675	{
5676		// next sample
5677		++m_sampleNdx;
5678		m_frameNdx = 0;
5679	}
5680
5681	GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
5682
5683	if (m_sampleNdx < (int)m_samples.size())
5684		return CONTINUE;
5685
5686	logAndSetTestResult();
5687	return STOP;
5688}
5689
5690void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
5691{
5692	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
5693	deUint64				startTime;
5694	deUint64				endTime;
5695	glw::GLenum				target;
5696	glw::GLsizeiptr			size;
5697	const void*				source;
5698
5699	// data source
5700
5701	if (m_targetBuffer == TARGETBUFFER_VERTEX)
5702	{
5703		DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5704
5705		target	= GL_ARRAY_BUFFER;
5706		size	= (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
5707		source	= &m_vertexData[0];
5708	}
5709	else if (m_targetBuffer == TARGETBUFFER_INDEX)
5710	{
5711		DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5712
5713		target	= GL_ELEMENT_ARRAY_BUFFER;
5714		size	= (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
5715		source	= &m_indexData[0];
5716	}
5717	else
5718	{
5719		DE_ASSERT(false);
5720		return;
5721	}
5722
5723	// gen buffer
5724
5725	if (m_bufferState == BUFFERSTATE_NEW)
5726	{
5727		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5728		{
5729			gl.genBuffers(1, &m_vertexBuffer);
5730			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5731		}
5732		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5733		{
5734			gl.genBuffers(1, &m_indexBuffer);
5735			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5736		}
5737		else
5738			DE_ASSERT(false);
5739
5740		if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
5741			m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5742		{
5743			gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
5744		}
5745	}
5746	else if (m_bufferState == BUFFERSTATE_EXISTING)
5747	{
5748		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5749			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5750		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5751			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5752		else
5753			DE_ASSERT(false);
5754	}
5755	else
5756		DE_ASSERT(false);
5757
5758	// upload
5759
5760	startTime = deGetMicroseconds();
5761
5762	if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5763		gl.bufferData(target, size, source, GL_STATIC_DRAW);
5764	else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5765		gl.bufferSubData(target, 0, size, source);
5766	else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5767	{
5768		void*			mapPtr;
5769		glw::GLboolean	unmapSuccessful;
5770
5771		mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
5772		if (!mapPtr)
5773			throw tcu::Exception("MapBufferRange returned NULL");
5774
5775		deMemcpy(mapPtr, source, (int)size);
5776
5777		// if unmapping fails, just try again later
5778		unmapSuccessful = gl.unmapBuffer(target);
5779		if (!unmapSuccessful)
5780			throw UnmapFailureError();
5781	}
5782	else
5783		DE_ASSERT(false);
5784
5785	endTime = deGetMicroseconds();
5786
5787	sample.uploadCallEndTime = endTime;
5788	result.uploadDuration = endTime - startTime;
5789}
5790
5791void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
5792{
5793	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
5794	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5795	deUint64				startTime;
5796	deUint64				endTime;
5797
5798	DE_ASSERT(m_vertexBuffer != 0);
5799	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5800		DE_ASSERT(m_indexBuffer == 0);
5801	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5802		DE_ASSERT(m_indexBuffer != 0);
5803	else
5804		DE_ASSERT(false);
5805
5806	// draw
5807	{
5808		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5809		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5810			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5811
5812		setupVertexAttribs();
5813
5814		// microseconds passed since return from upload call
5815		result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
5816
5817		startTime = deGetMicroseconds();
5818
5819		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5820			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5821		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5822			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5823		else
5824			DE_ASSERT(false);
5825
5826		endTime = deGetMicroseconds();
5827
5828		result.renderDuration = endTime - startTime;
5829	}
5830
5831	// read
5832	{
5833		startTime = deGetMicroseconds();
5834		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5835		endTime = deGetMicroseconds();
5836
5837		result.readDuration = endTime - startTime;
5838	}
5839
5840	result.renderReadDuration = result.renderDuration + result.readDuration;
5841}
5842
5843void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
5844{
5845	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5846
5847	if (m_targetBuffer == TARGETBUFFER_INDEX)
5848	{
5849		// respecify and delete index buffer
5850		static const deUint32 indices[3] = {1, 3, 8};
5851
5852		DE_ASSERT(m_indexBuffer != 0);
5853
5854		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
5855		gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
5856		gl.deleteBuffers(1, &m_indexBuffer);
5857		m_indexBuffer = 0;
5858	}
5859	else if (m_targetBuffer == TARGETBUFFER_VERTEX)
5860	{
5861		// respecify and delete vertex buffer
5862		static const tcu::Vec4 coloredTriangle[6] =
5863		{
5864			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
5865			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
5866			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
5867		};
5868
5869		DE_ASSERT(m_vertexBuffer != 0);
5870
5871		gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
5872		gl.drawArrays(GL_TRIANGLES, 0, 3);
5873		gl.deleteBuffers(1, &m_vertexBuffer);
5874		m_vertexBuffer = 0;
5875	}
5876
5877	waitGLResults();
5878}
5879
5880void UploadWaitDrawCase::logAndSetTestResult (void)
5881{
5882	int		uploadStabilization;
5883	int		renderReadStabilization;
5884	int		renderStabilization;
5885	int		readStabilization;
5886	bool	temporallyStable;
5887
5888	{
5889		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
5890		logSamples();
5891	}
5892
5893	{
5894		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
5895
5896		// log stabilization points
5897		renderReadStabilization	= findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
5898		uploadStabilization		= findStabilizationSample(&Result::uploadDuration, "Upload time");
5899		renderStabilization		= findStabilizationSample(&Result::renderDuration, "Draw call time");
5900		readStabilization		= findStabilizationSample(&Result::readDuration, "ReadPixels time");
5901
5902		temporallyStable		= true;
5903		temporallyStable		&= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
5904		temporallyStable		&= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
5905		temporallyStable		&= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
5906		temporallyStable		&= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
5907	}
5908
5909	{
5910		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
5911
5912		// Check result sanily
5913		if (uploadStabilization != 0)
5914			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
5915		if (!temporallyStable)
5916			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;
5917
5918		// render & read
5919		if (renderReadStabilization == -1)
5920			m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5921		else
5922			m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);
5923
5924		// draw call
5925		if (renderStabilization == -1)
5926			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
5927		else
5928			m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);
5929
5930		// readpixels
5931		if (readStabilization == -1)
5932			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5933		else
5934			m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);
5935
5936		// Report renderReadStabilization
5937		if (renderReadStabilization != -1)
5938			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
5939		else
5940			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
5941	}
5942}
5943
5944void UploadWaitDrawCase::logSamples (void)
5945{
5946	// Inverse m_iterationOrder
5947
5948	std::vector<int> runOrder(m_iterationOrder.size());
5949	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
5950		runOrder[m_iterationOrder[ndx]] = ndx;
5951
5952	// Log samples
5953
5954	m_testCtx.getLog()
5955		<< tcu::TestLog::SampleList("Samples", "Samples")
5956		<< tcu::TestLog::SampleInfo
5957		<< tcu::TestLog::ValueInfo("NumSwaps",		"SwapBuffers before use",			"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5958		<< tcu::TestLog::ValueInfo("Delay",			"Time before use",					"us",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
5959		<< tcu::TestLog::ValueInfo("RunOrder",		"Sample run order",					"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5960		<< tcu::TestLog::ValueInfo("DrawReadTime",	"Draw call and ReadPixels time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5961		<< tcu::TestLog::ValueInfo("TotalTime",		"Total time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5962		<< tcu::TestLog::ValueInfo("Upload time",	"Upload time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5963		<< tcu::TestLog::ValueInfo("DrawCallTime",	"Draw call time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5964		<< tcu::TestLog::ValueInfo("ReadTime",		"ReadPixels time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5965		<< tcu::TestLog::EndSampleInfo;
5966
5967	for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5968		m_testCtx.getLog()
5969			<< tcu::TestLog::Sample
5970			<< m_samples[sampleNdx].numFrames
5971			<< (int)m_results[sampleNdx].timeBeforeUse
5972			<< runOrder[sampleNdx]
5973			<< (int)m_results[sampleNdx].renderReadDuration
5974			<< (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
5975			<< (int)m_results[sampleNdx].uploadDuration
5976			<< (int)m_results[sampleNdx].renderDuration
5977			<< (int)m_results[sampleNdx].readDuration
5978			<< tcu::TestLog::EndSample;
5979
5980	m_testCtx.getLog() << tcu::TestLog::EndSampleList;
5981}
5982
5983void UploadWaitDrawCase::drawMisc (void)
5984{
5985	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5986
5987	gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5988	setupVertexAttribs();
5989	gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
5990}
5991
//! Outcome of comparing two sets of observations with distributionCompare().
struct DistributionCompareResult
{
	bool	equal;					//!< True if the sets are accepted as having the same distribution.
	float	standardDeviations;		//!< Test statistic expressed in standard deviations (z value).
};
5997
5998template <typename Comparer>
5999static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
6000{
6001	float sum = 0;
6002
6003	for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
6004	{
6005		const deUint64	testSample		= testSamples[sampleNdx];
6006		const int		lowerIndex		= (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6007		const int		upperIndex		= (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6008		const int		lowerRank		= lowerIndex + 1;	// convert zero-indexed to rank
6009		const int		upperRank		= upperIndex;		// convert zero-indexed to rank, upperIndex is last equal + 1
6010		const float		rankMidpoint	= (lowerRank + upperRank) / 2.0f;
6011
6012		sum += rankMidpoint;
6013	}
6014
6015	return sum;
6016}
6017
6018template <typename Comparer>
6019static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
6020{
6021	// Mann�Whitney U test
6022
6023	const int				n1			= (int)orderedObservationsA.size();
6024	const int				n2			= (int)orderedObservationsB.size();
6025	std::vector<deUint64>	allSamples	(n1 + n2);
6026
6027	std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6028	std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6029	std::sort(allSamples.begin(), allSamples.end());
6030
6031	{
6032		const float					R1		= sumOfRanks(orderedObservationsA, allSamples, comparer);
6033
6034		const float					U1		= n1*n2 + n1*(n1 + 1)/2 - R1;
6035		const float					U2		= (n1 * n2) - U1;
6036		const float					U		= de::min(U1, U2);
6037
6038		// \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6039
6040		const float					mU		= n1*n2 / 2.0f;
6041		const float					sigmaU	= deFloatSqrt((n1*n2*(n1+n2+1)) / 12.0f);
6042		const float					z		= (U - mU) / sigmaU;
6043
6044		DistributionCompareResult	result;
6045
6046		result.equal				= (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6047		result.standardDeviations	= z;
6048
6049		return result;
6050	}
6051}
6052
6053template <typename T>
6054struct ThresholdComparer
6055{
6056	float	relativeThreshold;
6057	T		absoluteThreshold;
6058
6059	bool operator() (const T& a, const T& b) const
6060	{
6061		const float diff = de::abs((float)a - (float)b);
6062
6063		// thresholds
6064		if (diff <= (float)absoluteThreshold)
6065			return false;
6066		if (diff <= a*relativeThreshold ||
6067			diff <= b*relativeThreshold)
6068			return false;
6069
6070		// cmp
6071		return a < b;
6072	}
6073};
6074
6075int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6076{
6077	std::vector<std::vector<deUint64> >	sampleObservations(m_numMaxSwaps+1);
6078	ThresholdComparer<deUint64>			comparer;
6079
6080	comparer.relativeThreshold = 0.15f;	// 15%
6081	comparer.absoluteThreshold = 100;	// (us), assumed sampling precision
6082
6083	// get observations and order them
6084
6085	for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6086	{
6087		int insertNdx = 0;
6088
6089		sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6090
6091		for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6092			if (m_samples[ndx].numFrames == swapNdx)
6093				sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6094
6095		DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6096
6097		std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6098	}
6099
6100	// find stabilization point
6101
6102	for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx )
6103	{
6104		// Distribution is equal to all following distributions
6105		for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6106		{
6107			// Stable section ends here?
6108			const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6109			if (!result.equal)
6110			{
6111				// Last two samples are not equal? Samples never stabilized
6112				if (sampleNdx == m_numMaxSwaps-1)
6113				{
6114					m_testCtx.getLog()
6115						<< tcu::TestLog::Message
6116						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6117						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6118						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6119						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6120						<< tcu::TestLog::EndMessage;
6121					return -1;
6122				}
6123				else
6124				{
6125					m_testCtx.getLog()
6126						<< tcu::TestLog::Message
6127						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6128						<< "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n"
6129						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6130						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6131						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6132						<< tcu::TestLog::EndMessage;
6133
6134					return sampleNdx+1;
6135				}
6136			}
6137		}
6138	}
6139
6140	m_testCtx.getLog()
6141		<< tcu::TestLog::Message
6142		<< description << ": All samples seem to have the same distribution"
6143		<< tcu::TestLog::EndMessage;
6144
6145	// all distributions equal
6146	return 0;
6147}
6148
6149bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6150{
6151	// Try to find correlation with sample order and sample times
6152
6153	const int						numDataPoints	= (int)m_iterationOrder.size();
6154	std::vector<tcu::Vec2>			dataPoints		(m_iterationOrder.size());
6155	LineParametersWithConfidence	lineFit;
6156
6157	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6158	{
6159		dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6160		dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6161	}
6162
6163	lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6164
6165	// Difference of more than 25% of the offset along the whole sample range
6166	if (de::abs(lineFit.coefficient) * numDataPoints > de::abs(lineFit.offset) * 0.25f)
6167	{
6168		m_testCtx.getLog()
6169			<< tcu::TestLog::Message
6170			<< description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
6171			<< "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6172			<< tcu::TestLog::EndMessage;
6173
6174		return false;
6175	}
6176	else
6177		return true;
6178}
6179
6180} // anonymous
6181
6182BufferDataUploadTests::BufferDataUploadTests (Context& context)
6183	: TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6184{
6185}
6186
6187BufferDataUploadTests::~BufferDataUploadTests (void)
6188{
6189}
6190
6191void BufferDataUploadTests::init (void)
6192{
6193	static const struct BufferUsage
6194	{
6195		const char* name;
6196		deUint32	usage;
6197		bool		primaryUsage;
6198	} bufferUsages[] =
6199	{
6200		{ "stream_draw",	GL_STREAM_DRAW,		true	},
6201		{ "stream_read",	GL_STREAM_READ,		false	},
6202		{ "stream_copy",	GL_STREAM_COPY,		false	},
6203		{ "static_draw",	GL_STATIC_DRAW,		true	},
6204		{ "static_read",	GL_STATIC_READ,		false	},
6205		{ "static_copy",	GL_STATIC_COPY,		false	},
6206		{ "dynamic_draw",	GL_DYNAMIC_DRAW,	true	},
6207		{ "dynamic_read",	GL_DYNAMIC_READ,	false	},
6208		{ "dynamic_copy",	GL_DYNAMIC_COPY,	false	},
6209	};
6210
6211	tcu::TestCaseGroup* const referenceGroup			= new tcu::TestCaseGroup(m_testCtx, "reference",			"Reference functions");
6212	tcu::TestCaseGroup* const functionCallGroup			= new tcu::TestCaseGroup(m_testCtx, "function_call",		"Function call timing");
6213	tcu::TestCaseGroup* const modifyAfterUseGroup		= new tcu::TestCaseGroup(m_testCtx, "modify_after_use",		"Function call time after buffer has been used");
6214	tcu::TestCaseGroup* const renderAfterUploadGroup	= new tcu::TestCaseGroup(m_testCtx, "render_after_upload",	"Function call time of draw commands after buffer has been modified");
6215
6216	addChild(referenceGroup);
6217	addChild(functionCallGroup);
6218	addChild(modifyAfterUseGroup);
6219	addChild(renderAfterUploadGroup);
6220
6221	// .reference
6222	{
6223		static const struct BufferSizeRange
6224		{
6225			const char* name;
6226			int			minBufferSize;
6227			int			maxBufferSize;
6228			int			numSamples;
6229			bool		largeBuffersCase;
6230		} sizeRanges[] =
6231		{
6232			{ "small_buffers", 0,		1 << 18,	64,		false	}, // !< 0kB - 256kB
6233			{ "large_buffers", 1 << 18,	1 << 24,	32,		true	}, // !< 256kB - 16MB
6234		};
6235
6236		for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6237		{
6238			referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
6239															 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6240															 "Test memcpy performance",
6241															 sizeRanges[bufferSizeRangeNdx].minBufferSize,
6242															 sizeRanges[bufferSizeRangeNdx].maxBufferSize,
6243															 sizeRanges[bufferSizeRangeNdx].numSamples,
6244															 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6245		}
6246	}
6247
6248	// .function_call
6249	{
6250		const int minBufferSize		= 0;		// !< 0kiB
6251		const int maxBufferSize		= 1 << 24;	// !< 16MiB
6252		const int numDataSamples	= 25;
6253		const int numMapSamples		= 25;
6254
6255		tcu::TestCaseGroup* const bufferDataMethodGroup		= new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6256		tcu::TestCaseGroup* const bufferSubDataMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6257		tcu::TestCaseGroup* const mapBufferRangeMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6258
6259		functionCallGroup->addChild(bufferDataMethodGroup);
6260		functionCallGroup->addChild(bufferSubDataMethodGroup);
6261		functionCallGroup->addChild(mapBufferRangeMethodGroup);
6262
6263		// .buffer_data
6264		{
6265			static const struct TargetCase
6266			{
6267				tcu::TestCaseGroup*				group;
6268				BufferDataUploadCase::CaseType	caseType;
6269				bool							allUsages;
6270			} targetCases[] =
6271			{
6272				{ new tcu::TestCaseGroup(m_testCtx, "new_buffer",				"Target new buffer"),							BufferDataUploadCase::CASE_NEW_BUFFER,			true	},
6273				{ new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",		"Target new unspecified buffer"),				BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,	true	},
6274				{ new tcu::TestCaseGroup(m_testCtx, "specified_buffer",			"Target new specified buffer"),					BufferDataUploadCase::CASE_SPECIFIED_BUFFER,	true	},
6275				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer",				"Target buffer that was used in draw"),			BufferDataUploadCase::CASE_USED_BUFFER,			true	},
6276				{ new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",		"Target larger buffer that was used in draw"),	BufferDataUploadCase::CASE_USED_LARGER_BUFFER,	false	},
6277			};
6278
6279			for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6280			{
6281				bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6282
6283				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6284					if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6285						targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
6286																						std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6287																						std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6288																						minBufferSize,
6289																						maxBufferSize,
6290																						numDataSamples,
6291																						bufferUsages[usageNdx].usage,
6292																						targetCases[targetNdx].caseType));
6293			}
6294		}
6295
6296		// .buffer_sub_data
6297		{
6298			static const struct FlagCase
6299			{
6300				tcu::TestCaseGroup*					group;
6301				BufferSubDataUploadCase::CaseType	parentCase;
6302				bool								allUsages;
6303				int									flags;
6304			} flagCases[] =
6305			{
6306				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",					    ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD															},
6307				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6308				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD														},
6309				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6310			};
6311
6312			for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6313			{
6314				bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6315
6316				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6317					if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6318							flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
6319																						   std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6320																						   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6321																						   minBufferSize,
6322																						   maxBufferSize,
6323																						   numDataSamples,
6324																						   bufferUsages[usageNdx].usage,
6325																						   flagCases[flagNdx].parentCase,
6326																						   flagCases[flagNdx].flags));
6327			}
6328		}
6329
6330		// .map_buffer_range
6331		{
6332			static const struct FlagCase
6333			{
6334				const char*	name;
6335				bool		usefulForUnusedBuffers;
6336				bool		allUsages;
6337				int			glFlags;
6338				int			caseFlags;
6339			} flagCases[] =
6340			{
6341				{ "flag_write_full",										true,	true,	GL_MAP_WRITE_BIT,																0																				},
6342				{ "flag_write_partial",										true,	true,	GL_MAP_WRITE_BIT,																MapBufferRangeCase::FLAG_PARTIAL												},
6343				{ "flag_read_write_full",									true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												0																				},
6344				{ "flag_read_write_partial",								true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												MapBufferRangeCase::FLAG_PARTIAL												},
6345				{ "flag_invalidate_range_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									0																				},
6346				{ "flag_invalidate_range_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6347				{ "flag_invalidate_buffer_full",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								0																				},
6348				{ "flag_invalidate_buffer_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								MapBufferRangeCase::FLAG_PARTIAL												},
6349				{ "flag_write_full_manual_invalidate_buffer",				false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_MANUAL_INVALIDATION									},
6350				{ "flag_write_partial_manual_invalidate_buffer",			false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION	},
6351				{ "flag_unsynchronized_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									0																				},
6352				{ "flag_unsynchronized_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6353				{ "flag_unsynchronized_and_invalidate_buffer_full",			true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	0																				},
6354				{ "flag_unsynchronized_and_invalidate_buffer_partial",		true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	MapBufferRangeCase::FLAG_PARTIAL												},
6355			};
6356			static const struct FlushCases
6357			{
6358				const char*	name;
6359				int			glFlags;
6360				int			caseFlags;
6361			} flushCases[] =
6362			{
6363				{ "flag_flush_explicit_map_full",					GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	0												},
6364				{ "flag_flush_explicit_map_partial",				GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_PARTIAL			},
6365				{ "flag_flush_explicit_map_full_flush_in_parts",	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS	},
6366				{ "flag_flush_explicit_map_full_flush_partial",		GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL		},
6367			};
6368			static const struct MapTestGroup
6369			{
6370				int					flags;
6371				bool				unusedBufferCase;
6372				tcu::TestCaseGroup* group;
6373			} groups[] =
6374			{
6375				{ MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,	true,	new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),				},
6376				{ MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,		true,	new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),					},
6377				{ 0,														false,	new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")	},
6378			};
6379
6380			// we OR same flags to both range and flushRange cases, make sure it is legal
6381			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6382			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6383
6384			for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6385			{
6386				tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;
6387
6388				mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6389
6390				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6391				{
6392					if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6393						continue;
6394
6395					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6396					bufferTypeGroup->addChild(bufferUsageGroup);
6397
6398					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6399						if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6400							bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
6401																			  bufferUsages[usageNdx].name,
6402																			  std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6403																			  minBufferSize,
6404																			  maxBufferSize,
6405																			  numMapSamples,
6406																			  bufferUsages[usageNdx].usage,
6407																			  flagCases[caseNdx].glFlags,
6408																			  flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6409				}
6410
6411				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6412				{
6413					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6414					bufferTypeGroup->addChild(bufferUsageGroup);
6415
6416					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6417						if (bufferUsages[usageNdx].primaryUsage)
6418							bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
6419																				   bufferUsages[usageNdx].name,
6420																				   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6421																				   minBufferSize,
6422																				   maxBufferSize,
6423																				   numMapSamples,
6424																				   bufferUsages[usageNdx].usage,
6425																				   flushCases[caseNdx].glFlags,
6426																				   flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6427				}
6428			}
6429		}
6430	}
6431
6432	// .modify_after_use
6433	{
6434		const int minBufferSize	= 0;		// !< 0kiB
6435		const int maxBufferSize	= 1 << 24;	// !< 16MiB
6436
6437		static const struct Usage
6438		{
6439			const char* name;
6440			const char* description;
6441			deUint32	usage;
6442		} usages[] =
6443		{
6444			{ "static_draw",	"Test with GL_STATIC_DRAW",		GL_STATIC_DRAW	},
6445			{ "dynamic_draw",	"Test with GL_DYNAMIC_DRAW",	GL_DYNAMIC_DRAW	},
6446			{ "stream_draw",	"Test with GL_STREAM_DRAW",		GL_STREAM_DRAW },
6447
6448		};
6449
6450		for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6451		{
6452			tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6453			modifyAfterUseGroup->addChild(usageGroup);
6454
6455			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data",							"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6456			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_different_size",			"Respecify buffer contents and size after use",			minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6457			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_repeated",					"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6458
6459			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6460			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial",				"Respecify buffer contents partially use",				minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6461			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full_repeated",		"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
6462			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial_repeated",		"Respecify buffer contents partially upload and use",	minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6463
6464			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT));
6465			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_partial",				"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT));
6466			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_full",				"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6467			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_partial",			"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6468			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6469			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6470			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6471			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6472			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6473			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6474
6475			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6476			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6477		}
6478	}
6479
6480	// .render_after_upload
6481	{
6482		// .reference
6483		{
6484			tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
6485			renderAfterUploadGroup->addChild(renderReferenceGroup);
6486
6487			// .draw
6488			{
6489				tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
6490				renderReferenceGroup->addChild(drawGroup);
6491
6492				// Time consumed by readPixels
6493				drawGroup->addChild(new ReferenceReadPixelsTimeCase	(m_context, "read_pixels",		"Measure time consumed by readPixels() function call"));
6494
6495				// Time consumed by rendering
6496				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_arrays",		"Measure time consumed by drawArrays() function call",		DRAWMETHOD_DRAW_ARRAYS));
6497				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_elements",	"Measure time consumed by drawElements() function call",	DRAWMETHOD_DRAW_ELEMENTS));
6498			}
6499
6500			// .draw_upload_draw
6501			{
6502				static const struct
6503				{
6504					const char*		name;
6505					const char*		description;
6506					DrawMethod		drawMethod;
6507					TargetBuffer	targetBuffer;
6508					bool			partial;
6509				} uploadTargets[] =
6510				{
6511					{
6512						"draw_arrays_upload_vertices",
6513						"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6514						DRAWMETHOD_DRAW_ARRAYS,
6515						TARGETBUFFER_VERTEX,
6516						false
6517					},
6518					{
6519						"draw_arrays_upload_vertices_partial",
6520						"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6521						DRAWMETHOD_DRAW_ARRAYS,
6522						TARGETBUFFER_VERTEX,
6523						true
6524					},
6525					{
6526						"draw_elements_upload_vertices",
6527						"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6528						DRAWMETHOD_DRAW_ELEMENTS,
6529						TARGETBUFFER_VERTEX,
6530						false
6531					},
6532					{
6533						"draw_elements_upload_indices",
6534						"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6535						DRAWMETHOD_DRAW_ELEMENTS,
6536						TARGETBUFFER_INDEX,
6537						false
6538					},
6539					{
6540						"draw_elements_upload_indices_partial",
6541						"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6542						DRAWMETHOD_DRAW_ELEMENTS,
6543						TARGETBUFFER_INDEX,
6544						true
6545					},
6546				};
6547				static const struct
6548				{
6549					const char*							name;
6550					const char*							description;
6551					UploadMethod						uploadMethod;
6552					BufferInUseRenderTimeCase::MapFlags	mapFlags;
6553					bool								supportsPartialUpload;
6554				} uploadMethods[] =
6555				{
6556					{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6557					{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6558					{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6559					{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6560				};
6561
6562				tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
6563				renderReferenceGroup->addChild(drawUploadDrawGroup);
6564
6565				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6566				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6567				{
6568					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6569
6570					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6571						continue;
6572
6573					drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6574																				name.c_str(),
6575																				uploadTargets[uploadTargetNdx].description,
6576																				uploadTargets[uploadTargetNdx].drawMethod,
6577																				uploadMethods[uploadMethodNdx].mapFlags,
6578																				uploadTargets[uploadTargetNdx].targetBuffer,
6579																				uploadMethods[uploadMethodNdx].uploadMethod,
6580																				(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6581																				BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
6582				}
6583			}
6584		}
6585
6586		// .upload_unrelated_and_draw
6587		{
6588			static const struct
6589			{
6590				const char*		name;
6591				const char*		description;
6592				DrawMethod		drawMethod;
6593			} drawMethods[] =
6594			{
6595				{ "draw_arrays",	"drawArrays",	DRAWMETHOD_DRAW_ARRAYS		},
6596				{ "draw_elements",	"drawElements",	DRAWMETHOD_DRAW_ELEMENTS	},
6597			};
6598
6599			static const struct
6600			{
6601				const char*		name;
6602				UploadMethod	uploadMethod;
6603			} uploadMethods[] =
6604			{
6605				{ "buffer_data",		UPLOADMETHOD_BUFFER_DATA		},
6606				{ "buffer_sub_data",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6607				{ "map_buffer_range",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6608			};
6609
6610			tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
6611			renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
6612
6613			for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
6614			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6615			{
6616				const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
6617				const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";
6618
6619				// Time consumed by rendering command after an unrelated upload
6620
6621				uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
6622			}
6623		}
6624
6625		// .upload_and_draw
6626		{
6627			static const struct
6628			{
6629				const char*			name;
6630				const char*			description;
6631				BufferState			bufferState;
6632				UnrelatedBufferType	unrelatedBuffer;
6633				bool				supportsPartialUpload;
6634			} bufferConfigs[] =
6635			{
6636				{ "used_buffer",						"Upload to an used buffer",											BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_NONE,	true	},
6637				{ "new_buffer",							"Upload to a new buffer",											BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_NONE,	false	},
6638				{ "used_buffer_and_unrelated_upload",	"Upload to an used buffer and an unrelated buffer and then draw",	BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_VERTEX,	true	},
6639				{ "new_buffer_and_unrelated_upload",	"Upload to a new buffer and an unrelated buffer and then draw",		BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_VERTEX,	false	},
6640			};
6641
6642			tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
6643			renderAfterUploadGroup->addChild(uploadAndDrawGroup);
6644
6645			// .used_buffer
6646			// .new_buffer
6647			// .used_buffer_and_unrelated_upload
6648			// .new_buffer_and_unrelated_upload
6649			for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
6650			{
6651				static const struct
6652				{
6653					const char*		name;
6654					const char*		description;
6655					DrawMethod		drawMethod;
6656					TargetBuffer	targetBuffer;
6657					bool			partial;
6658				} uploadTargets[] =
6659				{
6660					{
6661						"draw_arrays_upload_vertices",
6662						"Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
6663						DRAWMETHOD_DRAW_ARRAYS,
6664						TARGETBUFFER_VERTEX,
6665						false
6666					},
6667					{
6668						"draw_arrays_upload_vertices_partial",
6669						"Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
6670						DRAWMETHOD_DRAW_ARRAYS,
6671						TARGETBUFFER_VERTEX,
6672						true
6673					},
6674					{
6675						"draw_elements_upload_vertices",
6676						"Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
6677						DRAWMETHOD_DRAW_ELEMENTS,
6678						TARGETBUFFER_VERTEX,
6679						false
6680					},
6681					{
6682						"draw_elements_upload_indices",
6683						"Measure time consumed by index upload, drawElements, and readPixels function calls",
6684						DRAWMETHOD_DRAW_ELEMENTS,
6685						TARGETBUFFER_INDEX,
6686						false
6687					},
6688					{
6689						"draw_elements_upload_indices_partial",
6690						"Measure time consumed by partial index upload, drawElements, and readPixels function calls",
6691						DRAWMETHOD_DRAW_ELEMENTS,
6692						TARGETBUFFER_INDEX,
6693						true
6694					},
6695				};
6696				static const struct
6697				{
6698					const char*		name;
6699					const char*		description;
6700					UploadMethod	uploadMethod;
6701					bool			supportsPartialUpload;
6702				} uploadMethods[] =
6703				{
6704					{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA,		false	},
6705					{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	true	},
6706					{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	true	},
6707				};
6708
6709				tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
6710				uploadAndDrawGroup->addChild(group);
6711
6712				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6713				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6714				{
6715					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6716
6717					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6718						continue;
6719					if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
6720						continue;
6721
6722					// Don't log unrelated buffer information to samples if there is no such buffer
6723
6724					if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
6725					{
6726						typedef UploadRenderReadDuration				SampleType;
6727						typedef GenericUploadRenderTimeCase<SampleType>	TestType;
6728
6729						group->addChild(new TestType(m_context,
6730													 name.c_str(),
6731													 uploadTargets[uploadTargetNdx].description,
6732													 uploadTargets[uploadTargetNdx].drawMethod,
6733													 uploadTargets[uploadTargetNdx].targetBuffer,
6734													 uploadMethods[uploadMethodNdx].uploadMethod,
6735													 bufferConfigs[stateNdx].bufferState,
6736													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6737													 bufferConfigs[stateNdx].unrelatedBuffer));
6738					}
6739					else
6740					{
6741						typedef UploadRenderReadDurationWithUnrelatedUploadSize	SampleType;
6742						typedef GenericUploadRenderTimeCase<SampleType>			TestType;
6743
6744						group->addChild(new TestType(m_context,
6745													 name.c_str(),
6746													 uploadTargets[uploadTargetNdx].description,
6747													 uploadTargets[uploadTargetNdx].drawMethod,
6748													 uploadTargets[uploadTargetNdx].targetBuffer,
6749													 uploadMethods[uploadMethodNdx].uploadMethod,
6750													 bufferConfigs[stateNdx].bufferState,
6751													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6752													 bufferConfigs[stateNdx].unrelatedBuffer));
6753					}
6754				}
6755			}
6756		}
6757
6758		// .draw_modify_draw
6759		{
6760			static const struct
6761			{
6762				const char*		name;
6763				const char*		description;
6764				DrawMethod		drawMethod;
6765				TargetBuffer	targetBuffer;
6766				bool			partial;
6767			} uploadTargets[] =
6768			{
6769				{
6770					"draw_arrays_upload_vertices",
6771					"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6772					DRAWMETHOD_DRAW_ARRAYS,
6773					TARGETBUFFER_VERTEX,
6774					false
6775				},
6776				{
6777					"draw_arrays_upload_vertices_partial",
6778					"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6779					DRAWMETHOD_DRAW_ARRAYS,
6780					TARGETBUFFER_VERTEX,
6781					true
6782				},
6783				{
6784					"draw_elements_upload_vertices",
6785					"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6786					DRAWMETHOD_DRAW_ELEMENTS,
6787					TARGETBUFFER_VERTEX,
6788					false
6789				},
6790				{
6791					"draw_elements_upload_indices",
6792					"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6793					DRAWMETHOD_DRAW_ELEMENTS,
6794					TARGETBUFFER_INDEX,
6795					false
6796				},
6797				{
6798					"draw_elements_upload_indices_partial",
6799					"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6800					DRAWMETHOD_DRAW_ELEMENTS,
6801					TARGETBUFFER_INDEX,
6802					true
6803				},
6804			};
6805			static const struct
6806			{
6807				const char*							name;
6808				const char*							description;
6809				UploadMethod						uploadMethod;
6810				BufferInUseRenderTimeCase::MapFlags	mapFlags;
6811				bool								supportsPartialUpload;
6812			} uploadMethods[] =
6813			{
6814				{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6815				{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6816				{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6817				{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6818			};
6819
6820			tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
6821			renderAfterUploadGroup->addChild(drawModifyDrawGroup);
6822
6823			for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6824			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6825			{
6826				const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6827
6828				if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6829					continue;
6830
6831				drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6832																			name.c_str(),
6833																			uploadTargets[uploadTargetNdx].description,
6834																			uploadTargets[uploadTargetNdx].drawMethod,
6835																			uploadMethods[uploadMethodNdx].mapFlags,
6836																			uploadTargets[uploadTargetNdx].targetBuffer,
6837																			uploadMethods[uploadMethodNdx].uploadMethod,
6838																			(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6839																			BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
6840			}
6841		}
6842
6843		// .upload_wait_draw
6844		{
6845			static const struct
6846			{
6847				const char*	name;
6848				const char*	description;
6849				BufferState	bufferState;
6850			} bufferStates[] =
6851			{
6852				{ "new_buffer",		"Uploading to just generated name",	BUFFERSTATE_NEW			},
6853				{ "used_buffer",	"Uploading to a used buffer",		BUFFERSTATE_EXISTING	},
6854			};
6855			static const struct
6856			{
6857				const char*		name;
6858				const char*		description;
6859				DrawMethod		drawMethod;
6860				TargetBuffer	targetBuffer;
6861			} uploadTargets[] =
6862			{
6863				{ "draw_arrays_vertices",	"Upload vertex data, draw with drawArrays",		DRAWMETHOD_DRAW_ARRAYS,		TARGETBUFFER_VERTEX	},
6864				{ "draw_elements_vertices",	"Upload vertex data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_VERTEX	},
6865				{ "draw_elements_indices",	"Upload index data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_INDEX	},
6866			};
6867			static const struct
6868			{
6869				const char*		name;
6870				const char*		description;
6871				UploadMethod	uploadMethod;
6872			} uploadMethods[] =
6873			{
6874				{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA		},
6875				{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6876				{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6877			};
6878
6879			tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
6880			renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
6881
6882			for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
6883			{
6884				tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
6885				uploadSwapDrawGroup->addChild(bufferGroup);
6886
6887				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6888				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6889				{
6890					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6891
6892					bufferGroup->addChild(new UploadWaitDrawCase(m_context,
6893																 name.c_str(),
6894																 uploadTargets[uploadTargetNdx].description,
6895																 uploadTargets[uploadTargetNdx].drawMethod,
6896																 uploadTargets[uploadTargetNdx].targetBuffer,
6897																 uploadMethods[uploadMethodNdx].uploadMethod,
6898																 bufferStates[bufferStateNdx].bufferState));
6899				}
6900			}
6901		}
6902	}
6903}
6904
6905} // Performance
6906} // gles3
6907} // deqp
6908