1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Renderer.hpp"
16
17#include "Clipper.hpp"
18#include "Math.hpp"
19#include "FrameBuffer.hpp"
20#include "Timer.hpp"
21#include "Surface.hpp"
22#include "Half.hpp"
23#include "Primitive.hpp"
24#include "Polygon.hpp"
25#include "SwiftConfig.hpp"
26#include "MutexLock.hpp"
27#include "CPUID.hpp"
28#include "Memory.hpp"
29#include "Resource.hpp"
30#include "Constants.hpp"
31#include "Debug.hpp"
32#include "Reactor/Reactor.hpp"
33
34#undef max
35
// Passed to the SwiftConfig constructor each time a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug builds only: Renderer::draw() returns without doing anything when the
// primitive count of a draw falls outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
42
43namespace sw
44{
	// Rendering conventions, defined in another translation unit.
	// The Renderer constructor overwrites each of them from its Conventions argument.
	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
	extern bool booleanFaceRegister;
	extern bool fullPixelPositionRegister;
	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
	extern bool secondaryColor;             // Specular lighting is applied after texturing
	extern bool colorsDefaultToZero;

	// Global options, defined in another translation unit.
	extern bool forceWindowed;
	extern bool complementaryDepthBuffer;   // When set, draw() negates the depth range and uses (1 - near) as the near depth
	extern bool postBlendSRGB;
	extern bool exactColorRounding;         // Overwritten by the Renderer constructor from its argument of the same name
	extern TransparencyAntialiasing transparencyAntialiasing;
	extern bool forceClearRegisters;

	extern bool precacheVertex;
	extern bool precacheSetup;
	extern bool precachePixel;

	// Scheduling parameters read by draw() and by the task scheduler.
	int batchSize = 128;    // draw() divides this by the multisample count to get the number of primitives per batch
	int threadCount = 1;    // Number of threads the scheduler distributes tasks over
	int unitCount = 1;      // Number of primitive units scanned by findAvailableTasks()
	int clusterCount = 1;   // Number of pixel clusters scanned by findAvailableTasks()

	TranscendentalPrecision logPrecision = ACCURATE;   // threadFunction() enables flush-to-zero/denormals-are-zero when this is below IEEE
	TranscendentalPrecision expPrecision = ACCURATE;
	TranscendentalPrecision rcpPrecision = ACCURATE;
	TranscendentalPrecision rsqPrecision = ACCURATE;
	bool perspectiveCorrection = true;
74
	// Argument bundle handed to Renderer::threadFunction() through its void pointer parameter.
	struct Parameters
	{
		Renderer *renderer;   // Renderer whose threadLoop() the thread runs
		int threadIndex;      // Index passed on to threadLoop()
	};
80
81	DrawCall::DrawCall()
82	{
83		queries = 0;
84
85		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
86		vsDirtyConstI = 16;
87		vsDirtyConstB = 16;
88
89		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
90		psDirtyConstI = 16;
91		psDirtyConstB = 16;
92
93		references = -1;
94
95		data = (DrawData*)allocate(sizeof(DrawData));
96		data->constants = &constants;
97	}
98
	// Releases the query list (if any is still attached) and the DrawData block
	// obtained from allocate() in the constructor.
	DrawCall::~DrawCall()
	{
		delete queries;

		deallocate(data);
	}
105
106	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
107	{
108		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
109		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
110		sw::booleanFaceRegister = conventions.booleanFaceRegister;
111		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
112		sw::leadingVertexFirst = conventions.leadingVertexFirst;
113		sw::secondaryColor = conventions.secondaryColor;
114		sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
115		sw::exactColorRounding = exactColorRounding;
116
117		setRenderTarget(0, 0);
118		clipper = new Clipper(symmetricNormalizedDepth);
119		blitter = new Blitter;
120
121		updateViewMatrix = true;
122		updateBaseMatrix = true;
123		updateProjectionMatrix = true;
124		updateClipPlanes = true;
125
126		#if PERF_HUD
127			resetTimers();
128		#endif
129
130		for(int i = 0; i < 16; i++)
131		{
132			vertexTask[i] = 0;
133
134			worker[i] = 0;
135			resume[i] = 0;
136			suspend[i] = 0;
137		}
138
139		threadsAwake = 0;
140		resumeApp = new Event();
141
142		currentDraw = 0;
143		nextDraw = 0;
144
145		qHead = 0;
146		qSize = 0;
147
148		for(int i = 0; i < 16; i++)
149		{
150			triangleBatch[i] = 0;
151			primitiveBatch[i] = 0;
152		}
153
154		for(int draw = 0; draw < DRAW_COUNT; draw++)
155		{
156			drawCall[draw] = new DrawCall();
157			drawList[draw] = drawCall[draw];
158		}
159
160		for(int unit = 0; unit < 16; unit++)
161		{
162			primitiveProgress[unit].init();
163		}
164
165		for(int cluster = 0; cluster < 16; cluster++)
166		{
167			pixelProgress[cluster].init();
168		}
169
170		clipFlags = 0;
171
172		swiftConfig = new SwiftConfig(disableServer);
173		updateConfiguration(true);
174
175		sync = new Resource(0);
176	}
177
	// Tears the renderer down: releases the sync resource, the clipper and the
	// blitter, stops the worker threads, then frees the draw call pool and the
	// configuration object created by the constructor.
	Renderer::~Renderer()
	{
		// The sync Resource is released through destruct() rather than delete
		sync->destruct();

		// NOTE(review): clipper and blitter are deleted before terminateThreads();
		// presumably no worker can still be using them at this point - confirm.
		delete clipper;
		clipper = nullptr;

		delete blitter;
		blitter = nullptr;

		terminateThreads();
		delete resumeApp;

		for(int draw = 0; draw < DRAW_COUNT; draw++)
		{
			delete drawCall[draw];
		}

		delete swiftConfig;
	}
198
	// Renderer objects have to be memory aligned, so they are allocated through
	// sw::allocate() (the second argument, 16, is presumably the alignment in
	// bytes) and released through the matching operator delete.
	void* Renderer::operator new(size_t size)
	{
		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
		return sw::allocate(sizeof(Renderer), 16);
	}
205
	// Counterpart of Renderer::operator new: returns the memory to sw::deallocate().
	void Renderer::operator delete(void * mem)
	{
		sw::deallocate(mem);
	}
210
211	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
212	{
213		#ifndef NDEBUG
214			if(count < minPrimitives || count > maxPrimitives)
215			{
216				return;
217			}
218		#endif
219
220		context->drawType = drawType;
221
222		updateConfiguration();
223		updateClipper();
224
225		int ss = context->getSuperSampleCount();
226		int ms = context->getMultiSampleCount();
227
228		for(int q = 0; q < ss; q++)
229		{
230			unsigned int oldMultiSampleMask = context->multiSampleMask;
231			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
232
233			if(!context->multiSampleMask)
234			{
235				continue;
236			}
237
238			sync->lock(sw::PRIVATE);
239
240			if(update || oldMultiSampleMask != context->multiSampleMask)
241			{
242				vertexState = VertexProcessor::update(drawType);
243				setupState = SetupProcessor::update();
244				pixelState = PixelProcessor::update();
245
246				vertexRoutine = VertexProcessor::routine(vertexState);
247				setupRoutine = SetupProcessor::routine(setupState);
248				pixelRoutine = PixelProcessor::routine(pixelState);
249			}
250
251			int batch = batchSize / ms;
252
253			int (Renderer::*setupPrimitives)(int batch, int count);
254
255			if(context->isDrawTriangle())
256			{
257				switch(context->fillMode)
258				{
259				case FILL_SOLID:
260					setupPrimitives = &Renderer::setupSolidTriangles;
261					break;
262				case FILL_WIREFRAME:
263					setupPrimitives = &Renderer::setupWireframeTriangle;
264					batch = 1;
265					break;
266				case FILL_VERTEX:
267					setupPrimitives = &Renderer::setupVertexTriangle;
268					batch = 1;
269					break;
270				default:
271					ASSERT(false);
272					return;
273				}
274			}
275			else if(context->isDrawLine())
276			{
277				setupPrimitives = &Renderer::setupLines;
278			}
279			else   // Point draw
280			{
281				setupPrimitives = &Renderer::setupPoints;
282			}
283
284			DrawCall *draw = 0;
285
286			do
287			{
288				for(int i = 0; i < DRAW_COUNT; i++)
289				{
290					if(drawCall[i]->references == -1)
291					{
292						draw = drawCall[i];
293						drawList[nextDraw % DRAW_COUNT] = draw;
294
295						break;
296					}
297				}
298
299				if(!draw)
300				{
301					resumeApp->wait();
302				}
303			}
304			while(!draw);
305
306			DrawData *data = draw->data;
307
308			if(queries.size() != 0)
309			{
310				draw->queries = new std::list<Query*>();
311				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
312				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
313				{
314					Query* q = *query;
315					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
316					{
317						atomicIncrement(&(q->reference));
318						draw->queries->push_back(q);
319					}
320				}
321			}
322
323			draw->drawType = drawType;
324			draw->batchSize = batch;
325
326			vertexRoutine->bind();
327			setupRoutine->bind();
328			pixelRoutine->bind();
329
330			draw->vertexRoutine = vertexRoutine;
331			draw->setupRoutine = setupRoutine;
332			draw->pixelRoutine = pixelRoutine;
333			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
334			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
335			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
336			draw->setupPrimitives = setupPrimitives;
337			draw->setupState = setupState;
338
339			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
340			{
341				draw->vertexStream[i] = context->input[i].resource;
342				data->input[i] = context->input[i].buffer;
343				data->stride[i] = context->input[i].stride;
344
345				if(draw->vertexStream[i])
346				{
347					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
348				}
349			}
350
351			if(context->indexBuffer)
352			{
353				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
354			}
355
356			draw->indexBuffer = context->indexBuffer;
357
358			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
359			{
360				draw->texture[sampler] = 0;
361			}
362
363			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
364			{
365				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
366				{
367					draw->texture[sampler] = context->texture[sampler];
368					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
369
370					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
371				}
372			}
373
374			if(context->pixelShader)
375			{
376				if(draw->psDirtyConstF)
377				{
378					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
379					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
380					draw->psDirtyConstF = 0;
381				}
382
383				if(draw->psDirtyConstI)
384				{
385					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
386					draw->psDirtyConstI = 0;
387				}
388
389				if(draw->psDirtyConstB)
390				{
391					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
392					draw->psDirtyConstB = 0;
393				}
394
395				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
396			}
397			else
398			{
399				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
400				{
401					draw->pUniformBuffers[i] = nullptr;
402				}
403			}
404
405			if(context->pixelShaderVersion() <= 0x0104)
406			{
407				for(int stage = 0; stage < 8; stage++)
408				{
409					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
410					{
411						data->textureStage[stage] = context->textureStage[stage].uniforms;
412					}
413					else break;
414				}
415			}
416
417			if(context->vertexShader)
418			{
419				if(context->vertexShader->getVersion() >= 0x0300)
420				{
421					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
422					{
423						if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
424						{
425							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
426							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
427
428							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
429						}
430					}
431				}
432
433				if(draw->vsDirtyConstF)
434				{
435					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
436					draw->vsDirtyConstF = 0;
437				}
438
439				if(draw->vsDirtyConstI)
440				{
441					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
442					draw->vsDirtyConstI = 0;
443				}
444
445				if(draw->vsDirtyConstB)
446				{
447					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
448					draw->vsDirtyConstB = 0;
449				}
450
451				if(context->vertexShader->isInstanceIdDeclared())
452				{
453					data->instanceID = context->instanceID;
454				}
455
456				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
457				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
458			}
459			else
460			{
461				data->ff = ff;
462
463				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
464				draw->vsDirtyConstI = 16;
465				draw->vsDirtyConstB = 16;
466
467				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
468				{
469					draw->vUniformBuffers[i] = nullptr;
470				}
471
472				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
473				{
474					draw->transformFeedbackBuffers[i] = nullptr;
475				}
476			}
477
478			if(pixelState.stencilActive)
479			{
480				data->stencil[0] = stencil;
481				data->stencil[1] = stencilCCW;
482			}
483
484			if(pixelState.fogActive)
485			{
486				data->fog = fog;
487			}
488
489			if(setupState.isDrawPoint)
490			{
491				data->point = point;
492			}
493
494			data->lineWidth = context->lineWidth;
495
496			data->factor = factor;
497
498			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
499			{
500				float ref = context->alphaReference * (1.0f / 255.0f);
501				float margin = sw::min(ref, 1.0f - ref);
502
503				if(ms == 4)
504				{
505					data->a2c0 = replicate(ref - margin * 0.6f);
506					data->a2c1 = replicate(ref - margin * 0.2f);
507					data->a2c2 = replicate(ref + margin * 0.2f);
508					data->a2c3 = replicate(ref + margin * 0.6f);
509				}
510				else if(ms == 2)
511				{
512					data->a2c0 = replicate(ref - margin * 0.3f);
513					data->a2c1 = replicate(ref + margin * 0.3f);
514				}
515				else ASSERT(false);
516			}
517
518			if(pixelState.occlusionEnabled)
519			{
520				for(int cluster = 0; cluster < clusterCount; cluster++)
521				{
522					data->occlusion[cluster] = 0;
523				}
524			}
525
526			#if PERF_PROFILE
527				for(int cluster = 0; cluster < clusterCount; cluster++)
528				{
529					for(int i = 0; i < PERF_TIMERS; i++)
530					{
531						data->cycles[i][cluster] = 0;
532					}
533				}
534			#endif
535
536			// Viewport
537			{
538				float W = 0.5f * viewport.width;
539				float H = 0.5f * viewport.height;
540				float X0 = viewport.x0 + W;
541				float Y0 = viewport.y0 + H;
542				float N = viewport.minZ;
543				float F = viewport.maxZ;
544				float Z = F - N;
545
546				if(context->isDrawTriangle(false))
547				{
548					N += depthBias;
549				}
550
551				if(complementaryDepthBuffer)
552				{
553					Z = -Z;
554					N = 1 - N;
555				}
556
557				static const float X[5][16] =   // Fragment offsets
558				{
559					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
560					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
561					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
562					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
563					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
564				};
565
566				static const float Y[5][16] =   // Fragment offsets
567				{
568					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
569					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
570					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
571					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
572					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
573				};
574
575				int s = sw::log2(ss);
576
577				data->Wx16 = replicate(W * 16);
578				data->Hx16 = replicate(H * 16);
579				data->X0x16 = replicate(X0 * 16 - 8);
580				data->Y0x16 = replicate(Y0 * 16 - 8);
581				data->XXXX = replicate(X[s][q] / W);
582				data->YYYY = replicate(Y[s][q] / H);
583				data->halfPixelX = replicate(0.5f / W);
584				data->halfPixelY = replicate(0.5f / H);
585				data->viewportHeight = abs(viewport.height);
586				data->slopeDepthBias = slopeDepthBias;
587				data->depthRange = Z;
588				data->depthNear = N;
589				draw->clipFlags = clipFlags;
590
591				if(clipFlags)
592				{
593					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
594					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
595					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
596					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
597					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
598					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
599				}
600			}
601
602			// Target
603			{
604				for(int index = 0; index < RENDERTARGETS; index++)
605				{
606					draw->renderTarget[index] = context->renderTarget[index];
607
608					if(draw->renderTarget[index])
609					{
610						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
611						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
612						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
613					}
614				}
615
616				draw->depthBuffer = context->depthBuffer;
617				draw->stencilBuffer = context->stencilBuffer;
618
619				if(draw->depthBuffer)
620				{
621					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
622					data->depthPitchB = context->depthBuffer->getInternalPitchB();
623					data->depthSliceB = context->depthBuffer->getInternalSliceB();
624				}
625
626				if(draw->stencilBuffer)
627				{
628					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
629					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
630					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
631				}
632			}
633
634			// Scissor
635			{
636				data->scissorX0 = scissor.x0;
637				data->scissorX1 = scissor.x1;
638				data->scissorY0 = scissor.y0;
639				data->scissorY1 = scissor.y1;
640			}
641
642			draw->primitive = 0;
643			draw->count = count;
644
645			draw->references = (count + batch - 1) / batch;
646
647			schedulerMutex.lock();
648			nextDraw++;
649			schedulerMutex.unlock();
650
651			#ifndef NDEBUG
652			if(threadCount == 1)   // Use main thread for draw execution
653			{
654				threadsAwake = 1;
655				task[0].type = Task::RESUME;
656
657				taskLoop(0);
658			}
659			else
660			#endif
661			{
662				if(!threadsAwake)
663				{
664					suspend[0]->wait();
665
666					threadsAwake = 1;
667					task[0].type = Task::RESUME;
668
669					resume[0]->signal();
670				}
671			}
672		}
673	}
674
675	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
676	{
677		SliceRect rect = clearRect;
678		int samples = dest->getDepth();
679
680		for(rect.slice = 0; rect.slice < samples; rect.slice++)
681		{
682			blitter->clear(value, format, dest, rect, rgbaMask);
683		}
684	}
685
	// Forwards the blit request unchanged to the renderer's Blitter.
	void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
	{
		blitter->blit(source, sRect, dest, dRect, filter, isStencil);
	}
690
	// Forwards the 3D blit request unchanged to the renderer's Blitter.
	void Renderer::blit3D(Surface *source, Surface *dest)
	{
		blitter->blit3D(source, dest);
	}
695
696	void Renderer::threadFunction(void *parameters)
697	{
698		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
699		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
700
701		if(logPrecision < IEEE)
702		{
703			CPUID::setFlushToZero(true);
704			CPUID::setDenormalsAreZero(true);
705		}
706
707		renderer->threadLoop(threadIndex);
708	}
709
710	void Renderer::threadLoop(int threadIndex)
711	{
712		while(!exitThreads)
713		{
714			taskLoop(threadIndex);
715
716			suspend[threadIndex]->signal();
717			resume[threadIndex]->wait();
718		}
719	}
720
721	void Renderer::taskLoop(int threadIndex)
722	{
723		while(task[threadIndex].type != Task::SUSPEND)
724		{
725			scheduleTask(threadIndex);
726			executeTask(threadIndex);
727		}
728	}
729
730	void Renderer::findAvailableTasks()
731	{
732		// Find pixel tasks
733		for(int cluster = 0; cluster < clusterCount; cluster++)
734		{
735			if(!pixelProgress[cluster].executing)
736			{
737				for(int unit = 0; unit < unitCount; unit++)
738				{
739					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
740					{
741						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
742						{
743							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
744							{
745								Task &task = taskQueue[qHead];
746								task.type = Task::PIXELS;
747								task.primitiveUnit = unit;
748								task.pixelCluster = cluster;
749
750								pixelProgress[cluster].executing = true;
751
752								// Commit to the task queue
753								qHead = (qHead + 1) % 32;
754								qSize++;
755
756								break;
757							}
758						}
759					}
760				}
761			}
762		}
763
764		// Find primitive tasks
765		if(currentDraw == nextDraw)
766		{
767			return;   // No more primitives to process
768		}
769
770		for(int unit = 0; unit < unitCount; unit++)
771		{
772			DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
773
774			if(draw->primitive >= draw->count)
775			{
776				currentDraw++;
777
778				if(currentDraw == nextDraw)
779				{
780					return;   // No more primitives to process
781				}
782
783				draw = drawList[currentDraw % DRAW_COUNT];
784			}
785
786			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
787			{
788				int primitive = draw->primitive;
789				int count = draw->count;
790				int batch = draw->batchSize;
791
792				primitiveProgress[unit].drawCall = currentDraw;
793				primitiveProgress[unit].firstPrimitive = primitive;
794				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
795
796				draw->primitive += batch;
797
798				Task &task = taskQueue[qHead];
799				task.type = Task::PRIMITIVES;
800				task.primitiveUnit = unit;
801
802				primitiveProgress[unit].references = -1;
803
804				// Commit to the task queue
805				qHead = (qHead + 1) % 32;
806				qSize++;
807			}
808		}
809	}
810
811	void Renderer::scheduleTask(int threadIndex)
812	{
813		schedulerMutex.lock();
814
815		if((int)qSize < threadCount - threadsAwake + 1)
816		{
817			findAvailableTasks();
818		}
819
820		if(qSize != 0)
821		{
822			task[threadIndex] = taskQueue[(qHead - qSize) % 32];
823			qSize--;
824
825			if(threadsAwake != threadCount)
826			{
827				int wakeup = qSize - threadsAwake + 1;
828
829				for(int i = 0; i < threadCount && wakeup > 0; i++)
830				{
831					if(task[i].type == Task::SUSPEND)
832					{
833						suspend[i]->wait();
834						task[i].type = Task::RESUME;
835						resume[i]->signal();
836
837						threadsAwake++;
838						wakeup--;
839					}
840				}
841			}
842		}
843		else
844		{
845			task[threadIndex].type = Task::SUSPEND;
846
847			threadsAwake--;
848		}
849
850		schedulerMutex.unlock();
851	}
852
853	void Renderer::executeTask(int threadIndex)
854	{
855		#if PERF_HUD
856			int64_t startTick = Timer::ticks();
857		#endif
858
859		switch(task[threadIndex].type)
860		{
861		case Task::PRIMITIVES:
862			{
863				int unit = task[threadIndex].primitiveUnit;
864
865				int input = primitiveProgress[unit].firstPrimitive;
866				int count = primitiveProgress[unit].primitiveCount;
867				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
868				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
869
870				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
871
872				#if PERF_HUD
873					int64_t time = Timer::ticks();
874					vertexTime[threadIndex] += time - startTick;
875					startTick = time;
876				#endif
877
878				int visible = 0;
879
880				if(!draw->setupState.rasterizerDiscard)
881				{
882					visible = (this->*setupPrimitives)(unit, count);
883				}
884
885				primitiveProgress[unit].visible = visible;
886				primitiveProgress[unit].references = clusterCount;
887
888				#if PERF_HUD
889					setupTime[threadIndex] += Timer::ticks() - startTick;
890				#endif
891			}
892			break;
893		case Task::PIXELS:
894			{
895				int unit = task[threadIndex].primitiveUnit;
896				int visible = primitiveProgress[unit].visible;
897
898				if(visible > 0)
899				{
900					int cluster = task[threadIndex].pixelCluster;
901					Primitive *primitive = primitiveBatch[unit];
902					DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
903					DrawData *data = draw->data;
904					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
905
906					pixelRoutine(primitive, visible, cluster, data);
907				}
908
909				finishRendering(task[threadIndex]);
910
911				#if PERF_HUD
912					pixelTime[threadIndex] += Timer::ticks() - startTick;
913				#endif
914			}
915			break;
916		case Task::RESUME:
917			break;
918		case Task::SUSPEND:
919			break;
920		default:
921			ASSERT(false);
922		}
923	}
924
	// Takes a PUBLIC lock on the sync resource and releases it right away.
	// draw() holds a PRIVATE lock on the same resource for every queued draw
	// call until finishRendering() releases it, so this presumably blocks until
	// all outstanding draw calls have completed - confirm against Resource::lock().
	void Renderer::synchronize()
	{
		sync->lock(sw::PUBLIC);
		sync->unlock();
	}
930
	// Bookkeeping after a pixel task has run for one (primitive unit, cluster) pair.
	//
	// Advances the cluster's progress within the draw call, drops one reference
	// on the primitive unit, and, when that was the unit's last reference, one
	// reference on the draw call. When the draw call's last reference is
	// dropped, query results are accumulated, every resource locked by draw()
	// is unlocked, the routines are unbound, the sync lock is released and the
	// draw call slot is marked free again.
	void Renderer::finishRendering(Task &pixelTask)
	{
		int unit = pixelTask.primitiveUnit;
		int cluster = pixelTask.pixelCluster;

		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
		DrawData &data = *draw.data;
		int primitive = primitiveProgress[unit].firstPrimitive;
		int count = primitiveProgress[unit].primitiveCount;
		int processedPrimitives = primitive + count;

		pixelProgress[cluster].processedPrimitives = processedPrimitives;

		// This cluster has rendered the whole draw call: move it on to the next one
		if(pixelProgress[cluster].processedPrimitives >= draw.count)
		{
			pixelProgress[cluster].drawCall++;
			pixelProgress[cluster].processedPrimitives = 0;
		}

		// executeTask() set the unit's reference count to clusterCount; each cluster drops one
		int ref = atomicDecrement(&primitiveProgress[unit].references);

		if(ref == 0)
		{
			// Last cluster done with this batch: drop one of the draw call's per-batch references
			ref = atomicDecrement(&draw.references);

			if(ref == 0)
			{
				#if PERF_PROFILE
					for(int cluster = 0; cluster < clusterCount; cluster++)
					{
						for(int i = 0; i < PERF_TIMERS; i++)
						{
							profiler.cycles[i] += data.cycles[i][cluster];
						}
					}
				#endif

				// Accumulate the results into the queries attached by draw() and release them
				if(draw.queries)
				{
					for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
					{
						Query *query = *q;

						switch(query->type)
						{
						case Query::FRAGMENTS_PASSED:
							for(int cluster = 0; cluster < clusterCount; cluster++)
							{
								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
							}
							break;
						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
							// NOTE(review): adds the end index of the batch that finished last;
							// presumably this equals the draw's primitive count - confirm.
							atomicAdd((volatile int*)&query->data, processedPrimitives);
							break;
						default:
							break;
						}

						atomicDecrement(&query->reference);
					}

					delete draw.queries;
					draw.queries = 0;
				}

				// Release everything draw() locked for this draw call
				for(int i = 0; i < RENDERTARGETS; i++)
				{
					if(draw.renderTarget[i])
					{
						draw.renderTarget[i]->unlockInternal();
					}
				}

				if(draw.depthBuffer)
				{
					draw.depthBuffer->unlockInternal();
				}

				if(draw.stencilBuffer)
				{
					draw.stencilBuffer->unlockStencil();
				}

				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
				{
					if(draw.texture[i])
					{
						draw.texture[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
				{
					if(draw.vertexStream[i])
					{
						draw.vertexStream[i]->unlock();
					}
				}

				if(draw.indexBuffer)
				{
					draw.indexBuffer->unlock();
				}

				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
				{
					if(draw.pUniformBuffers[i])
					{
						draw.pUniformBuffers[i]->unlock();
					}
					if(draw.vUniformBuffers[i])
					{
						draw.vUniformBuffers[i]->unlock();
					}
				}

				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
				{
					if(draw.transformFeedbackBuffers[i])
					{
						draw.transformFeedbackBuffers[i]->unlock();
					}
				}

				draw.vertexRoutine->unbind();
				draw.setupRoutine->unbind();
				draw.pixelRoutine->unbind();

				// Matches the sync->lock(sw::PRIVATE) taken in draw()
				sync->unlock();

				// Mark the slot free and wake draw() in case it is waiting for one
				draw.references = -1;
				resumeApp->signal();
			}
		}

		pixelProgress[cluster].executing = false;
	}
1068
1069	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1070	{
1071		Triangle *triangle = triangleBatch[unit];
1072		DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1073		DrawData *data = draw->data;
1074		VertexTask *task = vertexTask[thread];
1075
1076		const void *indices = data->indices;
1077		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1078
1079		if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1080		{
1081			task->vertexCache.clear();
1082			task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1083		}
1084
1085		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1086
1087		switch(draw->drawType)
1088		{
1089		case DRAW_POINTLIST:
1090			{
1091				unsigned int index = start;
1092
1093				for(unsigned int i = 0; i < triangleCount; i++)
1094				{
1095					batch[i][0] = index;
1096					batch[i][1] = index;
1097					batch[i][2] = index;
1098
1099					index += 1;
1100				}
1101			}
1102			break;
1103		case DRAW_LINELIST:
1104			{
1105				unsigned int index = 2 * start;
1106
1107				for(unsigned int i = 0; i < triangleCount; i++)
1108				{
1109					batch[i][0] = index + 0;
1110					batch[i][1] = index + 1;
1111					batch[i][2] = index + 1;
1112
1113					index += 2;
1114				}
1115			}
1116			break;
1117		case DRAW_LINESTRIP:
1118			{
1119				unsigned int index = start;
1120
1121				for(unsigned int i = 0; i < triangleCount; i++)
1122				{
1123					batch[i][0] = index + 0;
1124					batch[i][1] = index + 1;
1125					batch[i][2] = index + 1;
1126
1127					index += 1;
1128				}
1129			}
1130			break;
1131		case DRAW_LINELOOP:
1132			{
1133				unsigned int index = start;
1134
1135				for(unsigned int i = 0; i < triangleCount; i++)
1136				{
1137					batch[i][0] = (index + 0) % loop;
1138					batch[i][1] = (index + 1) % loop;
1139					batch[i][2] = (index + 1) % loop;
1140
1141					index += 1;
1142				}
1143			}
1144			break;
1145		case DRAW_TRIANGLELIST:
1146			{
1147				unsigned int index = 3 * start;
1148
1149				for(unsigned int i = 0; i < triangleCount; i++)
1150				{
1151					batch[i][0] = index + 0;
1152					batch[i][1] = index + 1;
1153					batch[i][2] = index + 2;
1154
1155					index += 3;
1156				}
1157			}
1158			break;
1159		case DRAW_TRIANGLESTRIP:
1160			{
1161				unsigned int index = start;
1162
1163				for(unsigned int i = 0; i < triangleCount; i++)
1164				{
1165					batch[i][0] = index + 0;
1166					batch[i][1] = index + (index & 1) + 1;
1167					batch[i][2] = index + (~index & 1) + 1;
1168
1169					index += 1;
1170				}
1171			}
1172			break;
1173		case DRAW_TRIANGLEFAN:
1174			{
1175				unsigned int index = start;
1176
1177				for(unsigned int i = 0; i < triangleCount; i++)
1178				{
1179					batch[i][0] = index + 1;
1180					batch[i][1] = index + 2;
1181					batch[i][2] = 0;
1182
1183					index += 1;
1184				}
1185			}
1186			break;
1187		case DRAW_INDEXEDPOINTLIST8:
1188			{
1189				const unsigned char *index = (const unsigned char*)indices + start;
1190
1191				for(unsigned int i = 0; i < triangleCount; i++)
1192				{
1193					batch[i][0] = *index;
1194					batch[i][1] = *index;
1195					batch[i][2] = *index;
1196
1197					index += 1;
1198				}
1199			}
1200			break;
1201		case DRAW_INDEXEDPOINTLIST16:
1202			{
1203				const unsigned short *index = (const unsigned short*)indices + start;
1204
1205				for(unsigned int i = 0; i < triangleCount; i++)
1206				{
1207					batch[i][0] = *index;
1208					batch[i][1] = *index;
1209					batch[i][2] = *index;
1210
1211					index += 1;
1212				}
1213			}
1214			break;
1215		case DRAW_INDEXEDPOINTLIST32:
1216			{
1217				const unsigned int *index = (const unsigned int*)indices + start;
1218
1219				for(unsigned int i = 0; i < triangleCount; i++)
1220				{
1221					batch[i][0] = *index;
1222					batch[i][1] = *index;
1223					batch[i][2] = *index;
1224
1225					index += 1;
1226				}
1227			}
1228			break;
1229		case DRAW_INDEXEDLINELIST8:
1230			{
1231				const unsigned char *index = (const unsigned char*)indices + 2 * start;
1232
1233				for(unsigned int i = 0; i < triangleCount; i++)
1234				{
1235					batch[i][0] = index[0];
1236					batch[i][1] = index[1];
1237					batch[i][2] = index[1];
1238
1239					index += 2;
1240				}
1241			}
1242			break;
1243		case DRAW_INDEXEDLINELIST16:
1244			{
1245				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1246
1247				for(unsigned int i = 0; i < triangleCount; i++)
1248				{
1249					batch[i][0] = index[0];
1250					batch[i][1] = index[1];
1251					batch[i][2] = index[1];
1252
1253					index += 2;
1254				}
1255			}
1256			break;
1257		case DRAW_INDEXEDLINELIST32:
1258			{
1259				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1260
1261				for(unsigned int i = 0; i < triangleCount; i++)
1262				{
1263					batch[i][0] = index[0];
1264					batch[i][1] = index[1];
1265					batch[i][2] = index[1];
1266
1267					index += 2;
1268				}
1269			}
1270			break;
1271		case DRAW_INDEXEDLINESTRIP8:
1272			{
1273				const unsigned char *index = (const unsigned char*)indices + start;
1274
1275				for(unsigned int i = 0; i < triangleCount; i++)
1276				{
1277					batch[i][0] = index[0];
1278					batch[i][1] = index[1];
1279					batch[i][2] = index[1];
1280
1281					index += 1;
1282				}
1283			}
1284			break;
1285		case DRAW_INDEXEDLINESTRIP16:
1286			{
1287				const unsigned short *index = (const unsigned short*)indices + start;
1288
1289				for(unsigned int i = 0; i < triangleCount; i++)
1290				{
1291					batch[i][0] = index[0];
1292					batch[i][1] = index[1];
1293					batch[i][2] = index[1];
1294
1295					index += 1;
1296				}
1297			}
1298			break;
1299		case DRAW_INDEXEDLINESTRIP32:
1300			{
1301				const unsigned int *index = (const unsigned int*)indices + start;
1302
1303				for(unsigned int i = 0; i < triangleCount; i++)
1304				{
1305					batch[i][0] = index[0];
1306					batch[i][1] = index[1];
1307					batch[i][2] = index[1];
1308
1309					index += 1;
1310				}
1311			}
1312			break;
1313		case DRAW_INDEXEDLINELOOP8:
1314			{
1315				const unsigned char *index = (const unsigned char*)indices;
1316
1317				for(unsigned int i = 0; i < triangleCount; i++)
1318				{
1319					batch[i][0] = index[(start + i + 0) % loop];
1320					batch[i][1] = index[(start + i + 1) % loop];
1321					batch[i][2] = index[(start + i + 1) % loop];
1322				}
1323			}
1324			break;
1325		case DRAW_INDEXEDLINELOOP16:
1326			{
1327				const unsigned short *index = (const unsigned short*)indices;
1328
1329				for(unsigned int i = 0; i < triangleCount; i++)
1330				{
1331					batch[i][0] = index[(start + i + 0) % loop];
1332					batch[i][1] = index[(start + i + 1) % loop];
1333					batch[i][2] = index[(start + i + 1) % loop];
1334				}
1335			}
1336			break;
1337		case DRAW_INDEXEDLINELOOP32:
1338			{
1339				const unsigned int *index = (const unsigned int*)indices;
1340
1341				for(unsigned int i = 0; i < triangleCount; i++)
1342				{
1343					batch[i][0] = index[(start + i + 0) % loop];
1344					batch[i][1] = index[(start + i + 1) % loop];
1345					batch[i][2] = index[(start + i + 1) % loop];
1346				}
1347			}
1348			break;
1349		case DRAW_INDEXEDTRIANGLELIST8:
1350			{
1351				const unsigned char *index = (const unsigned char*)indices + 3 * start;
1352
1353				for(unsigned int i = 0; i < triangleCount; i++)
1354				{
1355					batch[i][0] = index[0];
1356					batch[i][1] = index[1];
1357					batch[i][2] = index[2];
1358
1359					index += 3;
1360				}
1361			}
1362			break;
1363		case DRAW_INDEXEDTRIANGLELIST16:
1364			{
1365				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1366
1367				for(unsigned int i = 0; i < triangleCount; i++)
1368				{
1369					batch[i][0] = index[0];
1370					batch[i][1] = index[1];
1371					batch[i][2] = index[2];
1372
1373					index += 3;
1374				}
1375			}
1376			break;
1377		case DRAW_INDEXEDTRIANGLELIST32:
1378			{
1379				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1380
1381				for(unsigned int i = 0; i < triangleCount; i++)
1382				{
1383					batch[i][0] = index[0];
1384					batch[i][1] = index[1];
1385					batch[i][2] = index[2];
1386
1387					index += 3;
1388				}
1389			}
1390			break;
1391		case DRAW_INDEXEDTRIANGLESTRIP8:
1392			{
1393				const unsigned char *index = (const unsigned char*)indices + start;
1394
1395				for(unsigned int i = 0; i < triangleCount; i++)
1396				{
1397					batch[i][0] = index[0];
1398					batch[i][1] = index[((start + i) & 1) + 1];
1399					batch[i][2] = index[(~(start + i) & 1) + 1];
1400
1401					index += 1;
1402				}
1403			}
1404			break;
1405		case DRAW_INDEXEDTRIANGLESTRIP16:
1406			{
1407				const unsigned short *index = (const unsigned short*)indices + start;
1408
1409				for(unsigned int i = 0; i < triangleCount; i++)
1410				{
1411					batch[i][0] = index[0];
1412					batch[i][1] = index[((start + i) & 1) + 1];
1413					batch[i][2] = index[(~(start + i) & 1) + 1];
1414
1415					index += 1;
1416				}
1417			}
1418			break;
1419		case DRAW_INDEXEDTRIANGLESTRIP32:
1420			{
1421				const unsigned int *index = (const unsigned int*)indices + start;
1422
1423				for(unsigned int i = 0; i < triangleCount; i++)
1424				{
1425					batch[i][0] = index[0];
1426					batch[i][1] = index[((start + i) & 1) + 1];
1427					batch[i][2] = index[(~(start + i) & 1) + 1];
1428
1429					index += 1;
1430				}
1431			}
1432			break;
1433		case DRAW_INDEXEDTRIANGLEFAN8:
1434			{
1435				const unsigned char *index = (const unsigned char*)indices;
1436
1437				for(unsigned int i = 0; i < triangleCount; i++)
1438				{
1439					batch[i][0] = index[start + i + 1];
1440					batch[i][1] = index[start + i + 2];
1441					batch[i][2] = index[0];
1442				}
1443			}
1444			break;
1445		case DRAW_INDEXEDTRIANGLEFAN16:
1446			{
1447				const unsigned short *index = (const unsigned short*)indices;
1448
1449				for(unsigned int i = 0; i < triangleCount; i++)
1450				{
1451					batch[i][0] = index[start + i + 1];
1452					batch[i][1] = index[start + i + 2];
1453					batch[i][2] = index[0];
1454				}
1455			}
1456			break;
1457		case DRAW_INDEXEDTRIANGLEFAN32:
1458			{
1459				const unsigned int *index = (const unsigned int*)indices;
1460
1461				for(unsigned int i = 0; i < triangleCount; i++)
1462				{
1463					batch[i][0] = index[start + i + 1];
1464					batch[i][1] = index[start + i + 2];
1465					batch[i][2] = index[0];
1466				}
1467			}
1468			break;
1469		case DRAW_QUADLIST:
1470			{
1471				unsigned int index = 4 * start / 2;
1472
1473				for(unsigned int i = 0; i < triangleCount; i += 2)
1474				{
1475					batch[i+0][0] = index + 0;
1476					batch[i+0][1] = index + 1;
1477					batch[i+0][2] = index + 2;
1478
1479					batch[i+1][0] = index + 0;
1480					batch[i+1][1] = index + 2;
1481					batch[i+1][2] = index + 3;
1482
1483					index += 4;
1484				}
1485			}
1486			break;
1487		default:
1488			ASSERT(false);
1489			return;
1490		}
1491
1492		task->primitiveStart = start;
1493		task->vertexCount = triangleCount * 3;
1494		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1495	}
1496
1497	int Renderer::setupSolidTriangles(int unit, int count)
1498	{
1499		Triangle *triangle = triangleBatch[unit];
1500		Primitive *primitive = primitiveBatch[unit];
1501
1502		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1503		SetupProcessor::State &state = draw.setupState;
1504		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1505
1506		int ms = state.multiSample;
1507		int pos = state.positionRegister;
1508		const DrawData *data = draw.data;
1509		int visible = 0;
1510
1511		for(int i = 0; i < count; i++, triangle++)
1512		{
1513			Vertex &v0 = triangle->v0;
1514			Vertex &v1 = triangle->v1;
1515			Vertex &v2 = triangle->v2;
1516
1517			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1518			{
1519				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1520
1521				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1522
1523				if(clipFlagsOr != Clipper::CLIP_FINITE)
1524				{
1525					if(!clipper->clip(polygon, clipFlagsOr, draw))
1526					{
1527						continue;
1528					}
1529				}
1530
1531				if(setupRoutine(primitive, triangle, &polygon, data))
1532				{
1533					primitive += ms;
1534					visible++;
1535				}
1536			}
1537		}
1538
1539		return visible;
1540	}
1541
1542	int Renderer::setupWireframeTriangle(int unit, int count)
1543	{
1544		Triangle *triangle = triangleBatch[unit];
1545		Primitive *primitive = primitiveBatch[unit];
1546		int visible = 0;
1547
1548		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1549		SetupProcessor::State &state = draw.setupState;
1550
1551		const Vertex &v0 = triangle[0].v0;
1552		const Vertex &v1 = triangle[0].v1;
1553		const Vertex &v2 = triangle[0].v2;
1554
1555		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1556
1557		if(state.cullMode == CULL_CLOCKWISE)
1558		{
1559			if(d >= 0) return 0;
1560		}
1561		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1562		{
1563			if(d <= 0) return 0;
1564		}
1565
1566		// Copy attributes
1567		triangle[1].v0 = v1;
1568		triangle[1].v1 = v2;
1569		triangle[2].v0 = v2;
1570		triangle[2].v1 = v0;
1571
1572		if(state.color[0][0].flat)   // FIXME
1573		{
1574			for(int i = 0; i < 2; i++)
1575			{
1576				triangle[1].v0.C[i] = triangle[0].v0.C[i];
1577				triangle[1].v1.C[i] = triangle[0].v0.C[i];
1578				triangle[2].v0.C[i] = triangle[0].v0.C[i];
1579				triangle[2].v1.C[i] = triangle[0].v0.C[i];
1580			}
1581		}
1582
1583		for(int i = 0; i < 3; i++)
1584		{
1585			if(setupLine(*primitive, *triangle, draw))
1586			{
1587				primitive->area = 0.5f * d;
1588
1589				primitive++;
1590				visible++;
1591			}
1592
1593			triangle++;
1594		}
1595
1596		return visible;
1597	}
1598
1599	int Renderer::setupVertexTriangle(int unit, int count)
1600	{
1601		Triangle *triangle = triangleBatch[unit];
1602		Primitive *primitive = primitiveBatch[unit];
1603		int visible = 0;
1604
1605		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1606		SetupProcessor::State &state = draw.setupState;
1607
1608		const Vertex &v0 = triangle[0].v0;
1609		const Vertex &v1 = triangle[0].v1;
1610		const Vertex &v2 = triangle[0].v2;
1611
1612		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1613
1614		if(state.cullMode == CULL_CLOCKWISE)
1615		{
1616			if(d >= 0) return 0;
1617		}
1618		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1619		{
1620			if(d <= 0) return 0;
1621		}
1622
1623		// Copy attributes
1624		triangle[1].v0 = v1;
1625		triangle[2].v0 = v2;
1626
1627		for(int i = 0; i < 3; i++)
1628		{
1629			if(setupPoint(*primitive, *triangle, draw))
1630			{
1631				primitive->area = 0.5f * d;
1632
1633				primitive++;
1634				visible++;
1635			}
1636
1637			triangle++;
1638		}
1639
1640		return visible;
1641	}
1642
1643	int Renderer::setupLines(int unit, int count)
1644	{
1645		Triangle *triangle = triangleBatch[unit];
1646		Primitive *primitive = primitiveBatch[unit];
1647		int visible = 0;
1648
1649		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1650		SetupProcessor::State &state = draw.setupState;
1651
1652		int ms = state.multiSample;
1653
1654		for(int i = 0; i < count; i++)
1655		{
1656			if(setupLine(*primitive, *triangle, draw))
1657			{
1658				primitive += ms;
1659				visible++;
1660			}
1661
1662			triangle++;
1663		}
1664
1665		return visible;
1666	}
1667
1668	int Renderer::setupPoints(int unit, int count)
1669	{
1670		Triangle *triangle = triangleBatch[unit];
1671		Primitive *primitive = primitiveBatch[unit];
1672		int visible = 0;
1673
1674		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1675		SetupProcessor::State &state = draw.setupState;
1676
1677		int ms = state.multiSample;
1678
1679		for(int i = 0; i < count; i++)
1680		{
1681			if(setupPoint(*primitive, *triangle, draw))
1682			{
1683				primitive += ms;
1684				visible++;
1685			}
1686
1687			triangle++;
1688		}
1689
1690		return visible;
1691	}
1692
1693	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1694	{
1695		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1696		const SetupProcessor::State &state = draw.setupState;
1697		const DrawData &data = *draw.data;
1698
1699		float lineWidth = data.lineWidth;
1700
1701		Vertex &v0 = triangle.v0;
1702		Vertex &v1 = triangle.v1;
1703
1704		int pos = state.positionRegister;
1705
1706		const float4 &P0 = v0.v[pos];
1707		const float4 &P1 = v1.v[pos];
1708
1709		if(P0.w <= 0 && P1.w <= 0)
1710		{
1711			return false;
1712		}
1713
1714		const float W = data.Wx16[0] * (1.0f / 16.0f);
1715		const float H = data.Hx16[0] * (1.0f / 16.0f);
1716
1717		float dx = W * (P1.x / P1.w - P0.x / P0.w);
1718		float dy = H * (P1.y / P1.w - P0.y / P0.w);
1719
1720		if(dx == 0 && dy == 0)
1721		{
1722			return false;
1723		}
1724
1725		if(false)   // Rectangle
1726		{
1727			float4 P[4];
1728			int C[4];
1729
1730			P[0] = P0;
1731			P[1] = P1;
1732			P[2] = P1;
1733			P[3] = P0;
1734
1735			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1736
1737			dx *= scale;
1738			dy *= scale;
1739
1740			float dx0w = dx * P0.w / W;
1741			float dy0h = dy * P0.w / H;
1742			float dx0h = dx * P0.w / H;
1743			float dy0w = dy * P0.w / W;
1744
1745			float dx1w = dx * P1.w / W;
1746			float dy1h = dy * P1.w / H;
1747			float dx1h = dx * P1.w / H;
1748			float dy1w = dy * P1.w / W;
1749
1750			P[0].x += -dy0w + -dx0w;
1751			P[0].y += -dx0h + +dy0h;
1752			C[0] = clipper->computeClipFlags(P[0]);
1753
1754			P[1].x += -dy1w + +dx1w;
1755			P[1].y += -dx1h + +dy1h;
1756			C[1] = clipper->computeClipFlags(P[1]);
1757
1758			P[2].x += +dy1w + +dx1w;
1759			P[2].y += +dx1h + -dy1h;
1760			C[2] = clipper->computeClipFlags(P[2]);
1761
1762			P[3].x += +dy0w + -dx0w;
1763			P[3].y += +dx0h + +dy0h;
1764			C[3] = clipper->computeClipFlags(P[3]);
1765
1766			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1767			{
1768				Polygon polygon(P, 4);
1769
1770				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1771
1772				if(clipFlagsOr != Clipper::CLIP_FINITE)
1773				{
1774					if(!clipper->clip(polygon, clipFlagsOr, draw))
1775					{
1776						return false;
1777					}
1778				}
1779
1780				return setupRoutine(&primitive, &triangle, &polygon, &data);
1781			}
1782		}
1783		else   // Diamond test convention
1784		{
1785			float4 P[8];
1786			int C[8];
1787
1788			P[0] = P0;
1789			P[1] = P0;
1790			P[2] = P0;
1791			P[3] = P0;
1792			P[4] = P1;
1793			P[5] = P1;
1794			P[6] = P1;
1795			P[7] = P1;
1796
1797			float dx0 = lineWidth * 0.5f * P0.w / W;
1798			float dy0 = lineWidth * 0.5f * P0.w / H;
1799
1800			float dx1 = lineWidth * 0.5f * P1.w / W;
1801			float dy1 = lineWidth * 0.5f * P1.w / H;
1802
1803			P[0].x += -dx0;
1804			C[0] = clipper->computeClipFlags(P[0]);
1805
1806			P[1].y += +dy0;
1807			C[1] = clipper->computeClipFlags(P[1]);
1808
1809			P[2].x += +dx0;
1810			C[2] = clipper->computeClipFlags(P[2]);
1811
1812			P[3].y += -dy0;
1813			C[3] = clipper->computeClipFlags(P[3]);
1814
1815			P[4].x += -dx1;
1816			C[4] = clipper->computeClipFlags(P[4]);
1817
1818			P[5].y += +dy1;
1819			C[5] = clipper->computeClipFlags(P[5]);
1820
1821			P[6].x += +dx1;
1822			C[6] = clipper->computeClipFlags(P[6]);
1823
1824			P[7].y += -dy1;
1825			C[7] = clipper->computeClipFlags(P[7]);
1826
1827			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1828			{
1829				float4 L[6];
1830
1831				if(dx > -dy)
1832				{
1833					if(dx > dy)   // Right
1834					{
1835						L[0] = P[0];
1836						L[1] = P[1];
1837						L[2] = P[5];
1838						L[3] = P[6];
1839						L[4] = P[7];
1840						L[5] = P[3];
1841					}
1842					else   // Down
1843					{
1844						L[0] = P[0];
1845						L[1] = P[4];
1846						L[2] = P[5];
1847						L[3] = P[6];
1848						L[4] = P[2];
1849						L[5] = P[3];
1850					}
1851				}
1852				else
1853				{
1854					if(dx > dy)   // Up
1855					{
1856						L[0] = P[0];
1857						L[1] = P[1];
1858						L[2] = P[2];
1859						L[3] = P[6];
1860						L[4] = P[7];
1861						L[5] = P[4];
1862					}
1863					else   // Left
1864					{
1865						L[0] = P[1];
1866						L[1] = P[2];
1867						L[2] = P[3];
1868						L[3] = P[7];
1869						L[4] = P[4];
1870						L[5] = P[5];
1871					}
1872				}
1873
1874				Polygon polygon(L, 6);
1875
1876				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1877
1878				if(clipFlagsOr != Clipper::CLIP_FINITE)
1879				{
1880					if(!clipper->clip(polygon, clipFlagsOr, draw))
1881					{
1882						return false;
1883					}
1884				}
1885
1886				return setupRoutine(&primitive, &triangle, &polygon, &data);
1887			}
1888		}
1889
1890		return false;
1891	}
1892
1893	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1894	{
1895		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1896		const SetupProcessor::State &state = draw.setupState;
1897		const DrawData &data = *draw.data;
1898
1899		Vertex &v = triangle.v0;
1900
1901		float pSize;
1902
1903		int pts = state.pointSizeRegister;
1904
1905		if(state.pointSizeRegister != Unused)
1906		{
1907			pSize = v.v[pts].y;
1908		}
1909		else
1910		{
1911			pSize = data.point.pointSize[0];
1912		}
1913
1914		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1915
1916		float4 P[4];
1917		int C[4];
1918
1919		int pos = state.positionRegister;
1920
1921		P[0] = v.v[pos];
1922		P[1] = v.v[pos];
1923		P[2] = v.v[pos];
1924		P[3] = v.v[pos];
1925
1926		const float X = pSize * P[0].w * data.halfPixelX[0];
1927		const float Y = pSize * P[0].w * data.halfPixelY[0];
1928
1929		P[0].x -= X;
1930		P[0].y += Y;
1931		C[0] = clipper->computeClipFlags(P[0]);
1932
1933		P[1].x += X;
1934		P[1].y += Y;
1935		C[1] = clipper->computeClipFlags(P[1]);
1936
1937		P[2].x += X;
1938		P[2].y -= Y;
1939		C[2] = clipper->computeClipFlags(P[2]);
1940
1941		P[3].x -= X;
1942		P[3].y -= Y;
1943		C[3] = clipper->computeClipFlags(P[3]);
1944
1945		triangle.v1 = triangle.v0;
1946		triangle.v2 = triangle.v0;
1947
1948		triangle.v1.X += iround(16 * 0.5f * pSize);
1949		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1950
1951		Polygon polygon(P, 4);
1952
1953		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1954		{
1955			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1956
1957			if(clipFlagsOr != Clipper::CLIP_FINITE)
1958			{
1959				if(!clipper->clip(polygon, clipFlagsOr, draw))
1960				{
1961					return false;
1962				}
1963			}
1964
1965			return setupRoutine(&primitive, &triangle, &polygon, &data);
1966		}
1967
1968		return false;
1969	}
1970
1971	void Renderer::initializeThreads()
1972	{
1973		unitCount = ceilPow2(threadCount);
1974		clusterCount = ceilPow2(threadCount);
1975
1976		for(int i = 0; i < unitCount; i++)
1977		{
1978			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1979			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1980		}
1981
1982		for(int i = 0; i < threadCount; i++)
1983		{
1984			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1985			vertexTask[i]->vertexCache.drawCall = -1;
1986
1987			task[i].type = Task::SUSPEND;
1988
1989			resume[i] = new Event();
1990			suspend[i] = new Event();
1991
1992			Parameters parameters;
1993			parameters.threadIndex = i;
1994			parameters.renderer = this;
1995
1996			exitThreads = false;
1997			worker[i] = new Thread(threadFunction, &parameters);
1998
1999			suspend[i]->wait();
2000			suspend[i]->signal();
2001		}
2002	}
2003
2004	void Renderer::terminateThreads()
2005	{
2006		while(threadsAwake != 0)
2007		{
2008			Thread::sleep(1);
2009		}
2010
2011		for(int thread = 0; thread < threadCount; thread++)
2012		{
2013			if(worker[thread])
2014			{
2015				exitThreads = true;
2016				resume[thread]->signal();
2017				worker[thread]->join();
2018
2019				delete worker[thread];
2020				worker[thread] = 0;
2021				delete resume[thread];
2022				resume[thread] = 0;
2023				delete suspend[thread];
2024				suspend[thread] = 0;
2025			}
2026
2027			deallocate(vertexTask[thread]);
2028			vertexTask[thread] = 0;
2029		}
2030
2031		for(int i = 0; i < 16; i++)
2032		{
2033			deallocate(triangleBatch[i]);
2034			triangleBatch[i] = 0;
2035
2036			deallocate(primitiveBatch[i]);
2037			primitiveBatch[i] = 0;
2038		}
2039	}
2040
2041	void Renderer::loadConstants(const VertexShader *vertexShader)
2042	{
2043		if(!vertexShader) return;
2044
2045		size_t count = vertexShader->getLength();
2046
2047		for(size_t i = 0; i < count; i++)
2048		{
2049			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2050
2051			if(instruction->opcode == Shader::OPCODE_DEF)
2052			{
2053				int index = instruction->dst.index;
2054				float value[4];
2055
2056				value[0] = instruction->src[0].value[0];
2057				value[1] = instruction->src[0].value[1];
2058				value[2] = instruction->src[0].value[2];
2059				value[3] = instruction->src[0].value[3];
2060
2061				setVertexShaderConstantF(index, value);
2062			}
2063			else if(instruction->opcode == Shader::OPCODE_DEFI)
2064			{
2065				int index = instruction->dst.index;
2066				int integer[4];
2067
2068				integer[0] = instruction->src[0].integer[0];
2069				integer[1] = instruction->src[0].integer[1];
2070				integer[2] = instruction->src[0].integer[2];
2071				integer[3] = instruction->src[0].integer[3];
2072
2073				setVertexShaderConstantI(index, integer);
2074			}
2075			else if(instruction->opcode == Shader::OPCODE_DEFB)
2076			{
2077				int index = instruction->dst.index;
2078				int boolean = instruction->src[0].boolean[0];
2079
2080				setVertexShaderConstantB(index, &boolean);
2081			}
2082		}
2083	}
2084
2085	void Renderer::loadConstants(const PixelShader *pixelShader)
2086	{
2087		if(!pixelShader) return;
2088
2089		size_t count = pixelShader->getLength();
2090
2091		for(size_t i = 0; i < count; i++)
2092		{
2093			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2094
2095			if(instruction->opcode == Shader::OPCODE_DEF)
2096			{
2097				int index = instruction->dst.index;
2098				float value[4];
2099
2100				value[0] = instruction->src[0].value[0];
2101				value[1] = instruction->src[0].value[1];
2102				value[2] = instruction->src[0].value[2];
2103				value[3] = instruction->src[0].value[3];
2104
2105				setPixelShaderConstantF(index, value);
2106			}
2107			else if(instruction->opcode == Shader::OPCODE_DEFI)
2108			{
2109				int index = instruction->dst.index;
2110				int integer[4];
2111
2112				integer[0] = instruction->src[0].integer[0];
2113				integer[1] = instruction->src[0].integer[1];
2114				integer[2] = instruction->src[0].integer[2];
2115				integer[3] = instruction->src[0].integer[3];
2116
2117				setPixelShaderConstantI(index, integer);
2118			}
2119			else if(instruction->opcode == Shader::OPCODE_DEFB)
2120			{
2121				int index = instruction->dst.index;
2122				int boolean = instruction->src[0].boolean[0];
2123
2124				setPixelShaderConstantB(index, &boolean);
2125			}
2126		}
2127	}
2128
2129	void Renderer::setIndexBuffer(Resource *indexBuffer)
2130	{
2131		context->indexBuffer = indexBuffer;
2132	}
2133
2134	void Renderer::setMultiSampleMask(unsigned int mask)
2135	{
2136		context->sampleMask = mask;
2137	}
2138
2139	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2140	{
2141		sw::transparencyAntialiasing = transparencyAntialiasing;
2142	}
2143
2144	bool Renderer::isReadWriteTexture(int sampler)
2145	{
2146		for(int index = 0; index < RENDERTARGETS; index++)
2147		{
2148			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2149			{
2150				return true;
2151			}
2152		}
2153
2154		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2155		{
2156			return true;
2157		}
2158
2159		return false;
2160	}
2161
2162	void Renderer::updateClipper()
2163	{
2164		if(updateClipPlanes)
2165		{
2166			if(VertexProcessor::isFixedFunction())   // User plane in world space
2167			{
2168				const Matrix &scissorWorld = getViewTransform();
2169
2170				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2171				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2172				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2173				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2174				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2175				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2176			}
2177			else   // User plane in clip space
2178			{
2179				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2180				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2181				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2182				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2183				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2184				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2185			}
2186
2187			updateClipPlanes = false;
2188		}
2189	}
2190
2191	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2192	{
2193		ASSERT(sampler < TOTAL_IMAGE_UNITS);
2194
2195		context->texture[sampler] = resource;
2196	}
2197
2198	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2199	{
2200		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2201
2202		context->sampler[sampler].setTextureLevel(face, level, surface, type);
2203	}
2204
2205	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2206	{
2207		if(type == SAMPLER_PIXEL)
2208		{
2209			PixelProcessor::setTextureFilter(sampler, textureFilter);
2210		}
2211		else
2212		{
2213			VertexProcessor::setTextureFilter(sampler, textureFilter);
2214		}
2215	}
2216
2217	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2218	{
2219		if(type == SAMPLER_PIXEL)
2220		{
2221			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2222		}
2223		else
2224		{
2225			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2226		}
2227	}
2228
2229	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2230	{
2231		if(type == SAMPLER_PIXEL)
2232		{
2233			PixelProcessor::setGatherEnable(sampler, enable);
2234		}
2235		else
2236		{
2237			VertexProcessor::setGatherEnable(sampler, enable);
2238		}
2239	}
2240
2241	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2242	{
2243		if(type == SAMPLER_PIXEL)
2244		{
2245			PixelProcessor::setAddressingModeU(sampler, addressMode);
2246		}
2247		else
2248		{
2249			VertexProcessor::setAddressingModeU(sampler, addressMode);
2250		}
2251	}
2252
2253	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2254	{
2255		if(type == SAMPLER_PIXEL)
2256		{
2257			PixelProcessor::setAddressingModeV(sampler, addressMode);
2258		}
2259		else
2260		{
2261			VertexProcessor::setAddressingModeV(sampler, addressMode);
2262		}
2263	}
2264
2265	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2266	{
2267		if(type == SAMPLER_PIXEL)
2268		{
2269			PixelProcessor::setAddressingModeW(sampler, addressMode);
2270		}
2271		else
2272		{
2273			VertexProcessor::setAddressingModeW(sampler, addressMode);
2274		}
2275	}
2276
2277	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2278	{
2279		if(type == SAMPLER_PIXEL)
2280		{
2281			PixelProcessor::setReadSRGB(sampler, sRGB);
2282		}
2283		else
2284		{
2285			VertexProcessor::setReadSRGB(sampler, sRGB);
2286		}
2287	}
2288
2289	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2290	{
2291		if(type == SAMPLER_PIXEL)
2292		{
2293			PixelProcessor::setMipmapLOD(sampler, bias);
2294		}
2295		else
2296		{
2297			VertexProcessor::setMipmapLOD(sampler, bias);
2298		}
2299	}
2300
2301	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2302	{
2303		if(type == SAMPLER_PIXEL)
2304		{
2305			PixelProcessor::setBorderColor(sampler, borderColor);
2306		}
2307		else
2308		{
2309			VertexProcessor::setBorderColor(sampler, borderColor);
2310		}
2311	}
2312
2313	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2314	{
2315		if(type == SAMPLER_PIXEL)
2316		{
2317			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2318		}
2319		else
2320		{
2321			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2322		}
2323	}
2324
2325	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
2326	{
2327		if(type == SAMPLER_PIXEL)
2328		{
2329			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2330		}
2331		else
2332		{
2333			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2334		}
2335	}
2336
2337	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2338	{
2339		if(type == SAMPLER_PIXEL)
2340		{
2341			PixelProcessor::setSwizzleR(sampler, swizzleR);
2342		}
2343		else
2344		{
2345			VertexProcessor::setSwizzleR(sampler, swizzleR);
2346		}
2347	}
2348
2349	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2350	{
2351		if(type == SAMPLER_PIXEL)
2352		{
2353			PixelProcessor::setSwizzleG(sampler, swizzleG);
2354		}
2355		else
2356		{
2357			VertexProcessor::setSwizzleG(sampler, swizzleG);
2358		}
2359	}
2360
2361	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2362	{
2363		if(type == SAMPLER_PIXEL)
2364		{
2365			PixelProcessor::setSwizzleB(sampler, swizzleB);
2366		}
2367		else
2368		{
2369			VertexProcessor::setSwizzleB(sampler, swizzleB);
2370		}
2371	}
2372
2373	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2374	{
2375		if(type == SAMPLER_PIXEL)
2376		{
2377			PixelProcessor::setSwizzleA(sampler, swizzleA);
2378		}
2379		else
2380		{
2381			VertexProcessor::setSwizzleA(sampler, swizzleA);
2382		}
2383	}
2384
2385	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2386	{
2387		if(type == SAMPLER_PIXEL)
2388		{
2389			PixelProcessor::setBaseLevel(sampler, baseLevel);
2390		}
2391		else
2392		{
2393			VertexProcessor::setBaseLevel(sampler, baseLevel);
2394		}
2395	}
2396
2397	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2398	{
2399		if(type == SAMPLER_PIXEL)
2400		{
2401			PixelProcessor::setMaxLevel(sampler, maxLevel);
2402		}
2403		else
2404		{
2405			VertexProcessor::setMaxLevel(sampler, maxLevel);
2406		}
2407	}
2408
2409	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2410	{
2411		if(type == SAMPLER_PIXEL)
2412		{
2413			PixelProcessor::setMinLod(sampler, minLod);
2414		}
2415		else
2416		{
2417			VertexProcessor::setMinLod(sampler, minLod);
2418		}
2419	}
2420
2421	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2422	{
2423		if(type == SAMPLER_PIXEL)
2424		{
2425			PixelProcessor::setMaxLod(sampler, maxLod);
2426		}
2427		else
2428		{
2429			VertexProcessor::setMaxLod(sampler, maxLod);
2430		}
2431	}
2432
2433	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2434	{
2435		context->setPointSpriteEnable(pointSpriteEnable);
2436	}
2437
2438	void Renderer::setPointScaleEnable(bool pointScaleEnable)
2439	{
2440		context->setPointScaleEnable(pointScaleEnable);
2441	}
2442
2443	void Renderer::setLineWidth(float width)
2444	{
2445		context->lineWidth = width;
2446	}
2447
2448	void Renderer::setDepthBias(float bias)
2449	{
2450		depthBias = bias;
2451	}
2452
2453	void Renderer::setSlopeDepthBias(float slopeBias)
2454	{
2455		slopeDepthBias = slopeBias;
2456	}
2457
2458	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2459	{
2460		context->rasterizerDiscard = rasterizerDiscard;
2461	}
2462
2463	void Renderer::setPixelShader(const PixelShader *shader)
2464	{
2465		context->pixelShader = shader;
2466
2467		loadConstants(shader);
2468	}
2469
2470	void Renderer::setVertexShader(const VertexShader *shader)
2471	{
2472		context->vertexShader = shader;
2473
2474		loadConstants(shader);
2475	}
2476
2477	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2478	{
2479		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2480		{
2481			if(drawCall[i]->psDirtyConstF < index + count)
2482			{
2483				drawCall[i]->psDirtyConstF = index + count;
2484			}
2485		}
2486
2487		for(unsigned int i = 0; i < count; i++)
2488		{
2489			PixelProcessor::setFloatConstant(index + i, value);
2490			value += 4;
2491		}
2492	}
2493
2494	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2495	{
2496		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2497		{
2498			if(drawCall[i]->psDirtyConstI < index + count)
2499			{
2500				drawCall[i]->psDirtyConstI = index + count;
2501			}
2502		}
2503
2504		for(unsigned int i = 0; i < count; i++)
2505		{
2506			PixelProcessor::setIntegerConstant(index + i, value);
2507			value += 4;
2508		}
2509	}
2510
2511	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2512	{
2513		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2514		{
2515			if(drawCall[i]->psDirtyConstB < index + count)
2516			{
2517				drawCall[i]->psDirtyConstB = index + count;
2518			}
2519		}
2520
2521		for(unsigned int i = 0; i < count; i++)
2522		{
2523			PixelProcessor::setBooleanConstant(index + i, *boolean);
2524			boolean++;
2525		}
2526	}
2527
2528	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2529	{
2530		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2531		{
2532			if(drawCall[i]->vsDirtyConstF < index + count)
2533			{
2534				drawCall[i]->vsDirtyConstF = index + count;
2535			}
2536		}
2537
2538		for(unsigned int i = 0; i < count; i++)
2539		{
2540			VertexProcessor::setFloatConstant(index + i, value);
2541			value += 4;
2542		}
2543	}
2544
2545	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2546	{
2547		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2548		{
2549			if(drawCall[i]->vsDirtyConstI < index + count)
2550			{
2551				drawCall[i]->vsDirtyConstI = index + count;
2552			}
2553		}
2554
2555		for(unsigned int i = 0; i < count; i++)
2556		{
2557			VertexProcessor::setIntegerConstant(index + i, value);
2558			value += 4;
2559		}
2560	}
2561
2562	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2563	{
2564		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2565		{
2566			if(drawCall[i]->vsDirtyConstB < index + count)
2567			{
2568				drawCall[i]->vsDirtyConstB = index + count;
2569			}
2570		}
2571
2572		for(unsigned int i = 0; i < count; i++)
2573		{
2574			VertexProcessor::setBooleanConstant(index + i, *boolean);
2575			boolean++;
2576		}
2577	}
2578
2579	void Renderer::setModelMatrix(const Matrix &M, int i)
2580	{
2581		VertexProcessor::setModelMatrix(M, i);
2582	}
2583
2584	void Renderer::setViewMatrix(const Matrix &V)
2585	{
2586		VertexProcessor::setViewMatrix(V);
2587		updateClipPlanes = true;
2588	}
2589
2590	void Renderer::setBaseMatrix(const Matrix &B)
2591	{
2592		VertexProcessor::setBaseMatrix(B);
2593		updateClipPlanes = true;
2594	}
2595
2596	void Renderer::setProjectionMatrix(const Matrix &P)
2597	{
2598		VertexProcessor::setProjectionMatrix(P);
2599		updateClipPlanes = true;
2600	}
2601
2602	void Renderer::addQuery(Query *query)
2603	{
2604		queries.push_back(query);
2605	}
2606
2607	void Renderer::removeQuery(Query *query)
2608	{
2609		queries.remove(query);
2610	}
2611
2612	#if PERF_HUD
2613		int Renderer::getThreadCount()
2614		{
2615			return threadCount;
2616		}
2617
2618		int64_t Renderer::getVertexTime(int thread)
2619		{
2620			return vertexTime[thread];
2621		}
2622
2623		int64_t Renderer::getSetupTime(int thread)
2624		{
2625			return setupTime[thread];
2626		}
2627
2628		int64_t Renderer::getPixelTime(int thread)
2629		{
2630			return pixelTime[thread];
2631		}
2632
2633		void Renderer::resetTimers()
2634		{
2635			for(int thread = 0; thread < threadCount; thread++)
2636			{
2637				vertexTime[thread] = 0;
2638				setupTime[thread] = 0;
2639				pixelTime[thread] = 0;
2640			}
2641		}
2642	#endif
2643
2644	void Renderer::setViewport(const Viewport &viewport)
2645	{
2646		this->viewport = viewport;
2647	}
2648
2649	void Renderer::setScissor(const Rect &scissor)
2650	{
2651		this->scissor = scissor;
2652	}
2653
2654	void Renderer::setClipFlags(int flags)
2655	{
2656		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2657	}
2658
2659	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2660	{
2661		if(index < MAX_CLIP_PLANES)
2662		{
2663			userPlane[index] = plane;
2664		}
2665		else ASSERT(false);
2666
2667		updateClipPlanes = true;
2668	}
2669
2670	void Renderer::updateConfiguration(bool initialUpdate)
2671	{
2672		bool newConfiguration = swiftConfig->hasNewConfiguration();
2673
2674		if(newConfiguration || initialUpdate)
2675		{
2676			terminateThreads();
2677
2678			SwiftConfig::Configuration configuration = {};
2679			swiftConfig->getConfiguration(configuration);
2680
2681			precacheVertex = !newConfiguration && configuration.precache;
2682			precacheSetup = !newConfiguration && configuration.precache;
2683			precachePixel = !newConfiguration && configuration.precache;
2684
2685			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2686			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2687			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2688
2689			switch(configuration.textureSampleQuality)
2690			{
2691			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2692			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2693			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2694			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2695			}
2696
2697			switch(configuration.mipmapQuality)
2698			{
2699			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2700			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2701			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2702			}
2703
2704			setPerspectiveCorrection(configuration.perspectiveCorrection);
2705
2706			switch(configuration.transcendentalPrecision)
2707			{
2708			case 0:
2709				logPrecision = APPROXIMATE;
2710				expPrecision = APPROXIMATE;
2711				rcpPrecision = APPROXIMATE;
2712				rsqPrecision = APPROXIMATE;
2713				break;
2714			case 1:
2715				logPrecision = PARTIAL;
2716				expPrecision = PARTIAL;
2717				rcpPrecision = PARTIAL;
2718				rsqPrecision = PARTIAL;
2719				break;
2720			case 2:
2721				logPrecision = ACCURATE;
2722				expPrecision = ACCURATE;
2723				rcpPrecision = ACCURATE;
2724				rsqPrecision = ACCURATE;
2725				break;
2726			case 3:
2727				logPrecision = WHQL;
2728				expPrecision = WHQL;
2729				rcpPrecision = WHQL;
2730				rsqPrecision = WHQL;
2731				break;
2732			case 4:
2733				logPrecision = IEEE;
2734				expPrecision = IEEE;
2735				rcpPrecision = IEEE;
2736				rsqPrecision = IEEE;
2737				break;
2738			default:
2739				logPrecision = ACCURATE;
2740				expPrecision = ACCURATE;
2741				rcpPrecision = ACCURATE;
2742				rsqPrecision = ACCURATE;
2743				break;
2744			}
2745
2746			switch(configuration.transparencyAntialiasing)
2747			{
2748			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2749			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2750			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2751			}
2752
2753			switch(configuration.threadCount)
2754			{
2755			case -1: threadCount = CPUID::coreCount();        break;
2756			case 0:  threadCount = CPUID::processAffinity();  break;
2757			default: threadCount = configuration.threadCount; break;
2758			}
2759
2760			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2761			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2762			CPUID::setEnableSSE3(configuration.enableSSE3);
2763			CPUID::setEnableSSE2(configuration.enableSSE2);
2764			CPUID::setEnableSSE(configuration.enableSSE);
2765
2766			for(int pass = 0; pass < 10; pass++)
2767			{
2768				optimization[pass] = configuration.optimization[pass];
2769			}
2770
2771			forceWindowed = configuration.forceWindowed;
2772			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2773			postBlendSRGB = configuration.postBlendSRGB;
2774			exactColorRounding = configuration.exactColorRounding;
2775			forceClearRegisters = configuration.forceClearRegisters;
2776
2777		#ifndef NDEBUG
2778			minPrimitives = configuration.minPrimitives;
2779			maxPrimitives = configuration.maxPrimitives;
2780		#endif
2781		}
2782
2783		if(!initialUpdate && !worker[0])
2784		{
2785			initializeThreads();
2786		}
2787	}
2788}
2789