1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Renderer.hpp"
16
17#include "Clipper.hpp"
18#include "Math.hpp"
19#include "FrameBuffer.hpp"
20#include "Timer.hpp"
21#include "Surface.hpp"
22#include "Half.hpp"
23#include "Primitive.hpp"
24#include "Polygon.hpp"
25#include "SwiftConfig.hpp"
26#include "MutexLock.hpp"
27#include "CPUID.hpp"
28#include "Memory.hpp"
29#include "Resource.hpp"
30#include "Constants.hpp"
31#include "Debug.hpp"
32#include "Reactor/Reactor.hpp"
33
34#undef max
35
// Handed to the SwiftConfig constructor whenever a Renderer is created.
bool disableServer = true;

// Debug-only window on the primitive count: Renderer::draw() returns without
// drawing when its count argument lies outside [minPrimitives, maxPrimitives].
#if !defined(NDEBUG)
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
42
43namespace sw
44{
45	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47	extern bool booleanFaceRegister;
48	extern bool fullPixelPositionRegister;
49	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50	extern bool secondaryColor;             // Specular lighting is applied after texturing
51
52	extern bool forceWindowed;
53	extern bool complementaryDepthBuffer;
54	extern bool postBlendSRGB;
55	extern bool exactColorRounding;
56	extern TransparencyAntialiasing transparencyAntialiasing;
57	extern bool forceClearRegisters;
58
59	extern bool precacheVertex;
60	extern bool precacheSetup;
61	extern bool precachePixel;
62
63	int batchSize = 128;
64	int threadCount = 1;
65	int unitCount = 1;
66	int clusterCount = 1;
67
68	TranscendentalPrecision logPrecision = ACCURATE;
69	TranscendentalPrecision expPrecision = ACCURATE;
70	TranscendentalPrecision rcpPrecision = ACCURATE;
71	TranscendentalPrecision rsqPrecision = ACCURATE;
72	bool perspectiveCorrection = true;
73
74	struct Parameters
75	{
76		Renderer *renderer;
77		int threadIndex;
78	};
79
80	DrawCall::DrawCall()
81	{
82		queries = 0;
83
84		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85		vsDirtyConstI = 16;
86		vsDirtyConstB = 16;
87
88		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89		psDirtyConstI = 16;
90		psDirtyConstB = 16;
91
92		references = -1;
93
94		data = (DrawData*)allocate(sizeof(DrawData));
95		data->constants = &constants;
96	}
97
98	DrawCall::~DrawCall()
99	{
100		delete queries;
101
102		deallocate(data);
103	}
104
105	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106	{
107		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109		sw::booleanFaceRegister = conventions.booleanFaceRegister;
110		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111		sw::leadingVertexFirst = conventions.leadingVertexFirst;
112		sw::secondaryColor = conventions.secondaryColor;
113		sw::exactColorRounding = exactColorRounding;
114
115		setRenderTarget(0, 0);
116		clipper = new Clipper(symmetricNormalizedDepth);
117
118		updateViewMatrix = true;
119		updateBaseMatrix = true;
120		updateProjectionMatrix = true;
121		updateClipPlanes = true;
122
123		#if PERF_HUD
124			resetTimers();
125		#endif
126
127		for(int i = 0; i < 16; i++)
128		{
129			vertexTask[i] = 0;
130
131			worker[i] = 0;
132			resume[i] = 0;
133			suspend[i] = 0;
134		}
135
136		threadsAwake = 0;
137		resumeApp = new Event();
138
139		currentDraw = 0;
140		nextDraw = 0;
141
142		qHead = 0;
143		qSize = 0;
144
145		for(int i = 0; i < 16; i++)
146		{
147			triangleBatch[i] = 0;
148			primitiveBatch[i] = 0;
149		}
150
151		for(int draw = 0; draw < DRAW_COUNT; draw++)
152		{
153			drawCall[draw] = new DrawCall();
154			drawList[draw] = drawCall[draw];
155		}
156
157		for(int unit = 0; unit < 16; unit++)
158		{
159			primitiveProgress[unit].init();
160		}
161
162		for(int cluster = 0; cluster < 16; cluster++)
163		{
164			pixelProgress[cluster].init();
165		}
166
167		clipFlags = 0;
168
169		swiftConfig = new SwiftConfig(disableServer);
170		updateConfiguration(true);
171
172		sync = new Resource(0);
173	}
174
175	Renderer::~Renderer()
176	{
177		sync->destruct();
178
179		delete clipper;
180		clipper = 0;
181
182		terminateThreads();
183		delete resumeApp;
184
185		for(int draw = 0; draw < DRAW_COUNT; draw++)
186		{
187			delete drawCall[draw];
188		}
189
190		delete swiftConfig;
191	}
192
193	void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
194	{
195		blitter.clear(pixel, format, dest, dRect, rgbaMask);
196	}
197
198	void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
199	{
200		blitter.blit(source, sRect, dest, dRect, filter);
201	}
202
203	void Renderer::blit3D(Surface *source, Surface *dest)
204	{
205		blitter.blit3D(source, dest);
206	}
207
208	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
209	{
210		#ifndef NDEBUG
211			if(count < minPrimitives || count > maxPrimitives)
212			{
213				return;
214			}
215		#endif
216
217		context->drawType = drawType;
218
219		updateConfiguration();
220		updateClipper();
221
222		int ss = context->getSuperSampleCount();
223		int ms = context->getMultiSampleCount();
224
225		for(int q = 0; q < ss; q++)
226		{
227			unsigned int oldMultiSampleMask = context->multiSampleMask;
228			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
229
230			if(!context->multiSampleMask)
231			{
232				continue;
233			}
234
235			sync->lock(sw::PRIVATE);
236
237			Routine *vertexRoutine;
238			Routine *setupRoutine;
239			Routine *pixelRoutine;
240
241			if(update || oldMultiSampleMask != context->multiSampleMask)
242			{
243				vertexState = VertexProcessor::update(drawType);
244				setupState = SetupProcessor::update();
245				pixelState = PixelProcessor::update();
246
247				vertexRoutine = VertexProcessor::routine(vertexState);
248				setupRoutine = SetupProcessor::routine(setupState);
249				pixelRoutine = PixelProcessor::routine(pixelState);
250			}
251
252			int batch = batchSize / ms;
253
254			int (Renderer::*setupPrimitives)(int batch, int count);
255
256			if(context->isDrawTriangle())
257			{
258				switch(context->fillMode)
259				{
260				case FILL_SOLID:
261					setupPrimitives = &Renderer::setupSolidTriangles;
262					break;
263				case FILL_WIREFRAME:
264					setupPrimitives = &Renderer::setupWireframeTriangle;
265					batch = 1;
266					break;
267				case FILL_VERTEX:
268					setupPrimitives = &Renderer::setupVertexTriangle;
269					batch = 1;
270					break;
271				default: ASSERT(false);
272				}
273			}
274			else if(context->isDrawLine())
275			{
276				setupPrimitives = &Renderer::setupLines;
277			}
278			else   // Point draw
279			{
280				setupPrimitives = &Renderer::setupPoints;
281			}
282
283			DrawCall *draw = 0;
284
285			do
286			{
287				for(int i = 0; i < DRAW_COUNT; i++)
288				{
289					if(drawCall[i]->references == -1)
290					{
291						draw = drawCall[i];
292						drawList[nextDraw % DRAW_COUNT] = draw;
293
294						break;
295					}
296				}
297
298				if(!draw)
299				{
300					resumeApp->wait();
301				}
302			}
303			while(!draw);
304
305			DrawData *data = draw->data;
306
307			if(queries.size() != 0)
308			{
309				draw->queries = new std::list<Query*>();
310				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
311				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
312				{
313					Query* q = *query;
314					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
315					{
316						atomicIncrement(&(q->reference));
317						draw->queries->push_back(q);
318					}
319				}
320			}
321
322			draw->drawType = drawType;
323			draw->batchSize = batch;
324
325			vertexRoutine->bind();
326			setupRoutine->bind();
327			pixelRoutine->bind();
328
329			draw->vertexRoutine = vertexRoutine;
330			draw->setupRoutine = setupRoutine;
331			draw->pixelRoutine = pixelRoutine;
332			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
333			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
334			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
335			draw->setupPrimitives = setupPrimitives;
336			draw->setupState = setupState;
337
338			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
339			{
340				draw->vertexStream[i] = context->input[i].resource;
341				data->input[i] = context->input[i].buffer;
342				data->stride[i] = context->input[i].stride;
343
344				if(draw->vertexStream[i])
345				{
346					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
347				}
348			}
349
350			if(context->indexBuffer)
351			{
352				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
353			}
354
355			draw->indexBuffer = context->indexBuffer;
356
357			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
358			{
359				draw->texture[sampler] = 0;
360			}
361
362			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
363			{
364				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
365				{
366					draw->texture[sampler] = context->texture[sampler];
367					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
368
369					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
370				}
371			}
372
373			if(context->pixelShader)
374			{
375				if(draw->psDirtyConstF)
376				{
377					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
378					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
379					draw->psDirtyConstF = 0;
380				}
381
382				if(draw->psDirtyConstI)
383				{
384					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
385					draw->psDirtyConstI = 0;
386				}
387
388				if(draw->psDirtyConstB)
389				{
390					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
391					draw->psDirtyConstB = 0;
392				}
393
394				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
395			}
396			else
397			{
398				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
399				{
400					draw->pUniformBuffers[i] = nullptr;
401				}
402			}
403
404			if(context->pixelShaderVersion() <= 0x0104)
405			{
406				for(int stage = 0; stage < 8; stage++)
407				{
408					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
409					{
410						data->textureStage[stage] = context->textureStage[stage].uniforms;
411					}
412					else break;
413				}
414			}
415
416			if(context->vertexShader)
417			{
418				if(context->vertexShader->getVersion() >= 0x0300)
419				{
420					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
421					{
422						if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
423						{
424							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
425							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
426
427							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
428						}
429					}
430				}
431
432				if(draw->vsDirtyConstF)
433				{
434					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
435					draw->vsDirtyConstF = 0;
436				}
437
438				if(draw->vsDirtyConstI)
439				{
440					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
441					draw->vsDirtyConstI = 0;
442				}
443
444				if(draw->vsDirtyConstB)
445				{
446					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
447					draw->vsDirtyConstB = 0;
448				}
449
450				if(context->vertexShader->instanceIdDeclared)
451				{
452					data->instanceID = context->instanceID;
453				}
454
455				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
456				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
457			}
458			else
459			{
460				data->ff = ff;
461
462				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
463				draw->vsDirtyConstI = 16;
464				draw->vsDirtyConstB = 16;
465
466				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
467				{
468					draw->vUniformBuffers[i] = nullptr;
469				}
470
471				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
472				{
473					draw->transformFeedbackBuffers[i] = nullptr;
474				}
475			}
476
477			if(pixelState.stencilActive)
478			{
479				data->stencil[0] = stencil;
480				data->stencil[1] = stencilCCW;
481			}
482
483			if(pixelState.fogActive)
484			{
485				data->fog = fog;
486			}
487
488			if(setupState.isDrawPoint)
489			{
490				data->point = point;
491			}
492
493			data->lineWidth = context->lineWidth;
494
495			data->factor = factor;
496
497			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
498			{
499				float ref = context->alphaReference * (1.0f / 255.0f);
500				float margin = sw::min(ref, 1.0f - ref);
501
502				if(ms == 4)
503				{
504					data->a2c0 = replicate(ref - margin * 0.6f);
505					data->a2c1 = replicate(ref - margin * 0.2f);
506					data->a2c2 = replicate(ref + margin * 0.2f);
507					data->a2c3 = replicate(ref + margin * 0.6f);
508				}
509				else if(ms == 2)
510				{
511					data->a2c0 = replicate(ref - margin * 0.3f);
512					data->a2c1 = replicate(ref + margin * 0.3f);
513				}
514				else ASSERT(false);
515			}
516
517			if(pixelState.occlusionEnabled)
518			{
519				for(int cluster = 0; cluster < clusterCount; cluster++)
520				{
521					data->occlusion[cluster] = 0;
522				}
523			}
524
525			#if PERF_PROFILE
526				for(int cluster = 0; cluster < clusterCount; cluster++)
527				{
528					for(int i = 0; i < PERF_TIMERS; i++)
529					{
530						data->cycles[i][cluster] = 0;
531					}
532				}
533			#endif
534
535			// Viewport
536			{
537				float W = 0.5f * viewport.width;
538				float H = 0.5f * viewport.height;
539				float X0 = viewport.x0 + W;
540				float Y0 = viewport.y0 + H;
541				float N = viewport.minZ;
542				float F = viewport.maxZ;
543				float Z = F - N;
544
545				if(context->isDrawTriangle(false))
546				{
547					N += depthBias;
548				}
549
550				if(complementaryDepthBuffer)
551				{
552					Z = -Z;
553					N = 1 - N;
554				}
555
556				static const float X[5][16] =   // Fragment offsets
557				{
558					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
559					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
560					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
561					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
562					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
563				};
564
565				static const float Y[5][16] =   // Fragment offsets
566				{
567					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
568					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
569					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
570					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
571					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
572				};
573
574				int s = sw::log2(ss);
575
576				data->Wx16 = replicate(W * 16);
577				data->Hx16 = replicate(H * 16);
578				data->X0x16 = replicate(X0 * 16 - 8);
579				data->Y0x16 = replicate(Y0 * 16 - 8);
580				data->XXXX = replicate(X[s][q] / W);
581				data->YYYY = replicate(Y[s][q] / H);
582				data->halfPixelX = replicate(0.5f / W);
583				data->halfPixelY = replicate(0.5f / H);
584				data->viewportHeight = abs(viewport.height);
585				data->slopeDepthBias = slopeDepthBias;
586				data->depthRange = Z;
587				data->depthNear = N;
588				draw->clipFlags = clipFlags;
589
590				if(clipFlags)
591				{
592					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
593					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
594					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
595					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
596					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
597					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
598				}
599			}
600
601			// Target
602			{
603				for(int index = 0; index < RENDERTARGETS; index++)
604				{
605					draw->renderTarget[index] = context->renderTarget[index];
606
607					if(draw->renderTarget[index])
608					{
609						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
610						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
611						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
612					}
613				}
614
615				draw->depthBuffer = context->depthBuffer;
616				draw->stencilBuffer = context->stencilBuffer;
617
618				if(draw->depthBuffer)
619				{
620					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
621					data->depthPitchB = context->depthBuffer->getInternalPitchB();
622					data->depthSliceB = context->depthBuffer->getInternalSliceB();
623				}
624
625				if(draw->stencilBuffer)
626				{
627					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
628					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
629					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
630				}
631			}
632
633			// Scissor
634			{
635				data->scissorX0 = scissor.x0;
636				data->scissorX1 = scissor.x1;
637				data->scissorY0 = scissor.y0;
638				data->scissorY1 = scissor.y1;
639			}
640
641			draw->primitive = 0;
642			draw->count = count;
643
644			draw->references = (count + batch - 1) / batch;
645
646			schedulerMutex.lock();
647			nextDraw++;
648			schedulerMutex.unlock();
649
650			if(threadCount > 1)
651			{
652				if(!threadsAwake)
653				{
654					suspend[0]->wait();
655
656					threadsAwake = 1;
657					task[0].type = Task::RESUME;
658
659					resume[0]->signal();
660				}
661			}
662			else   // Use main thread for draw execution
663			{
664				threadsAwake = 1;
665				task[0].type = Task::RESUME;
666
667				taskLoop(0);
668			}
669		}
670	}
671
672	void Renderer::threadFunction(void *parameters)
673	{
674		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
675		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
676
677		if(logPrecision < IEEE)
678		{
679			CPUID::setFlushToZero(true);
680			CPUID::setDenormalsAreZero(true);
681		}
682
683		renderer->threadLoop(threadIndex);
684	}
685
686	void Renderer::threadLoop(int threadIndex)
687	{
688		while(!exitThreads)
689		{
690			taskLoop(threadIndex);
691
692			suspend[threadIndex]->signal();
693			resume[threadIndex]->wait();
694		}
695	}
696
697	void Renderer::taskLoop(int threadIndex)
698	{
699		while(task[threadIndex].type != Task::SUSPEND)
700		{
701			scheduleTask(threadIndex);
702			executeTask(threadIndex);
703		}
704	}
705
706	void Renderer::findAvailableTasks()
707	{
708		// Find pixel tasks
709		for(int cluster = 0; cluster < clusterCount; cluster++)
710		{
711			if(!pixelProgress[cluster].executing)
712			{
713				for(int unit = 0; unit < unitCount; unit++)
714				{
715					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
716					{
717						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
718						{
719							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
720							{
721								Task &task = taskQueue[qHead];
722								task.type = Task::PIXELS;
723								task.primitiveUnit = unit;
724								task.pixelCluster = cluster;
725
726								pixelProgress[cluster].executing = true;
727
728								// Commit to the task queue
729								qHead = (qHead + 1) % 32;
730								qSize++;
731
732								break;
733							}
734						}
735					}
736				}
737			}
738		}
739
740		// Find primitive tasks
741		if(currentDraw == nextDraw)
742		{
743			return;   // No more primitives to process
744		}
745
746		for(int unit = 0; unit < unitCount; unit++)
747		{
748			DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
749
750			if(draw->primitive >= draw->count)
751			{
752				currentDraw++;
753
754				if(currentDraw == nextDraw)
755				{
756					return;   // No more primitives to process
757				}
758
759				draw = drawList[currentDraw % DRAW_COUNT];
760			}
761
762			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
763			{
764				int primitive = draw->primitive;
765				int count = draw->count;
766				int batch = draw->batchSize;
767
768				primitiveProgress[unit].drawCall = currentDraw;
769				primitiveProgress[unit].firstPrimitive = primitive;
770				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
771
772				draw->primitive += batch;
773
774				Task &task = taskQueue[qHead];
775				task.type = Task::PRIMITIVES;
776				task.primitiveUnit = unit;
777
778				primitiveProgress[unit].references = -1;
779
780				// Commit to the task queue
781				qHead = (qHead + 1) % 32;
782				qSize++;
783			}
784		}
785	}
786
787	void Renderer::scheduleTask(int threadIndex)
788	{
789		schedulerMutex.lock();
790
791		if((int)qSize < threadCount - threadsAwake + 1)
792		{
793			findAvailableTasks();
794		}
795
796		if(qSize != 0)
797		{
798			task[threadIndex] = taskQueue[(qHead - qSize) % 32];
799			qSize--;
800
801			if(threadsAwake != threadCount)
802			{
803				int wakeup = qSize - threadsAwake + 1;
804
805				for(int i = 0; i < threadCount && wakeup > 0; i++)
806				{
807					if(task[i].type == Task::SUSPEND)
808					{
809						suspend[i]->wait();
810						task[i].type = Task::RESUME;
811						resume[i]->signal();
812
813						threadsAwake++;
814						wakeup--;
815					}
816				}
817			}
818		}
819		else
820		{
821			task[threadIndex].type = Task::SUSPEND;
822
823			threadsAwake--;
824		}
825
826		schedulerMutex.unlock();
827	}
828
829	void Renderer::executeTask(int threadIndex)
830	{
831		#if PERF_HUD
832			int64_t startTick = Timer::ticks();
833		#endif
834
835		switch(task[threadIndex].type)
836		{
837		case Task::PRIMITIVES:
838			{
839				int unit = task[threadIndex].primitiveUnit;
840
841				int input = primitiveProgress[unit].firstPrimitive;
842				int count = primitiveProgress[unit].primitiveCount;
843				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
844				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
845
846				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
847
848				#if PERF_HUD
849					int64_t time = Timer::ticks();
850					vertexTime[threadIndex] += time - startTick;
851					startTick = time;
852				#endif
853
854				int visible = 0;
855
856				if(!draw->setupState.rasterizerDiscard)
857				{
858					visible = (this->*setupPrimitives)(unit, count);
859				}
860
861				primitiveProgress[unit].visible = visible;
862				primitiveProgress[unit].references = clusterCount;
863
864				#if PERF_HUD
865					setupTime[threadIndex] += Timer::ticks() - startTick;
866				#endif
867			}
868			break;
869		case Task::PIXELS:
870			{
871				int unit = task[threadIndex].primitiveUnit;
872				int visible = primitiveProgress[unit].visible;
873
874				if(visible > 0)
875				{
876					int cluster = task[threadIndex].pixelCluster;
877					Primitive *primitive = primitiveBatch[unit];
878					DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
879					DrawData *data = draw->data;
880					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
881
882					pixelRoutine(primitive, visible, cluster, data);
883				}
884
885				finishRendering(task[threadIndex]);
886
887				#if PERF_HUD
888					pixelTime[threadIndex] += Timer::ticks() - startTick;
889				#endif
890			}
891			break;
892		case Task::RESUME:
893			break;
894		case Task::SUSPEND:
895			break;
896		default:
897			ASSERT(false);
898		}
899	}
900
901	void Renderer::synchronize()
902	{
903		sync->lock(sw::PUBLIC);
904		sync->unlock();
905	}
906
907	void Renderer::finishRendering(Task &pixelTask)
908	{
909		int unit = pixelTask.primitiveUnit;
910		int cluster = pixelTask.pixelCluster;
911
912		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
913		DrawData &data = *draw.data;
914		int primitive = primitiveProgress[unit].firstPrimitive;
915		int count = primitiveProgress[unit].primitiveCount;
916		int processedPrimitives = primitive + count;
917
918		pixelProgress[cluster].processedPrimitives = processedPrimitives;
919
920		if(pixelProgress[cluster].processedPrimitives >= draw.count)
921		{
922			pixelProgress[cluster].drawCall++;
923			pixelProgress[cluster].processedPrimitives = 0;
924		}
925
926		int ref = atomicDecrement(&primitiveProgress[unit].references);
927
928		if(ref == 0)
929		{
930			ref = atomicDecrement(&draw.references);
931
932			if(ref == 0)
933			{
934				#if PERF_PROFILE
935					for(int cluster = 0; cluster < clusterCount; cluster++)
936					{
937						for(int i = 0; i < PERF_TIMERS; i++)
938						{
939							profiler.cycles[i] += data.cycles[i][cluster];
940						}
941					}
942				#endif
943
944				if(draw.queries)
945				{
946					for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
947					{
948						Query *query = *q;
949
950						switch(query->type)
951						{
952						case Query::FRAGMENTS_PASSED:
953							for(int cluster = 0; cluster < clusterCount; cluster++)
954							{
955								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
956							}
957							break;
958						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
959							atomicAdd((volatile int*)&query->data, processedPrimitives);
960							break;
961						default:
962							break;
963						}
964
965						atomicDecrement(&query->reference);
966					}
967
968					delete draw.queries;
969					draw.queries = 0;
970				}
971
972				for(int i = 0; i < RENDERTARGETS; i++)
973				{
974					if(draw.renderTarget[i])
975					{
976						draw.renderTarget[i]->unlockInternal();
977					}
978				}
979
980				if(draw.depthBuffer)
981				{
982					draw.depthBuffer->unlockInternal();
983				}
984
985				if(draw.stencilBuffer)
986				{
987					draw.stencilBuffer->unlockStencil();
988				}
989
990				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
991				{
992					if(draw.texture[i])
993					{
994						draw.texture[i]->unlock();
995					}
996				}
997
998				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
999				{
1000					if(draw.vertexStream[i])
1001					{
1002						draw.vertexStream[i]->unlock();
1003					}
1004				}
1005
1006				if(draw.indexBuffer)
1007				{
1008					draw.indexBuffer->unlock();
1009				}
1010
1011				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1012				{
1013					if(draw.pUniformBuffers[i])
1014					{
1015						draw.pUniformBuffers[i]->unlock();
1016					}
1017					if(draw.vUniformBuffers[i])
1018					{
1019						draw.vUniformBuffers[i]->unlock();
1020					}
1021				}
1022
1023				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1024				{
1025					if(draw.transformFeedbackBuffers[i])
1026					{
1027						draw.transformFeedbackBuffers[i]->unlock();
1028					}
1029				}
1030
1031				draw.vertexRoutine->unbind();
1032				draw.setupRoutine->unbind();
1033				draw.pixelRoutine->unbind();
1034
1035				sync->unlock();
1036
1037				draw.references = -1;
1038				resumeApp->signal();
1039			}
1040		}
1041
1042		pixelProgress[cluster].executing = false;
1043	}
1044
1045	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1046	{
1047		Triangle *triangle = triangleBatch[unit];
1048		DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1049		DrawData *data = draw->data;
1050		VertexTask *task = vertexTask[thread];
1051
1052		const void *indices = data->indices;
1053		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1054
1055		if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1056		{
1057			task->vertexCache.clear();
1058			task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1059		}
1060
1061		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1062
1063		switch(draw->drawType)
1064		{
1065		case DRAW_POINTLIST:
1066			{
1067				unsigned int index = start;
1068
1069				for(unsigned int i = 0; i < triangleCount; i++)
1070				{
1071					batch[i][0] = index;
1072					batch[i][1] = index;
1073					batch[i][2] = index;
1074
1075					index += 1;
1076				}
1077			}
1078			break;
1079		case DRAW_LINELIST:
1080			{
1081				unsigned int index = 2 * start;
1082
1083				for(unsigned int i = 0; i < triangleCount; i++)
1084				{
1085					batch[i][0] = index + 0;
1086					batch[i][1] = index + 1;
1087					batch[i][2] = index + 1;
1088
1089					index += 2;
1090				}
1091			}
1092			break;
1093		case DRAW_LINESTRIP:
1094			{
1095				unsigned int index = start;
1096
1097				for(unsigned int i = 0; i < triangleCount; i++)
1098				{
1099					batch[i][0] = index + 0;
1100					batch[i][1] = index + 1;
1101					batch[i][2] = index + 1;
1102
1103					index += 1;
1104				}
1105			}
1106			break;
1107		case DRAW_LINELOOP:
1108			{
1109				unsigned int index = start;
1110
1111				for(unsigned int i = 0; i < triangleCount; i++)
1112				{
1113					batch[i][0] = (index + 0) % loop;
1114					batch[i][1] = (index + 1) % loop;
1115					batch[i][2] = (index + 1) % loop;
1116
1117					index += 1;
1118				}
1119			}
1120			break;
1121		case DRAW_TRIANGLELIST:
1122			{
1123				unsigned int index = 3 * start;
1124
1125				for(unsigned int i = 0; i < triangleCount; i++)
1126				{
1127					batch[i][0] = index + 0;
1128					batch[i][1] = index + 1;
1129					batch[i][2] = index + 2;
1130
1131					index += 3;
1132				}
1133			}
1134			break;
1135		case DRAW_TRIANGLESTRIP:
1136			{
1137				unsigned int index = start;
1138
1139				for(unsigned int i = 0; i < triangleCount; i++)
1140				{
1141					batch[i][0] = index + 0;
1142					batch[i][1] = index + (index & 1) + 1;
1143					batch[i][2] = index + (~index & 1) + 1;
1144
1145					index += 1;
1146				}
1147			}
1148			break;
1149		case DRAW_TRIANGLEFAN:
1150			{
1151				unsigned int index = start;
1152
1153				for(unsigned int i = 0; i < triangleCount; i++)
1154				{
1155					batch[i][0] = index + 1;
1156					batch[i][1] = index + 2;
1157					batch[i][2] = 0;
1158
1159					index += 1;
1160				}
1161			}
1162			break;
1163		case DRAW_INDEXEDPOINTLIST8:
1164			{
1165				const unsigned char *index = (const unsigned char*)indices + start;
1166
1167				for(unsigned int i = 0; i < triangleCount; i++)
1168				{
1169					batch[i][0] = *index;
1170					batch[i][1] = *index;
1171					batch[i][2] = *index;
1172
1173					index += 1;
1174				}
1175			}
1176			break;
1177		case DRAW_INDEXEDPOINTLIST16:
1178			{
1179				const unsigned short *index = (const unsigned short*)indices + start;
1180
1181				for(unsigned int i = 0; i < triangleCount; i++)
1182				{
1183					batch[i][0] = *index;
1184					batch[i][1] = *index;
1185					batch[i][2] = *index;
1186
1187					index += 1;
1188				}
1189			}
1190			break;
1191		case DRAW_INDEXEDPOINTLIST32:
1192			{
1193				const unsigned int *index = (const unsigned int*)indices + start;
1194
1195				for(unsigned int i = 0; i < triangleCount; i++)
1196				{
1197					batch[i][0] = *index;
1198					batch[i][1] = *index;
1199					batch[i][2] = *index;
1200
1201					index += 1;
1202				}
1203			}
1204			break;
1205		case DRAW_INDEXEDLINELIST8:
1206			{
1207				const unsigned char *index = (const unsigned char*)indices + 2 * start;
1208
1209				for(unsigned int i = 0; i < triangleCount; i++)
1210				{
1211					batch[i][0] = index[0];
1212					batch[i][1] = index[1];
1213					batch[i][2] = index[1];
1214
1215					index += 2;
1216				}
1217			}
1218			break;
1219		case DRAW_INDEXEDLINELIST16:
1220			{
1221				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1222
1223				for(unsigned int i = 0; i < triangleCount; i++)
1224				{
1225					batch[i][0] = index[0];
1226					batch[i][1] = index[1];
1227					batch[i][2] = index[1];
1228
1229					index += 2;
1230				}
1231			}
1232			break;
1233		case DRAW_INDEXEDLINELIST32:
1234			{
1235				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1236
1237				for(unsigned int i = 0; i < triangleCount; i++)
1238				{
1239					batch[i][0] = index[0];
1240					batch[i][1] = index[1];
1241					batch[i][2] = index[1];
1242
1243					index += 2;
1244				}
1245			}
1246			break;
1247		case DRAW_INDEXEDLINESTRIP8:
1248			{
1249				const unsigned char *index = (const unsigned char*)indices + start;
1250
1251				for(unsigned int i = 0; i < triangleCount; i++)
1252				{
1253					batch[i][0] = index[0];
1254					batch[i][1] = index[1];
1255					batch[i][2] = index[1];
1256
1257					index += 1;
1258				}
1259			}
1260			break;
1261		case DRAW_INDEXEDLINESTRIP16:
1262			{
1263				const unsigned short *index = (const unsigned short*)indices + start;
1264
1265				for(unsigned int i = 0; i < triangleCount; i++)
1266				{
1267					batch[i][0] = index[0];
1268					batch[i][1] = index[1];
1269					batch[i][2] = index[1];
1270
1271					index += 1;
1272				}
1273			}
1274			break;
1275		case DRAW_INDEXEDLINESTRIP32:
1276			{
1277				const unsigned int *index = (const unsigned int*)indices + start;
1278
1279				for(unsigned int i = 0; i < triangleCount; i++)
1280				{
1281					batch[i][0] = index[0];
1282					batch[i][1] = index[1];
1283					batch[i][2] = index[1];
1284
1285					index += 1;
1286				}
1287			}
1288			break;
1289		case DRAW_INDEXEDLINELOOP8:
1290			{
1291				const unsigned char *index = (const unsigned char*)indices;
1292
1293				for(unsigned int i = 0; i < triangleCount; i++)
1294				{
1295					batch[i][0] = index[(start + i + 0) % loop];
1296					batch[i][1] = index[(start + i + 1) % loop];
1297					batch[i][2] = index[(start + i + 1) % loop];
1298				}
1299			}
1300			break;
1301		case DRAW_INDEXEDLINELOOP16:
1302			{
1303				const unsigned short *index = (const unsigned short*)indices;
1304
1305				for(unsigned int i = 0; i < triangleCount; i++)
1306				{
1307					batch[i][0] = index[(start + i + 0) % loop];
1308					batch[i][1] = index[(start + i + 1) % loop];
1309					batch[i][2] = index[(start + i + 1) % loop];
1310				}
1311			}
1312			break;
1313		case DRAW_INDEXEDLINELOOP32:
1314			{
1315				const unsigned int *index = (const unsigned int*)indices;
1316
1317				for(unsigned int i = 0; i < triangleCount; i++)
1318				{
1319					batch[i][0] = index[(start + i + 0) % loop];
1320					batch[i][1] = index[(start + i + 1) % loop];
1321					batch[i][2] = index[(start + i + 1) % loop];
1322				}
1323			}
1324			break;
1325		case DRAW_INDEXEDTRIANGLELIST8:
1326			{
1327				const unsigned char *index = (const unsigned char*)indices + 3 * start;
1328
1329				for(unsigned int i = 0; i < triangleCount; i++)
1330				{
1331					batch[i][0] = index[0];
1332					batch[i][1] = index[1];
1333					batch[i][2] = index[2];
1334
1335					index += 3;
1336				}
1337			}
1338			break;
1339		case DRAW_INDEXEDTRIANGLELIST16:
1340			{
1341				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1342
1343				for(unsigned int i = 0; i < triangleCount; i++)
1344				{
1345					batch[i][0] = index[0];
1346					batch[i][1] = index[1];
1347					batch[i][2] = index[2];
1348
1349					index += 3;
1350				}
1351			}
1352			break;
1353		case DRAW_INDEXEDTRIANGLELIST32:
1354			{
1355				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1356
1357				for(unsigned int i = 0; i < triangleCount; i++)
1358				{
1359					batch[i][0] = index[0];
1360					batch[i][1] = index[1];
1361					batch[i][2] = index[2];
1362
1363					index += 3;
1364				}
1365			}
1366			break;
1367		case DRAW_INDEXEDTRIANGLESTRIP8:
1368			{
1369				const unsigned char *index = (const unsigned char*)indices + start;
1370
1371				for(unsigned int i = 0; i < triangleCount; i++)
1372				{
1373					batch[i][0] = index[0];
1374					batch[i][1] = index[((start + i) & 1) + 1];
1375					batch[i][2] = index[(~(start + i) & 1) + 1];
1376
1377					index += 1;
1378				}
1379			}
1380			break;
1381		case DRAW_INDEXEDTRIANGLESTRIP16:
1382			{
1383				const unsigned short *index = (const unsigned short*)indices + start;
1384
1385				for(unsigned int i = 0; i < triangleCount; i++)
1386				{
1387					batch[i][0] = index[0];
1388					batch[i][1] = index[((start + i) & 1) + 1];
1389					batch[i][2] = index[(~(start + i) & 1) + 1];
1390
1391					index += 1;
1392				}
1393			}
1394			break;
1395		case DRAW_INDEXEDTRIANGLESTRIP32:
1396			{
1397				const unsigned int *index = (const unsigned int*)indices + start;
1398
1399				for(unsigned int i = 0; i < triangleCount; i++)
1400				{
1401					batch[i][0] = index[0];
1402					batch[i][1] = index[((start + i) & 1) + 1];
1403					batch[i][2] = index[(~(start + i) & 1) + 1];
1404
1405					index += 1;
1406				}
1407			}
1408			break;
1409		case DRAW_INDEXEDTRIANGLEFAN8:
1410			{
1411				const unsigned char *index = (const unsigned char*)indices;
1412
1413				for(unsigned int i = 0; i < triangleCount; i++)
1414				{
1415					batch[i][0] = index[start + i + 1];
1416					batch[i][1] = index[start + i + 2];
1417					batch[i][2] = index[0];
1418				}
1419			}
1420			break;
1421		case DRAW_INDEXEDTRIANGLEFAN16:
1422			{
1423				const unsigned short *index = (const unsigned short*)indices;
1424
1425				for(unsigned int i = 0; i < triangleCount; i++)
1426				{
1427					batch[i][0] = index[start + i + 1];
1428					batch[i][1] = index[start + i + 2];
1429					batch[i][2] = index[0];
1430				}
1431			}
1432			break;
1433		case DRAW_INDEXEDTRIANGLEFAN32:
1434			{
1435				const unsigned int *index = (const unsigned int*)indices;
1436
1437				for(unsigned int i = 0; i < triangleCount; i++)
1438				{
1439					batch[i][0] = index[start + i + 1];
1440					batch[i][1] = index[start + i + 2];
1441					batch[i][2] = index[0];
1442				}
1443			}
1444			break;
1445		case DRAW_QUADLIST:
1446			{
1447				unsigned int index = 4 * start / 2;
1448
1449				for(unsigned int i = 0; i < triangleCount; i += 2)
1450				{
1451					batch[i+0][0] = index + 0;
1452					batch[i+0][1] = index + 1;
1453					batch[i+0][2] = index + 2;
1454
1455					batch[i+1][0] = index + 0;
1456					batch[i+1][1] = index + 2;
1457					batch[i+1][2] = index + 3;
1458
1459					index += 4;
1460				}
1461			}
1462			break;
1463		default:
1464			ASSERT(false);
1465			return;
1466		}
1467
1468		task->primitiveStart = start;
1469		task->vertexCount = triangleCount * 3;
1470		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1471	}
1472
1473	int Renderer::setupSolidTriangles(int unit, int count)
1474	{
1475		Triangle *triangle = triangleBatch[unit];
1476		Primitive *primitive = primitiveBatch[unit];
1477
1478		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1479		SetupProcessor::State &state = draw.setupState;
1480		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1481
1482		int ms = state.multiSample;
1483		int pos = state.positionRegister;
1484		const DrawData *data = draw.data;
1485		int visible = 0;
1486
1487		for(int i = 0; i < count; i++, triangle++)
1488		{
1489			Vertex &v0 = triangle->v0;
1490			Vertex &v1 = triangle->v1;
1491			Vertex &v2 = triangle->v2;
1492
1493			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1494			{
1495				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1496
1497				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1498
1499				if(clipFlagsOr != Clipper::CLIP_FINITE)
1500				{
1501					if(!clipper->clip(polygon, clipFlagsOr, draw))
1502					{
1503						continue;
1504					}
1505				}
1506
1507				if(setupRoutine(primitive, triangle, &polygon, data))
1508				{
1509					primitive += ms;
1510					visible++;
1511				}
1512			}
1513		}
1514
1515		return visible;
1516	}
1517
1518	int Renderer::setupWireframeTriangle(int unit, int count)
1519	{
1520		Triangle *triangle = triangleBatch[unit];
1521		Primitive *primitive = primitiveBatch[unit];
1522		int visible = 0;
1523
1524		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1525		SetupProcessor::State &state = draw.setupState;
1526		SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1527
1528		const Vertex &v0 = triangle[0].v0;
1529		const Vertex &v1 = triangle[0].v1;
1530		const Vertex &v2 = triangle[0].v2;
1531
1532		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1533
1534		if(state.cullMode == CULL_CLOCKWISE)
1535		{
1536			if(d >= 0) return 0;
1537		}
1538		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1539		{
1540			if(d <= 0) return 0;
1541		}
1542
1543		// Copy attributes
1544		triangle[1].v0 = v1;
1545		triangle[1].v1 = v2;
1546		triangle[2].v0 = v2;
1547		triangle[2].v1 = v0;
1548
1549		if(state.color[0][0].flat)   // FIXME
1550		{
1551			for(int i = 0; i < 2; i++)
1552			{
1553				triangle[1].v0.C[i] = triangle[0].v0.C[i];
1554				triangle[1].v1.C[i] = triangle[0].v0.C[i];
1555				triangle[2].v0.C[i] = triangle[0].v0.C[i];
1556				triangle[2].v1.C[i] = triangle[0].v0.C[i];
1557			}
1558		}
1559
1560		for(int i = 0; i < 3; i++)
1561		{
1562			if(setupLine(*primitive, *triangle, draw))
1563			{
1564				primitive->area = 0.5f * d;
1565
1566				primitive++;
1567				visible++;
1568			}
1569
1570			triangle++;
1571		}
1572
1573		return visible;
1574	}
1575
1576	int Renderer::setupVertexTriangle(int unit, int count)
1577	{
1578		Triangle *triangle = triangleBatch[unit];
1579		Primitive *primitive = primitiveBatch[unit];
1580		int visible = 0;
1581
1582		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1583		SetupProcessor::State &state = draw.setupState;
1584
1585		const Vertex &v0 = triangle[0].v0;
1586		const Vertex &v1 = triangle[0].v1;
1587		const Vertex &v2 = triangle[0].v2;
1588
1589		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1590
1591		if(state.cullMode == CULL_CLOCKWISE)
1592		{
1593			if(d >= 0) return 0;
1594		}
1595		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1596		{
1597			if(d <= 0) return 0;
1598		}
1599
1600		// Copy attributes
1601		triangle[1].v0 = v1;
1602		triangle[2].v0 = v2;
1603
1604		for(int i = 0; i < 3; i++)
1605		{
1606			if(setupPoint(*primitive, *triangle, draw))
1607			{
1608				primitive->area = 0.5f * d;
1609
1610				primitive++;
1611				visible++;
1612			}
1613
1614			triangle++;
1615		}
1616
1617		return visible;
1618	}
1619
1620	int Renderer::setupLines(int unit, int count)
1621	{
1622		Triangle *triangle = triangleBatch[unit];
1623		Primitive *primitive = primitiveBatch[unit];
1624		int visible = 0;
1625
1626		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1627		SetupProcessor::State &state = draw.setupState;
1628
1629		int ms = state.multiSample;
1630
1631		for(int i = 0; i < count; i++)
1632		{
1633			if(setupLine(*primitive, *triangle, draw))
1634			{
1635				primitive += ms;
1636				visible++;
1637			}
1638
1639			triangle++;
1640		}
1641
1642		return visible;
1643	}
1644
1645	int Renderer::setupPoints(int unit, int count)
1646	{
1647		Triangle *triangle = triangleBatch[unit];
1648		Primitive *primitive = primitiveBatch[unit];
1649		int visible = 0;
1650
1651		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1652		SetupProcessor::State &state = draw.setupState;
1653
1654		int ms = state.multiSample;
1655
1656		for(int i = 0; i < count; i++)
1657		{
1658			if(setupPoint(*primitive, *triangle, draw))
1659			{
1660				primitive += ms;
1661				visible++;
1662			}
1663
1664			triangle++;
1665		}
1666
1667		return visible;
1668	}
1669
	// Sets up the line from triangle.v0 to triangle.v1 as a polygon and passes
	// it to the draw call's setup routine.
	//
	// Returns false when both endpoints have w <= 0, when the two endpoints
	// project to the same position, when the corner clip flags have no
	// CLIP_FINITE-only intersection, or when the clipper rejects the polygon.
	// Otherwise returns the result of the setup routine.
	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
	{
		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
		const SetupProcessor::State &state = draw.setupState;
		const DrawData &data = *draw.data;

		float lineWidth = data.lineWidth;

		Vertex &v0 = triangle.v0;
		Vertex &v1 = triangle.v1;

		int pos = state.positionRegister;

		const float4 &P0 = v0.v[pos];
		const float4 &P1 = v1.v[pos];

		// Reject only when both endpoints have non-positive w
		if(P0.w <= 0 && P1.w <= 0)
		{
			return false;
		}

		// NOTE(review): W and H are presumably the viewport scale in pixels,
		// stored premultiplied by 16 - confirm against where Wx16/Hx16 are set
		const float W = data.Wx16[0] * (1.0f / 16.0f);
		const float H = data.Hx16[0] * (1.0f / 16.0f);

		// Difference between the projected (divided by w) endpoints, scaled by W and H
		float dx = W * (P1.x / P1.w - P0.x / P0.w);
		float dy = H * (P1.y / P1.w - P0.y / P0.w);

		// Zero-length line
		if(dx == 0 && dy == 0)
		{
			return false;
		}

		// This branch is disabled; only the diamond convention below is executed
		if(false)   // Rectangle
		{
			float4 P[4];
			int C[4];

			P[0] = P0;
			P[1] = P1;
			P[2] = P1;
			P[3] = P0;

			// Scale (dx, dy) to half the line width
			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);

			dx *= scale;
			dy *= scale;

			// Offsets multiplied by each endpoint's w and divided by W or H
			float dx0w = dx * P0.w / W;
			float dy0h = dy * P0.w / H;
			float dx0h = dx * P0.w / H;
			float dy0w = dy * P0.w / W;

			float dx1w = dx * P1.w / W;
			float dy1h = dy * P1.w / H;
			float dx1h = dx * P1.w / H;
			float dy1w = dy * P1.w / W;

			P[0].x += -dy0w + -dx0w;
			P[0].y += -dx0h + +dy0h;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].x += -dy1w + +dx1w;
			P[1].y += -dx1h + +dy1h;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dy1w + +dx1w;
			P[2].y += +dx1h + -dy1h;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].x += +dy0w + -dx0w;
			P[3].y += +dx0h + +dy0h;
			C[3] = clipper->computeClipFlags(P[3]);

			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
			{
				Polygon polygon(P, 4);

				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;

				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}
		else   // Diamond test convention
		{
			// P[0..3] are the four corners of a diamond around P0 and P[4..7]
			// those of a diamond around P1
			float4 P[8];
			int C[8];

			P[0] = P0;
			P[1] = P0;
			P[2] = P0;
			P[3] = P0;
			P[4] = P1;
			P[5] = P1;
			P[6] = P1;
			P[7] = P1;

			// Half the line width, multiplied by each endpoint's w and divided by W or H
			float dx0 = lineWidth * 0.5f * P0.w / W;
			float dy0 = lineWidth * 0.5f * P0.w / H;

			float dx1 = lineWidth * 0.5f * P1.w / W;
			float dy1 = lineWidth * 0.5f * P1.w / H;

			// Each corner is displaced along a single axis: -x, +y, +x, -y
			P[0].x += -dx0;
			C[0] = clipper->computeClipFlags(P[0]);

			P[1].y += +dy0;
			C[1] = clipper->computeClipFlags(P[1]);

			P[2].x += +dx0;
			C[2] = clipper->computeClipFlags(P[2]);

			P[3].y += -dy0;
			C[3] = clipper->computeClipFlags(P[3]);

			P[4].x += -dx1;
			C[4] = clipper->computeClipFlags(P[4]);

			P[5].y += +dy1;
			C[5] = clipper->computeClipFlags(P[5]);

			P[6].x += +dx1;
			C[6] = clipper->computeClipFlags(P[6]);

			P[7].y += -dy1;
			C[7] = clipper->computeClipFlags(P[7]);

			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
			{
				// Six of the eight corners form the polygon; which six depends
				// on the dominant direction of (dx, dy)
				float4 L[6];

				if(dx > -dy)
				{
					if(dx > dy)   // Right
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[3];
					}
					else   // Down
					{
						L[0] = P[0];
						L[1] = P[4];
						L[2] = P[5];
						L[3] = P[6];
						L[4] = P[2];
						L[5] = P[3];
					}
				}
				else
				{
					if(dx > dy)   // Up
					{
						L[0] = P[0];
						L[1] = P[1];
						L[2] = P[2];
						L[3] = P[6];
						L[4] = P[7];
						L[5] = P[4];
					}
					else   // Left
					{
						L[0] = P[1];
						L[1] = P[2];
						L[2] = P[3];
						L[3] = P[7];
						L[4] = P[4];
						L[5] = P[5];
					}
				}

				Polygon polygon(L, 6);

				// The flags of all eight corners decide whether clipping is needed
				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;

				if(clipFlagsOr != Clipper::CLIP_FINITE)
				{
					if(!clipper->clip(polygon, clipFlagsOr, draw))
					{
						return false;
					}
				}

				return setupRoutine(&primitive, &triangle, &polygon, &data);
			}
		}

		return false;
	}
1869
1870	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1871	{
1872		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1873		const SetupProcessor::State &state = draw.setupState;
1874		const DrawData &data = *draw.data;
1875
1876		Vertex &v = triangle.v0;
1877
1878		float pSize;
1879
1880		int pts = state.pointSizeRegister;
1881
1882		if(state.pointSizeRegister != Unused)
1883		{
1884			pSize = v.v[pts].y;
1885		}
1886		else
1887		{
1888			pSize = data.point.pointSize[0];
1889		}
1890
1891		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1892
1893		float4 P[4];
1894		int C[4];
1895
1896		int pos = state.positionRegister;
1897
1898		P[0] = v.v[pos];
1899		P[1] = v.v[pos];
1900		P[2] = v.v[pos];
1901		P[3] = v.v[pos];
1902
1903		const float X = pSize * P[0].w * data.halfPixelX[0];
1904		const float Y = pSize * P[0].w * data.halfPixelY[0];
1905
1906		P[0].x -= X;
1907		P[0].y += Y;
1908		C[0] = clipper->computeClipFlags(P[0]);
1909
1910		P[1].x += X;
1911		P[1].y += Y;
1912		C[1] = clipper->computeClipFlags(P[1]);
1913
1914		P[2].x += X;
1915		P[2].y -= Y;
1916		C[2] = clipper->computeClipFlags(P[2]);
1917
1918		P[3].x -= X;
1919		P[3].y -= Y;
1920		C[3] = clipper->computeClipFlags(P[3]);
1921
1922		triangle.v1 = triangle.v0;
1923		triangle.v2 = triangle.v0;
1924
1925		triangle.v1.X += iround(16 * 0.5f * pSize);
1926		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1927
1928		Polygon polygon(P, 4);
1929
1930		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1931		{
1932			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1933
1934			if(clipFlagsOr != Clipper::CLIP_FINITE)
1935			{
1936				if(!clipper->clip(polygon, clipFlagsOr, draw))
1937				{
1938					return false;
1939				}
1940			}
1941
1942			return setupRoutine(&primitive, &triangle, &polygon, &data);
1943		}
1944
1945		return false;
1946	}
1947
1948	void Renderer::initializeThreads()
1949	{
1950		unitCount = ceilPow2(threadCount);
1951		clusterCount = ceilPow2(threadCount);
1952
1953		for(int i = 0; i < unitCount; i++)
1954		{
1955			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1956			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1957		}
1958
1959		for(int i = 0; i < threadCount; i++)
1960		{
1961			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1962			vertexTask[i]->vertexCache.drawCall = -1;
1963
1964			task[i].type = Task::SUSPEND;
1965
1966			resume[i] = new Event();
1967			suspend[i] = new Event();
1968
1969			Parameters parameters;
1970			parameters.threadIndex = i;
1971			parameters.renderer = this;
1972
1973			exitThreads = false;
1974			worker[i] = new Thread(threadFunction, &parameters);
1975
1976			suspend[i]->wait();
1977			suspend[i]->signal();
1978		}
1979	}
1980
1981	void Renderer::terminateThreads()
1982	{
1983		while(threadsAwake != 0)
1984		{
1985			Thread::sleep(1);
1986		}
1987
1988		for(int thread = 0; thread < threadCount; thread++)
1989		{
1990			if(worker[thread])
1991			{
1992				exitThreads = true;
1993				resume[thread]->signal();
1994				worker[thread]->join();
1995
1996				delete worker[thread];
1997				worker[thread] = 0;
1998				delete resume[thread];
1999				resume[thread] = 0;
2000				delete suspend[thread];
2001				suspend[thread] = 0;
2002			}
2003
2004			deallocate(vertexTask[thread]);
2005			vertexTask[thread] = 0;
2006		}
2007
2008		for(int i = 0; i < 16; i++)
2009		{
2010			deallocate(triangleBatch[i]);
2011			triangleBatch[i] = 0;
2012
2013			deallocate(primitiveBatch[i]);
2014			primitiveBatch[i] = 0;
2015		}
2016	}
2017
2018	void Renderer::loadConstants(const VertexShader *vertexShader)
2019	{
2020		if(!vertexShader) return;
2021
2022		size_t count = vertexShader->getLength();
2023
2024		for(size_t i = 0; i < count; i++)
2025		{
2026			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2027
2028			if(instruction->opcode == Shader::OPCODE_DEF)
2029			{
2030				int index = instruction->dst.index;
2031				float value[4];
2032
2033				value[0] = instruction->src[0].value[0];
2034				value[1] = instruction->src[0].value[1];
2035				value[2] = instruction->src[0].value[2];
2036				value[3] = instruction->src[0].value[3];
2037
2038				setVertexShaderConstantF(index, value);
2039			}
2040			else if(instruction->opcode == Shader::OPCODE_DEFI)
2041			{
2042				int index = instruction->dst.index;
2043				int integer[4];
2044
2045				integer[0] = instruction->src[0].integer[0];
2046				integer[1] = instruction->src[0].integer[1];
2047				integer[2] = instruction->src[0].integer[2];
2048				integer[3] = instruction->src[0].integer[3];
2049
2050				setVertexShaderConstantI(index, integer);
2051			}
2052			else if(instruction->opcode == Shader::OPCODE_DEFB)
2053			{
2054				int index = instruction->dst.index;
2055				int boolean = instruction->src[0].boolean[0];
2056
2057				setVertexShaderConstantB(index, &boolean);
2058			}
2059		}
2060	}
2061
2062	void Renderer::loadConstants(const PixelShader *pixelShader)
2063	{
2064		if(!pixelShader) return;
2065
2066		size_t count = pixelShader->getLength();
2067
2068		for(size_t i = 0; i < count; i++)
2069		{
2070			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2071
2072			if(instruction->opcode == Shader::OPCODE_DEF)
2073			{
2074				int index = instruction->dst.index;
2075				float value[4];
2076
2077				value[0] = instruction->src[0].value[0];
2078				value[1] = instruction->src[0].value[1];
2079				value[2] = instruction->src[0].value[2];
2080				value[3] = instruction->src[0].value[3];
2081
2082				setPixelShaderConstantF(index, value);
2083			}
2084			else if(instruction->opcode == Shader::OPCODE_DEFI)
2085			{
2086				int index = instruction->dst.index;
2087				int integer[4];
2088
2089				integer[0] = instruction->src[0].integer[0];
2090				integer[1] = instruction->src[0].integer[1];
2091				integer[2] = instruction->src[0].integer[2];
2092				integer[3] = instruction->src[0].integer[3];
2093
2094				setPixelShaderConstantI(index, integer);
2095			}
2096			else if(instruction->opcode == Shader::OPCODE_DEFB)
2097			{
2098				int index = instruction->dst.index;
2099				int boolean = instruction->src[0].boolean[0];
2100
2101				setPixelShaderConstantB(index, &boolean);
2102			}
2103		}
2104	}
2105
	// Stores 'indexBuffer' as the context's index buffer resource.
	void Renderer::setIndexBuffer(Resource *indexBuffer)
	{
		context->indexBuffer = indexBuffer;
	}
2110
	// Stores 'mask' as the context's sample mask.
	void Renderer::setMultiSampleMask(unsigned int mask)
	{
		context->sampleMask = mask;
	}
2115
	// Writes the namespace-level sw::transparencyAntialiasing setting. The
	// explicit qualification is needed because the parameter has the same name.
	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
	{
		sw::transparencyAntialiasing = transparencyAntialiasing;
	}
2120
2121	bool Renderer::isReadWriteTexture(int sampler)
2122	{
2123		for(int index = 0; index < RENDERTARGETS; index++)
2124		{
2125			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2126			{
2127				return true;
2128			}
2129		}
2130
2131		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2132		{
2133			return true;
2134		}
2135
2136		return false;
2137	}
2138
2139	void Renderer::updateClipper()
2140	{
2141		if(updateClipPlanes)
2142		{
2143			if(VertexProcessor::isFixedFunction())   // User plane in world space
2144			{
2145				const Matrix &scissorWorld = getViewTransform();
2146
2147				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2148				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2149				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2150				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2151				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2152				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2153			}
2154			else   // User plane in clip space
2155			{
2156				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2157				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2158				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2159				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2160				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2161				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2162			}
2163
2164			updateClipPlanes = false;
2165		}
2166	}
2167
	// Stores 'resource' as the context's texture for 'sampler'. The sampler
	// index must be below TOTAL_IMAGE_UNITS (checked by assertion only).
	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS);

		context->texture[sampler] = resource;
	}
2174
	// Forwards the surface for one face and mipmap level to the context's
	// sampler object for 'sampler'. The assertion requires
	// sampler < TOTAL_IMAGE_UNITS, face < 6 and level < MIPMAP_LEVELS.
	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
	{
		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);

		context->sampler[sampler].setTextureLevel(face, level, surface, type);
	}
2181
2182	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2183	{
2184		if(type == SAMPLER_PIXEL)
2185		{
2186			PixelProcessor::setTextureFilter(sampler, textureFilter);
2187		}
2188		else
2189		{
2190			VertexProcessor::setTextureFilter(sampler, textureFilter);
2191		}
2192	}
2193
2194	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2195	{
2196		if(type == SAMPLER_PIXEL)
2197		{
2198			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2199		}
2200		else
2201		{
2202			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2203		}
2204	}
2205
2206	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2207	{
2208		if(type == SAMPLER_PIXEL)
2209		{
2210			PixelProcessor::setGatherEnable(sampler, enable);
2211		}
2212		else
2213		{
2214			VertexProcessor::setGatherEnable(sampler, enable);
2215		}
2216	}
2217
2218	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2219	{
2220		if(type == SAMPLER_PIXEL)
2221		{
2222			PixelProcessor::setAddressingModeU(sampler, addressMode);
2223		}
2224		else
2225		{
2226			VertexProcessor::setAddressingModeU(sampler, addressMode);
2227		}
2228	}
2229
2230	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2231	{
2232		if(type == SAMPLER_PIXEL)
2233		{
2234			PixelProcessor::setAddressingModeV(sampler, addressMode);
2235		}
2236		else
2237		{
2238			VertexProcessor::setAddressingModeV(sampler, addressMode);
2239		}
2240	}
2241
2242	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2243	{
2244		if(type == SAMPLER_PIXEL)
2245		{
2246			PixelProcessor::setAddressingModeW(sampler, addressMode);
2247		}
2248		else
2249		{
2250			VertexProcessor::setAddressingModeW(sampler, addressMode);
2251		}
2252	}
2253
2254	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2255	{
2256		if(type == SAMPLER_PIXEL)
2257		{
2258			PixelProcessor::setReadSRGB(sampler, sRGB);
2259		}
2260		else
2261		{
2262			VertexProcessor::setReadSRGB(sampler, sRGB);
2263		}
2264	}
2265
2266	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2267	{
2268		if(type == SAMPLER_PIXEL)
2269		{
2270			PixelProcessor::setMipmapLOD(sampler, bias);
2271		}
2272		else
2273		{
2274			VertexProcessor::setMipmapLOD(sampler, bias);
2275		}
2276	}
2277
2278	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2279	{
2280		if(type == SAMPLER_PIXEL)
2281		{
2282			PixelProcessor::setBorderColor(sampler, borderColor);
2283		}
2284		else
2285		{
2286			VertexProcessor::setBorderColor(sampler, borderColor);
2287		}
2288	}
2289
2290	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2291	{
2292		if(type == SAMPLER_PIXEL)
2293		{
2294			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2295		}
2296		else
2297		{
2298			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2299		}
2300	}
2301
2302	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2303	{
2304		if(type == SAMPLER_PIXEL)
2305		{
2306			PixelProcessor::setSwizzleR(sampler, swizzleR);
2307		}
2308		else
2309		{
2310			VertexProcessor::setSwizzleR(sampler, swizzleR);
2311		}
2312	}
2313
2314	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2315	{
2316		if(type == SAMPLER_PIXEL)
2317		{
2318			PixelProcessor::setSwizzleG(sampler, swizzleG);
2319		}
2320		else
2321		{
2322			VertexProcessor::setSwizzleG(sampler, swizzleG);
2323		}
2324	}
2325
2326	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2327	{
2328		if(type == SAMPLER_PIXEL)
2329		{
2330			PixelProcessor::setSwizzleB(sampler, swizzleB);
2331		}
2332		else
2333		{
2334			VertexProcessor::setSwizzleB(sampler, swizzleB);
2335		}
2336	}
2337
2338	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2339	{
2340		if(type == SAMPLER_PIXEL)
2341		{
2342			PixelProcessor::setSwizzleA(sampler, swizzleA);
2343		}
2344		else
2345		{
2346			VertexProcessor::setSwizzleA(sampler, swizzleA);
2347		}
2348	}
2349
	// Forwards the point sprite enable flag to the context.
	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
	{
		context->setPointSpriteEnable(pointSpriteEnable);
	}
2354
	// Forwards the point scale enable flag to the context.
	void Renderer::setPointScaleEnable(bool pointScaleEnable)
	{
		context->setPointScaleEnable(pointScaleEnable);
	}
2359
	// Stores 'width' as the context's line width. The value is not validated here.
	void Renderer::setLineWidth(float width)
	{
		context->lineWidth = width;
	}
2364
	// Stores 'bias' in depthBias.
	// NOTE(review): depthBias is declared outside this part of the file; confirm
	// whether it is per-renderer or shared state.
	void Renderer::setDepthBias(float bias)
	{
		depthBias = bias;
	}
2369
	// Stores 'slopeBias' in slopeDepthBias.
	// NOTE(review): slopeDepthBias is declared outside this part of the file;
	// confirm whether it is per-renderer or shared state.
	void Renderer::setSlopeDepthBias(float slopeBias)
	{
		slopeDepthBias = slopeBias;
	}
2374
	// Stores the rasterizer discard flag in the context.
	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
	{
		context->rasterizerDiscard = rasterizerDiscard;
	}
2379
	// Makes 'shader' the context's pixel shader and then calls loadConstants()
	// with it.
	// NOTE(review): loadConstants() is defined elsewhere; presumably it applies
	// the constants defined by the shader itself - confirm, including whether a
	// null 'shader' is accepted.
	void Renderer::setPixelShader(const PixelShader *shader)
	{
		context->pixelShader = shader;

		loadConstants(shader);
	}
2386
	// Makes 'shader' the context's vertex shader and then calls loadConstants()
	// with it.
	// NOTE(review): loadConstants() is defined elsewhere; presumably it applies
	// the constants defined by the shader itself - confirm, including whether a
	// null 'shader' is accepted.
	void Renderer::setVertexShader(const VertexShader *shader)
	{
		context->vertexShader = shader;

		loadConstants(shader);
	}
2393
2394	void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2395	{
2396		for(int i = 0; i < DRAW_COUNT; i++)
2397		{
2398			if(drawCall[i]->psDirtyConstF < index + count)
2399			{
2400				drawCall[i]->psDirtyConstF = index + count;
2401			}
2402		}
2403
2404		for(int i = 0; i < count; i++)
2405		{
2406			PixelProcessor::setFloatConstant(index + i, value);
2407			value += 4;
2408		}
2409	}
2410
2411	void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2412	{
2413		for(int i = 0; i < DRAW_COUNT; i++)
2414		{
2415			if(drawCall[i]->psDirtyConstI < index + count)
2416			{
2417				drawCall[i]->psDirtyConstI = index + count;
2418			}
2419		}
2420
2421		for(int i = 0; i < count; i++)
2422		{
2423			PixelProcessor::setIntegerConstant(index + i, value);
2424			value += 4;
2425		}
2426	}
2427
2428	void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2429	{
2430		for(int i = 0; i < DRAW_COUNT; i++)
2431		{
2432			if(drawCall[i]->psDirtyConstB < index + count)
2433			{
2434				drawCall[i]->psDirtyConstB = index + count;
2435			}
2436		}
2437
2438		for(int i = 0; i < count; i++)
2439		{
2440			PixelProcessor::setBooleanConstant(index + i, *boolean);
2441			boolean++;
2442		}
2443	}
2444
2445	void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2446	{
2447		for(int i = 0; i < DRAW_COUNT; i++)
2448		{
2449			if(drawCall[i]->vsDirtyConstF < index + count)
2450			{
2451				drawCall[i]->vsDirtyConstF = index + count;
2452			}
2453		}
2454
2455		for(int i = 0; i < count; i++)
2456		{
2457			VertexProcessor::setFloatConstant(index + i, value);
2458			value += 4;
2459		}
2460	}
2461
2462	void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2463	{
2464		for(int i = 0; i < DRAW_COUNT; i++)
2465		{
2466			if(drawCall[i]->vsDirtyConstI < index + count)
2467			{
2468				drawCall[i]->vsDirtyConstI = index + count;
2469			}
2470		}
2471
2472		for(int i = 0; i < count; i++)
2473		{
2474			VertexProcessor::setIntegerConstant(index + i, value);
2475			value += 4;
2476		}
2477	}
2478
2479	void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2480	{
2481		for(int i = 0; i < DRAW_COUNT; i++)
2482		{
2483			if(drawCall[i]->vsDirtyConstB < index + count)
2484			{
2485				drawCall[i]->vsDirtyConstB = index + count;
2486			}
2487		}
2488
2489		for(int i = 0; i < count; i++)
2490		{
2491			VertexProcessor::setBooleanConstant(index + i, *boolean);
2492			boolean++;
2493		}
2494	}
2495
	// Forwards model matrix 'M' and index 'i' to the vertex processor.
	// NOTE(review): 'i' presumably selects one of several model matrices - confirm
	// against VertexProcessor::setModelMatrix.
	void Renderer::setModelMatrix(const Matrix &M, int i)
	{
		VertexProcessor::setModelMatrix(M, i);
	}
2500
	// Forwards view matrix 'V' to the vertex processor and sets updateClipPlanes.
	// NOTE(review): the flag presumably triggers recomputing the clip planes with
	// the new matrix - confirm where updateClipPlanes is consumed.
	void Renderer::setViewMatrix(const Matrix &V)
	{
		VertexProcessor::setViewMatrix(V);
		updateClipPlanes = true;
	}
2506
	// Forwards base matrix 'B' to the vertex processor and sets updateClipPlanes.
	// NOTE(review): the flag presumably triggers recomputing the clip planes with
	// the new matrix - confirm where updateClipPlanes is consumed.
	void Renderer::setBaseMatrix(const Matrix &B)
	{
		VertexProcessor::setBaseMatrix(B);
		updateClipPlanes = true;
	}
2512
	// Forwards projection matrix 'P' to the vertex processor and sets
	// updateClipPlanes.
	// NOTE(review): the flag presumably triggers recomputing the clip planes with
	// the new matrix - confirm where updateClipPlanes is consumed.
	void Renderer::setProjectionMatrix(const Matrix &P)
	{
		VertexProcessor::setProjectionMatrix(P);
		updateClipPlanes = true;
	}
2518
	// Appends 'query' to the renderer's list of queries. No check is made for
	// duplicates or null.
	void Renderer::addQuery(Query *query)
	{
		queries.push_back(query);
	}
2523
	// Removes 'query' from the renderer's list of queries via queries.remove().
	// NOTE(review): 'queries' is presumably a std::list, in which case every
	// matching entry is removed and a missing entry is a no-op - confirm.
	void Renderer::removeQuery(Query *query)
	{
		queries.remove(query);
	}
2528
2529	#if PERF_HUD
		// Returns the current value of threadCount. Only built when PERF_HUD is set.
		int Renderer::getThreadCount()
		{
			return threadCount;
		}
2534
		// Returns the vertex timer value recorded for 'thread'. The index is not
		// range-checked here.
		int64_t Renderer::getVertexTime(int thread)
		{
			return vertexTime[thread];
		}
2539
		// Returns the setup timer value recorded for 'thread'. The index is not
		// range-checked here.
		int64_t Renderer::getSetupTime(int thread)
		{
			return setupTime[thread];
		}
2544
		// Returns the pixel timer value recorded for 'thread'. The index is not
		// range-checked here.
		int64_t Renderer::getPixelTime(int thread)
		{
			return pixelTime[thread];
		}
2549
2550		void Renderer::resetTimers()
2551		{
2552			for(int thread = 0; thread < threadCount; thread++)
2553			{
2554				vertexTime[thread] = 0;
2555				setupTime[thread] = 0;
2556				pixelTime[thread] = 0;
2557			}
2558		}
2559	#endif
2560
	// Stores a copy of 'viewport' in the renderer. 'this->' is required because
	// the parameter shadows the member of the same name.
	void Renderer::setViewport(const Viewport &viewport)
	{
		this->viewport = viewport;
	}
2565
	// Stores a copy of 'scissor' in the renderer. 'this->' is required because
	// the parameter shadows the member of the same name.
	void Renderer::setScissor(const Rect &scissor)
	{
		this->scissor = scissor;
	}
2570
	// Stores 'flags' in clipFlags, shifted left by 8 bits so that they sit above
	// the bits reserved for the legacy frustum.
	void Renderer::setClipFlags(int flags)
	{
		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
	}
2575
2576	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2577	{
2578		if(index < MAX_CLIP_PLANES)
2579		{
2580			userPlane[index] = plane;
2581		}
2582		else ASSERT(false);
2583
2584		updateClipPlanes = true;
2585	}
2586
2587	void Renderer::updateConfiguration(bool initialUpdate)
2588	{
2589		bool newConfiguration = swiftConfig->hasNewConfiguration();
2590
2591		if(newConfiguration || initialUpdate)
2592		{
2593			terminateThreads();
2594
2595			SwiftConfig::Configuration configuration = {};
2596			swiftConfig->getConfiguration(configuration);
2597
2598			precacheVertex = !newConfiguration && configuration.precache;
2599			precacheSetup = !newConfiguration && configuration.precache;
2600			precachePixel = !newConfiguration && configuration.precache;
2601
2602			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2603			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2604			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2605
2606			switch(configuration.textureSampleQuality)
2607			{
2608			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2609			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2610			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2611			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2612			}
2613
2614			switch(configuration.mipmapQuality)
2615			{
2616			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2617			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2618			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2619			}
2620
2621			setPerspectiveCorrection(configuration.perspectiveCorrection);
2622
2623			switch(configuration.transcendentalPrecision)
2624			{
2625			case 0:
2626				logPrecision = APPROXIMATE;
2627				expPrecision = APPROXIMATE;
2628				rcpPrecision = APPROXIMATE;
2629				rsqPrecision = APPROXIMATE;
2630				break;
2631			case 1:
2632				logPrecision = PARTIAL;
2633				expPrecision = PARTIAL;
2634				rcpPrecision = PARTIAL;
2635				rsqPrecision = PARTIAL;
2636				break;
2637			case 2:
2638				logPrecision = ACCURATE;
2639				expPrecision = ACCURATE;
2640				rcpPrecision = ACCURATE;
2641				rsqPrecision = ACCURATE;
2642				break;
2643			case 3:
2644				logPrecision = WHQL;
2645				expPrecision = WHQL;
2646				rcpPrecision = WHQL;
2647				rsqPrecision = WHQL;
2648				break;
2649			case 4:
2650				logPrecision = IEEE;
2651				expPrecision = IEEE;
2652				rcpPrecision = IEEE;
2653				rsqPrecision = IEEE;
2654				break;
2655			default:
2656				logPrecision = ACCURATE;
2657				expPrecision = ACCURATE;
2658				rcpPrecision = ACCURATE;
2659				rsqPrecision = ACCURATE;
2660				break;
2661			}
2662
2663			switch(configuration.transparencyAntialiasing)
2664			{
2665			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2666			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2667			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2668			}
2669
2670			switch(configuration.threadCount)
2671			{
2672			case -1: threadCount = CPUID::coreCount();        break;
2673			case 0:  threadCount = CPUID::processAffinity();  break;
2674			default: threadCount = configuration.threadCount; break;
2675			}
2676
2677			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2678			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2679			CPUID::setEnableSSE3(configuration.enableSSE3);
2680			CPUID::setEnableSSE2(configuration.enableSSE2);
2681			CPUID::setEnableSSE(configuration.enableSSE);
2682
2683			for(int pass = 0; pass < 10; pass++)
2684			{
2685				optimization[pass] = configuration.optimization[pass];
2686			}
2687
2688			forceWindowed = configuration.forceWindowed;
2689			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2690			postBlendSRGB = configuration.postBlendSRGB;
2691			exactColorRounding = configuration.exactColorRounding;
2692			forceClearRegisters = configuration.forceClearRegisters;
2693
2694		#ifndef NDEBUG
2695			minPrimitives = configuration.minPrimitives;
2696			maxPrimitives = configuration.maxPrimitives;
2697		#endif
2698		}
2699
2700		if(!initialUpdate && !worker[0])
2701		{
2702			initializeThreads();
2703		}
2704	}
2705}
2706