VertexPipeline.cpp revision 6aea1b274d591a8f5fcd4cf3f5f75637ba34a1a5
1// SwiftShader Software Renderer
2//
3// Copyright(c) 2005-2012 TransGaming Inc.
4//
5// All rights reserved. No part of this software may be copied, distributed, transmitted,
6// transcribed, stored in a retrieval system, translated into any human or computer
7// language by any means, or disclosed to third parties without the explicit written
8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9// or implied, including but not limited to any patent rights, are granted to you.
10//
11
12#include "VertexPipeline.hpp"
13
14#include "Vertex.hpp"
15#include "Renderer.hpp"
16#include "Debug.hpp"
17
18#include <string.h>
19#include <stdlib.h>
20#include <stdio.h>
21
22#undef max
23#undef min
24
25namespace sw
26{
27	VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0)
28	{
29	}
30
31	VertexPipeline::~VertexPipeline()
32	{
33	}
34
35	Vector4f VertexPipeline::transformBlend(Registers &r, const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
36	{
37		Vector4f dst;
38
39		if(state.vertexBlendMatrixCount == 0)
40		{
41			dst = transform(src, matrix, homogeneous);
42		}
43		else
44		{
45			UInt index0[4];
46			UInt index1[4];
47			UInt index2[4];
48			UInt index3[4];
49
50			if(state.indexedVertexBlendEnable)
51			{
52				for(int i = 0; i < 4; i++)
53				{
54					Float4 B = r.v[BlendIndices].x;
55					UInt indices;
56
57					switch(i)
58					{
59					case 0: indices = As<UInt>(Float(B.x)); break;
60					case 1: indices = As<UInt>(Float(B.y)); break;
61					case 2: indices = As<UInt>(Float(B.z)); break;
62					case 3: indices = As<UInt>(Float(B.w)); break;
63					}
64
65					index0[i] = (indices & 0x000000FF) << 6;
66					index1[i] = (indices & 0x0000FF00) >> 2;
67					index2[i] = (indices & 0x00FF0000) >> 10;
68					index3[i] = (indices & 0xFF000000) >> 18;
69				}
70			}
71			else
72			{
73				for(int i = 0; i < 4; i++)
74				{
75					index0[i] = 0 * 64;
76					index1[i] = 1 * 64;
77					index2[i] = 2 * 64;
78					index3[i] = 3 * 64;
79				}
80			}
81
82			Float4 weight0;
83			Float4 weight1;
84			Float4 weight2;
85			Float4 weight3;
86
87			switch(state.vertexBlendMatrixCount)
88			{
89			case 4: weight2 = r.v[BlendWeight].z;
90			case 3: weight1 = r.v[BlendWeight].y;
91			case 2: weight0 = r.v[BlendWeight].x;
92			case 1:
93				break;
94			}
95
96			if(state.vertexBlendMatrixCount == 1)
97			{
98				dst = transform(src, matrix, index0, homogeneous);
99			}
100			else if(state.vertexBlendMatrixCount == 2)
101			{
102				weight1 = Float4(1.0f) - weight0;
103
104				Vector4f pos0;
105				Vector4f pos1;
106
107				pos0 = transform(src, matrix, index0, homogeneous);
108				pos1 = transform(src, matrix, index1, homogeneous);
109
110				dst.x = pos0.x * weight0 + pos1.x * weight1;   // FIXME: Vector4f operators
111				dst.y = pos0.y * weight0 + pos1.y * weight1;
112				dst.z = pos0.z * weight0 + pos1.z * weight1;
113				dst.w = pos0.w * weight0 + pos1.w * weight1;
114			}
115			else if(state.vertexBlendMatrixCount == 3)
116			{
117				weight2 = Float4(1.0f) - (weight0 + weight1);
118
119				Vector4f pos0;
120				Vector4f pos1;
121				Vector4f pos2;
122
123				pos0 = transform(src, matrix, index0, homogeneous);
124				pos1 = transform(src, matrix, index1, homogeneous);
125				pos2 = transform(src, matrix, index2, homogeneous);
126
127				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2;
128				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2;
129				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2;
130				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2;
131			}
132			else if(state.vertexBlendMatrixCount == 4)
133			{
134				weight3 = Float4(1.0f) - (weight0 + weight1 + weight2);
135
136				Vector4f pos0;
137				Vector4f pos1;
138				Vector4f pos2;
139				Vector4f pos3;
140
141				pos0 = transform(src, matrix, index0, homogeneous);
142				pos1 = transform(src, matrix, index1, homogeneous);
143				pos2 = transform(src, matrix, index2, homogeneous);
144				pos3 = transform(src, matrix, index3, homogeneous);
145
146				dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3;
147				dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3;
148				dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3;
149				dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3;
150			}
151		}
152
153		return dst;
154	}
155
156	void VertexPipeline::pipeline(Registers &r)
157	{
158		Vector4f position;
159		Vector4f normal;
160
161		if(!state.preTransformed)
162		{
163			position = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.transformT)), true);
164		}
165		else
166		{
167			position = r.v[PositionT];
168		}
169
170		r.o[Pos].x = position.x;
171		r.o[Pos].y = position.y;
172		r.o[Pos].z = position.z;
173		r.o[Pos].w = position.w;
174
175		Vector4f vertexPosition = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
176
177		if(state.vertexNormalActive)
178		{
179			normal = transformBlend(r, r.v[Normal], Pointer<Byte>(r.data + OFFSET(DrawData,ff.normalTransformT)), false);
180
181			if(state.normalizeNormals)
182			{
183				normal = normalize(normal);
184			}
185		}
186
187		if(!state.vertexLightingActive)
188		{
189			// FIXME: Don't process if not used at all
190			if(state.diffuseActive && state.input[Color0])
191			{
192				Vector4f diffuse = r.v[Color0];
193
194				r.o[D0].x = diffuse.x;
195				r.o[D0].y = diffuse.y;
196				r.o[D0].z = diffuse.z;
197				r.o[D0].w = diffuse.w;
198			}
199			else
200			{
201				r.o[D0].x = Float4(1.0f);
202				r.o[D0].y = Float4(1.0f);
203				r.o[D0].z = Float4(1.0f);
204				r.o[D0].w = Float4(1.0f);
205			}
206
207			// FIXME: Don't process if not used at all
208			if(state.specularActive && state.input[Color1])
209			{
210				Vector4f specular = r.v[Color1];
211
212				r.o[D1].x = specular.x;
213				r.o[D1].y = specular.y;
214				r.o[D1].z = specular.z;
215				r.o[D1].w = specular.w;
216			}
217			else
218			{
219				r.o[D1].x = Float4(0.0f);
220				r.o[D1].y = Float4(0.0f);
221				r.o[D1].z = Float4(0.0f);
222				r.o[D1].w = Float4(1.0f);
223			}
224		}
225		else
226		{
227			r.o[D0].x = Float4(0.0f);
228			r.o[D0].y = Float4(0.0f);
229			r.o[D0].z = Float4(0.0f);
230			r.o[D0].w = Float4(0.0f);
231
232			r.o[D1].x = Float4(0.0f);
233			r.o[D1].y = Float4(0.0f);
234			r.o[D1].z = Float4(0.0f);
235			r.o[D1].w = Float4(0.0f);
236
237			Vector4f ambient;
238			Float4 globalAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.globalAmbient));   // FIXME: Unpack
239
240			ambient.x = globalAmbient.x;
241			ambient.y = globalAmbient.y;
242			ambient.z = globalAmbient.z;
243
244			for(int i = 0; i < 8; i++)
245			{
246				if(!(state.vertexLightActive & (1 << i)))
247				{
248					continue;
249				}
250
251				Vector4f L;    // Light vector
252				Float4 att;   // Attenuation
253
254				// Attenuation
255				{
256					Float4 d;   // Distance
257
258					L.x = L.y = L.z = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightPosition[i]));   // FIXME: Unpack
259					L.x = L.x.xxxx;
260					L.y = L.y.yyyy;
261					L.z = L.z.zzzz;
262
263					L.x -= vertexPosition.x;
264					L.y -= vertexPosition.y;
265					L.z -= vertexPosition.z;
266					d = dot3(L, L);
267					d = RcpSqrt_pp(d);     // FIXME: Sufficient precision?
268					L.x *= d;
269					L.y *= d;
270					L.z *= d;
271					d = Rcp_pp(d);       // FIXME: Sufficient precision?
272
273					Float4 q = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationQuadratic[i]));
274					Float4 l = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationLinear[i]));
275					Float4 c = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.attenuationConstant[i]));
276
277					att = Rcp_pp((q * d + l) * d + c);
278				}
279
280				// Ambient per light
281				{
282					Float4 lightAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightAmbient[i]));   // FIXME: Unpack
283
284					ambient.x = ambient.x + lightAmbient.x * att;
285					ambient.y = ambient.y + lightAmbient.y * att;
286					ambient.z = ambient.z + lightAmbient.z * att;
287				}
288
289				// Diffuse
290				if(state.vertexNormalActive)
291				{
292					Float4 dot;
293
294					dot = dot3(L, normal);
295					dot = Max(dot, Float4(0.0f));
296					dot *= att;
297
298					Vector4f diff;
299
300					if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
301					{
302						diff.x = diff.y = diff.z = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse));   // FIXME: Unpack
303						diff.x = diff.x.xxxx;
304						diff.y = diff.y.yyyy;
305						diff.z = diff.z.zzzz;
306					}
307					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
308					{
309						diff = r.v[Color0];
310					}
311					else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
312					{
313						diff = r.v[Color1];
314					}
315					else ASSERT(false);
316
317					Float4 lightDiffuse = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightDiffuse[i]));
318
319					r.o[D0].x = r.o[D0].x + diff.x * dot * lightDiffuse.x;   // FIXME: Clamp first?
320					r.o[D0].y = r.o[D0].y + diff.y * dot * lightDiffuse.y;   // FIXME: Clamp first?
321					r.o[D0].z = r.o[D0].z + diff.z * dot * lightDiffuse.z;   // FIXME: Clamp first?
322				}
323
324				// Specular
325				if(state.vertexSpecularActive)
326				{
327					Vector4f S;
328					Vector4f C;   // Camera vector
329					Float4 pow;
330
331					pow = *Pointer<Float>(r.data + OFFSET(DrawData,ff.materialShininess));
332
333					S.x = Float4(0.0f) - vertexPosition.x;
334					S.y = Float4(0.0f) - vertexPosition.y;
335					S.z = Float4(0.0f) - vertexPosition.z;
336					C = normalize(S);
337
338					S.x = L.x + C.x;
339					S.y = L.y + C.y;
340					S.z = L.z + C.z;
341					C = normalize(S);
342
343					Float4 dot = Max(dot3(C, normal), Float4(0.0f));   // FIXME: max(dot3(C, normal), 0)
344
345					Float4 P = power(dot, pow);
346					P *= att;
347
348					Vector4f spec;
349
350					if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
351					{
352						Float4 materialSpecular = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialSpecular));   // FIXME: Unpack
353
354						spec.x = materialSpecular.x;
355						spec.y = materialSpecular.y;
356						spec.z = materialSpecular.z;
357					}
358					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
359					{
360						spec = r.v[Color0];
361					}
362					else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
363					{
364						spec = r.v[Color1];
365					}
366					else ASSERT(false);
367
368					Float4 lightSpecular = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.lightSpecular[i]));
369
370					spec.x *= lightSpecular.x;
371					spec.y *= lightSpecular.y;
372					spec.z *= lightSpecular.z;
373
374					spec.x *= P;
375					spec.y *= P;
376					spec.z *= P;
377
378					spec.x = Max(spec.x, Float4(0.0f));
379					spec.y = Max(spec.y, Float4(0.0f));
380					spec.z = Max(spec.z, Float4(0.0f));
381
382					r.o[D1].x = r.o[D1].x + spec.x;
383					r.o[D1].y = r.o[D1].y + spec.y;
384					r.o[D1].z = r.o[D1].z + spec.z;
385				}
386			}
387
388			if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL)
389			{
390				Float4 materialAmbient = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialAmbient));   // FIXME: Unpack
391
392				ambient.x = ambient.x * materialAmbient.x;
393				ambient.y = ambient.y * materialAmbient.y;
394				ambient.z = ambient.z * materialAmbient.z;
395			}
396			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1)
397			{
398				Vector4f materialDiffuse = r.v[Color0];
399
400				ambient.x = ambient.x * materialDiffuse.x;
401				ambient.y = ambient.y * materialDiffuse.y;
402				ambient.z = ambient.z * materialDiffuse.z;
403			}
404			else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2)
405			{
406				Vector4f materialSpecular = r.v[Color1];
407
408				ambient.x = ambient.x * materialSpecular.x;
409				ambient.y = ambient.y * materialSpecular.y;
410				ambient.z = ambient.z * materialSpecular.z;
411			}
412			else ASSERT(false);
413
414			r.o[D0].x = r.o[D0].x + ambient.x;
415			r.o[D0].y = r.o[D0].y + ambient.y;
416			r.o[D0].z = r.o[D0].z + ambient.z;
417
418			// Emissive
419			if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL)
420			{
421				Float4 materialEmission = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialEmission));   // FIXME: Unpack
422
423				r.o[D0].x = r.o[D0].x + materialEmission.x;
424				r.o[D0].y = r.o[D0].y + materialEmission.y;
425				r.o[D0].z = r.o[D0].z + materialEmission.z;
426			}
427			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1)
428			{
429				Vector4f materialSpecular = r.v[Color0];
430
431				r.o[D0].x = r.o[D0].x + materialSpecular.x;
432				r.o[D0].y = r.o[D0].y + materialSpecular.y;
433				r.o[D0].z = r.o[D0].z + materialSpecular.z;
434			}
435			else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2)
436			{
437				Vector4f materialSpecular = r.v[Color1];
438
439				r.o[D0].x = r.o[D0].x + materialSpecular.x;
440				r.o[D0].y = r.o[D0].y + materialSpecular.y;
441				r.o[D0].z = r.o[D0].z + materialSpecular.z;
442			}
443			else ASSERT(false);
444
445			// Diffuse alpha component
446			if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL)
447			{
448				r.o[D0].w = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww;   // FIXME: Unpack
449			}
450			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1)
451			{
452				Vector4f alpha = r.v[Color0];
453				r.o[D0].w = alpha.w;
454			}
455			else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2)
456			{
457				Vector4f alpha = r.v[Color1];
458				r.o[D0].w = alpha.w;
459			}
460			else ASSERT(false);
461
462			if(state.vertexSpecularActive)
463			{
464				// Specular alpha component
465				if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL)
466				{
467					r.o[D1].w = Float4(*Pointer<Float4>(r.data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww;   // FIXME: Unpack
468				}
469				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1)
470				{
471					Vector4f alpha = r.v[Color0];
472					r.o[D1].w = alpha.w;
473				}
474				else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2)
475				{
476					Vector4f alpha = r.v[Color1];
477					r.o[D1].w = alpha.w;
478				}
479				else ASSERT(false);
480			}
481		}
482
483		if(state.fogActive)
484		{
485			Float4 f;
486
487			if(!state.rangeFogActive)
488			{
489				f = Abs(vertexPosition.z);
490			}
491			else
492			{
493				f = Sqrt(dot3(vertexPosition, vertexPosition));   // FIXME: f = length(vertexPosition);
494			}
495
496			switch(state.vertexFogMode)
497			{
498			case FOG_NONE:
499				if(state.specularActive)
500				{
501					r.o[Fog].x = r.o[D1].w;
502				}
503				else
504				{
505					r.o[Fog].x = Float4(0.0f);
506				}
507				break;
508			case FOG_LINEAR:
509				r.o[Fog].x = f * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
510				break;
511			case FOG_EXP:
512				r.o[Fog].x = exponential2(f * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE)), true);
513				break;
514			case FOG_EXP2:
515				r.o[Fog].x = exponential2((f * f) * *Pointer<Float4>(r.data + OFFSET(DrawData,fog.density2E)), true);
516				break;
517			default:
518				ASSERT(false);
519			}
520		}
521
522		for(int stage = 0; stage < 8; stage++)
523		{
524			processTextureCoordinate(r, stage, normal, position);
525		}
526
527		processPointSize(r);
528	}
529
530	void VertexPipeline::processTextureCoordinate(Registers &r, int stage, Vector4f &normal, Vector4f &position)
531	{
532		if(state.output[T0 + stage].write)
533		{
534			int i = state.textureState[stage].texCoordIndexActive;
535
536			switch(state.textureState[stage].texGenActive)
537			{
538			case TEXGEN_NONE:
539				{
540					Vector4f v = r.v[TexCoord0 + i];
541
542					r.o[T0 + stage].x = v.x;
543					r.o[T0 + stage].y = v.y;
544					r.o[T0 + stage].z = v.z;
545					r.o[T0 + stage].w = v.w;
546				}
547				break;
548			case TEXGEN_PASSTHRU:
549				{
550					Vector4f v = r.v[TexCoord0 + i];
551
552					r.o[T0 + stage].x = v.x;
553					r.o[T0 + stage].y = v.y;
554					r.o[T0 + stage].z = v.z;
555					r.o[T0 + stage].w = v.w;
556
557					if(state.input[TexCoord0 + i])
558					{
559						switch(state.input[TexCoord0 + i].count)
560						{
561						case 1:
562							r.o[T0 + stage].y = Float4(1.0f);
563							r.o[T0 + stage].z = Float4(0.0f);
564							r.o[T0 + stage].w = Float4(0.0f);
565							break;
566						case 2:
567							r.o[T0 + stage].z = Float4(1.0f);
568							r.o[T0 + stage].w = Float4(0.0f);
569							break;
570						case 3:
571							r.o[T0 + stage].w = Float4(1.0f);
572							break;
573						case 4:
574							break;
575						default:
576							ASSERT(false);
577						}
578					}
579				}
580				break;
581			case TEXGEN_NORMAL:
582				{
583					Vector4f Nc;   // Normal vector in camera space
584
585					if(state.vertexNormalActive)
586					{
587						Nc = normal;
588					}
589					else
590					{
591						Nc.x = Float4(0.0f);
592						Nc.y = Float4(0.0f);
593						Nc.z = Float4(0.0f);
594					}
595
596					Nc.w = Float4(1.0f);
597
598					r.o[T0 + stage].x = Nc.x;
599					r.o[T0 + stage].y = Nc.y;
600					r.o[T0 + stage].z = Nc.z;
601					r.o[T0 + stage].w = Nc.w;
602				}
603				break;
604			case TEXGEN_POSITION:
605				{
606					Vector4f Pn = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);   // Position in camera space
607
608					Pn.w = Float4(1.0f);
609
610					r.o[T0 + stage].x = Pn.x;
611					r.o[T0 + stage].y = Pn.y;
612					r.o[T0 + stage].z = Pn.z;
613					r.o[T0 + stage].w = Pn.w;
614				}
615				break;
616			case TEXGEN_REFLECTION:
617				{
618					Vector4f R;   // Reflection vector
619
620					if(state.vertexNormalActive)
621					{
622						Vector4f Nc;   // Normal vector in camera space
623
624						Nc = normal;
625
626						if(state.localViewerActive)
627						{
628							Vector4f Ec;   // Eye vector in camera space
629							Vector4f N2;
630
631							Ec = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
632							Ec = normalize(Ec);
633
634							// R = E - 2 * N * (E . N)
635							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
636
637							R.x = Ec.x - Nc.x * dot;
638							R.y = Ec.y - Nc.y * dot;
639							R.z = Ec.z - Nc.z * dot;
640						}
641						else
642						{
643							// u = -2 * Nz * Nx
644							// v = -2 * Nz * Ny
645							// w = 1 - 2 * Nz * Nz
646
647							R.x = -Float4(2.0f) * Nc.z * Nc.x;
648							R.y = -Float4(2.0f) * Nc.z * Nc.y;
649							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
650						}
651					}
652					else
653					{
654						R.x = Float4(0.0f);
655						R.y = Float4(0.0f);
656						R.z = Float4(0.0f);
657					}
658
659					R.w = Float4(1.0f);
660
661					r.o[T0 + stage].x = R.x;
662					r.o[T0 + stage].y = R.y;
663					r.o[T0 + stage].z = R.z;
664					r.o[T0 + stage].w = R.w;
665				}
666				break;
667			case TEXGEN_SPHEREMAP:
668				{
669					Vector4f R;   // Reflection vector
670
671					if(state.vertexNormalActive)
672					{
673						Vector4f Nc;   // Normal vector in camera space
674
675						Nc = normal;
676
677						if(state.localViewerActive)
678						{
679							Vector4f Ec;   // Eye vector in camera space
680							Vector4f N2;
681
682							Ec = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
683							Ec = normalize(Ec);
684
685							// R = E - 2 * N * (E . N)
686							Float4 dot = Float4(2.0f) * dot3(Ec, Nc);
687
688							R.x = Ec.x - Nc.x * dot;
689							R.y = Ec.y - Nc.y * dot;
690							R.z = Ec.z - Nc.z * dot;
691						}
692						else
693						{
694							// u = -2 * Nz * Nx
695							// v = -2 * Nz * Ny
696							// w = 1 - 2 * Nz * Nz
697
698							R.x = -Float4(2.0f) * Nc.z * Nc.x;
699							R.y = -Float4(2.0f) * Nc.z * Nc.y;
700							R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z;
701						}
702					}
703					else
704					{
705						R.x = Float4(0.0f);
706						R.y = Float4(0.0f);
707						R.z = Float4(0.0f);
708					}
709
710					R.z -= Float4(1.0f);
711					R = normalize(R);
712					R.x = Float4(0.5f) * R.x + Float4(0.5f);
713					R.y = Float4(0.5f) * R.y + Float4(0.5f);
714
715					R.z = Float4(1.0f);
716					R.w = Float4(0.0f);
717
718					r.o[T0 + stage].x = R.x;
719					r.o[T0 + stage].y = R.y;
720					r.o[T0 + stage].z = R.z;
721					r.o[T0 + stage].w = R.w;
722				}
723				break;
724			default:
725				ASSERT(false);
726			}
727
728			Vector4f texTrans0;
729			Vector4f texTrans1;
730			Vector4f texTrans2;
731			Vector4f texTrans3;
732
733			Vector4f T;
734			Vector4f t;
735
736			T.x = r.o[T0 + stage].x;
737			T.y = r.o[T0 + stage].y;
738			T.z = r.o[T0 + stage].z;
739			T.w = r.o[T0 + stage].w;
740
741			switch(state.textureState[stage].textureTransformCountActive)
742			{
743			case 4:
744				texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][3]));   // FIXME: Unpack
745				texTrans3.x = texTrans3.x.xxxx;
746				texTrans3.y = texTrans3.y.yyyy;
747				texTrans3.z = texTrans3.z.zzzz;
748				texTrans3.w = texTrans3.w.wwww;
749				t.w = dot4(T, texTrans3);
750			case 3:
751				texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][2]));   // FIXME: Unpack
752				texTrans2.x = texTrans2.x.xxxx;
753				texTrans2.y = texTrans2.y.yyyy;
754				texTrans2.z = texTrans2.z.zzzz;
755				texTrans2.w = texTrans2.w.wwww;
756				t.z = dot4(T, texTrans2);
757			case 2:
758				texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][1]));   // FIXME: Unpack
759				texTrans1.x = texTrans1.x.xxxx;
760				texTrans1.y = texTrans1.y.yyyy;
761				texTrans1.z = texTrans1.z.zzzz;
762				texTrans1.w = texTrans1.w.wwww;
763				t.y = dot4(T, texTrans1);
764			case 1:
765				texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ff.textureTransform[stage][0]));   // FIXME: Unpack
766				texTrans0.x = texTrans0.x.xxxx;
767				texTrans0.y = texTrans0.y.yyyy;
768				texTrans0.z = texTrans0.z.zzzz;
769				texTrans0.w = texTrans0.w.wwww;
770				t.x = dot4(T, texTrans0);
771
772				r.o[T0 + stage].x = t.x;
773				r.o[T0 + stage].y = t.y;
774				r.o[T0 + stage].z = t.z;
775				r.o[T0 + stage].w = t.w;
776			case 0:
777				break;
778			default:
779				ASSERT(false);
780			}
781		}
782	}
783
784	void VertexPipeline::processPointSize(Registers &r)
785	{
786		if(!state.pointSizeActive)
787		{
788			return;   // Use global pointsize
789		}
790
791		if(state.input[PointSize])
792		{
793			r.o[Pts].y = r.v[PointSize].x;
794		}
795		else
796		{
797			r.o[Pts].y = *Pointer<Float4>(r.data + OFFSET(DrawData,point.pointSize));
798		}
799
800		if(state.pointScaleActive && !state.preTransformed)
801		{
802			Vector4f p = transformBlend(r, r.v[Position], Pointer<Byte>(r.data + OFFSET(DrawData,ff.cameraTransformT)), true);
803
804			Float4 d = Sqrt(dot3(p, p));   // FIXME: length(p);
805
806			Float4 A = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleA));   // FIXME: Unpack
807			Float4 B = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleB));   // FIXME: Unpack
808			Float4 C = *Pointer<Float>(r.data + OFFSET(DrawData,point.pointScaleC));   // FIXME: Unpack
809
810			A = RcpSqrt_pp(A + d * (B + d * C));
811
812			r.o[Pts].y = r.o[Pts].y * Float4(*Pointer<Float>(r.data + OFFSET(DrawData,viewportHeight))) * A;   // FIXME: Unpack
813		}
814	}
815
816	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous)
817	{
818		Vector4f dst;
819
820		if(homogeneous)
821		{
822			Float4 m[4][4];
823
824			for(int j = 0; j < 4; j++)
825			{
826				for(int i = 0; i < 4; i++)
827				{
828					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
829					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
830					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
831					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
832				}
833			}
834
835			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3];
836			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3];
837			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3];
838			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3];
839		}
840		else
841		{
842			Float4 m[3][3];
843
844			for(int j = 0; j < 3; j++)
845			{
846				for(int i = 0; i < 3; i++)
847				{
848					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j);
849					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j);
850					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j);
851					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j);
852				}
853			}
854
855			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
856			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
857			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
858		}
859
860		return dst;
861	}
862
863	Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous)
864	{
865		Vector4f dst;
866
867		if(homogeneous)
868		{
869			Float4 m[4][4];
870
871			for(int j = 0; j < 4; j++)
872			{
873				for(int i = 0; i < 4; i++)
874				{
875					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
876					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
877					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
878					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
879				}
880			}
881
882			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3];
883			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3];
884			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3];
885			dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3];
886		}
887		else
888		{
889			Float4 m[3][3];
890
891			for(int j = 0; j < 3; j++)
892			{
893				for(int i = 0; i < 3; i++)
894				{
895					m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]);
896					m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]);
897					m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]);
898					m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]);
899				}
900			}
901
902			dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2];
903			dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2];
904			dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2];
905		}
906
907		return dst;
908	}
909
910	Vector4f VertexPipeline::normalize(Vector4f &src)
911	{
912		Vector4f dst;
913
914		Float4 rcpLength = RcpSqrt_pp(dot3(src, src));
915
916		dst.x = src.x * rcpLength;
917		dst.y = src.y * rcpLength;
918		dst.z = src.z * rcpLength;
919
920		return dst;
921	}
922
923	Float4 VertexPipeline::power(Float4 &src0, Float4 &src1)
924	{
925		Float4 dst = src0;
926
927		dst = dst * dst;
928		dst = dst * dst;
929		dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f)));
930
931		dst *= src1;
932
933		dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f)));
934		dst = RcpSqrt_pp(dst);
935		dst = RcpSqrt_pp(dst);
936
937		return dst;
938	}
939}
940