// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "QuadRasterizer.hpp"
16
17#include "Primitive.hpp"
18#include "Renderer.hpp"
19#include "Shader/Constants.hpp"
20#include "Common/Math.hpp"
21#include "Common/Debug.hpp"
22
23namespace sw
24{
25	extern bool veryEarlyDepthTest;
26	extern bool complementaryDepthBuffer;
27	extern bool fullPixelPositionRegister;
28
29	extern int clusterCount;
30
31	QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader)
32	{
33	}
34
35	QuadRasterizer::~QuadRasterizer()
36	{
37	}
38
39	void QuadRasterizer::generate()
40	{
41		#if PERF_PROFILE
42			for(int i = 0; i < PERF_TIMERS; i++)
43			{
44				cycles[i] = 0;
45			}
46
47			Long pixelTime = Ticks();
48		#endif
49
50		constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
51		occlusion = 0;
52		int clusterCount = Renderer::getClusterCount();
53
54		Do
55		{
56			Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin));
57			Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax));
58
59			Int cluster2 = cluster + cluster;
60			yMin += clusterCount * 2 - 2 - cluster2;
61			yMin &= -clusterCount * 2;
62			yMin += cluster2;
63
64			If(yMin < yMax)
65			{
66				rasterize(yMin, yMax);
67			}
68
69			primitive += sizeof(Primitive) * state.multiSample;
70			count--;
71		}
72		Until(count == 0)
73
74		if(state.occlusionEnabled)
75		{
76			UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster);
77			clusterOcclusion += occlusion;
78			*Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion;
79		}
80
81		#if PERF_PROFILE
82			cycles[PERF_PIXEL] = Ticks() - pixelTime;
83
84			for(int i = 0; i < PERF_TIMERS; i++)
85			{
86				*Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i];
87			}
88		#endif
89
90		Return();
91	}
92
93	void QuadRasterizer::rasterize(Int &yMin, Int &yMax)
94	{
95		Pointer<Byte> cBuffer[RENDERTARGETS];
96		Pointer<Byte> zBuffer;
97		Pointer<Byte> sBuffer;
98
99		for(int index = 0; index < RENDERTARGETS; index++)
100		{
101			if(state.colorWriteActive(index))
102			{
103				cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
104			}
105		}
106
107		if(state.depthTestActive)
108		{
109			zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
110		}
111
112		if(state.stencilActive)
113		{
114			sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB));
115		}
116
117		Int y = yMin;
118
119		Do
120		{
121			Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
122			Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
123			Int x0 = Min(x0a, x0b);
124
125			for(unsigned int q = 1; q < state.multiSample; q++)
126			{
127				x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
128				x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
129				x0 = Min(x0, Min(x0a, x0b));
130			}
131
132			x0 &= 0xFFFFFFFE;
133
134			Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
135			Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
136			Int x1 = Max(x1a, x1b);
137
138			for(unsigned int q = 1; q < state.multiSample; q++)
139			{
140				x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
141				x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
142				x1 = Max(x1, Max(x1a, x1b));
143			}
144
145			Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
146
147			if(interpolateZ())
148			{
149				for(unsigned int q = 0; q < state.multiSample; q++)
150				{
151					Float4 y = yyyy;
152
153					if(state.multiSample > 1)
154					{
155						y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4));
156					}
157
158					Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16);
159				}
160			}
161
162			if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride)
163			{
164				if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == DEPTH_LESSEQUAL || state.depthCompareMode == DEPTH_LESS))   // FIXME: Both modes ok?
165				{
166					Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
167
168					Pointer<Byte> buffer;
169					Int pitch;
170
171					if(!state.quadLayoutDepthBuffer)
172					{
173						buffer = zBuffer + 4 * x0;
174						pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
175					}
176					else
177					{
178						buffer = zBuffer + 8 * x0;
179					}
180
181					For(Int x = x0, x < x1, x += 2)
182					{
183						Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
184
185						Float4 zValue;
186
187						if(!state.quadLayoutDepthBuffer)
188						{
189							// FIXME: Properly optimizes?
190							zValue.xy = *Pointer<Float4>(buffer);
191							zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
192						}
193						else
194						{
195							zValue = *Pointer<Float4>(buffer, 16);
196						}
197
198						Int4 zTest;
199
200						if(complementaryDepthBuffer)
201						{
202							zTest = CmpLE(zValue, z);
203						}
204						else
205						{
206							zTest = CmpNLT(zValue, z);
207						}
208
209						Int zMask = SignMask(zTest);
210
211						If(zMask == 0)
212						{
213							x0 += 2;
214						}
215						Else
216						{
217							x = x1;
218						}
219
220						xxxx += Float4(2);
221
222						if(!state.quadLayoutDepthBuffer)
223						{
224							buffer += 8;
225						}
226						else
227						{
228							buffer += 16;
229						}
230					}
231				}
232			}
233
234			If(x0 < x1)
235			{
236				if(interpolateW())
237				{
238					Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16);
239				}
240
241				for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
242				{
243					for(int component = 0; component < 4; component++)
244					{
245						if(state.interpolant[interpolant].component & (1 << component))
246						{
247							Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16);
248
249							if(!(state.interpolant[interpolant].flat & (1 << component)))
250							{
251								Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16);
252							}
253						}
254					}
255				}
256
257				if(state.fog.component)
258				{
259					Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16);
260
261					if(!state.fog.flat)
262					{
263						Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16);
264					}
265				}
266
267				Short4 xLeft[4];
268				Short4 xRight[4];
269
270				for(unsigned int q = 0; q < state.multiSample; q++)
271				{
272					xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span));
273					xRight[q] = xLeft[q];
274
275					xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2);
276					xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1);
277				}
278
279				For(Int x = x0, x < x1, x += 2)
280				{
281					Short4 xxxx = Short4(x);
282					Int cMask[4];
283
284					for(unsigned int q = 0; q < state.multiSample; q++)
285					{
286						Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx);
287						cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F;
288					}
289
290					quad(cBuffer, zBuffer, sBuffer, cMask, x, y);
291				}
292			}
293
294			int clusterCount = Renderer::getClusterCount();
295
296			for(int index = 0; index < RENDERTARGETS; index++)
297			{
298				if(state.colorWriteActive(index))
299				{
300					cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
301				}
302			}
303
304			if(state.depthTestActive)
305			{
306				zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
307			}
308
309			if(state.stencilActive)
310			{
311				sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount));   // FIXME: Precompute
312			}
313
314			y += 2 * clusterCount;
315		}
316		Until(y >= yMax)
317	}
318
319	Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp)
320	{
321		Float4 interpolant = D;
322
323		if(!flat)
324		{
325			interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16);
326
327			if(perspective)
328			{
329				interpolant *= rhw;
330			}
331		}
332
333		if(clamp)
334		{
335			interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f));
336		}
337
338		return interpolant;
339	}
340
341	bool QuadRasterizer::interpolateZ() const
342	{
343		return state.depthTestActive || state.pixelFogActive() || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
344	}
345
346	bool QuadRasterizer::interpolateW() const
347	{
348		return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
349	}
350}
351