1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef sw_ShaderCore_hpp
16#define sw_ShaderCore_hpp
17
18#include "Debug.hpp"
19#include "Shader.hpp"
20#include "Reactor/Reactor.hpp"
21
22namespace sw
23{
24	class Vector4s
25	{
26	public:
27		Vector4s();
28		Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
29		Vector4s(const Vector4s &rhs);
30
31		Short4 &operator[](int i);
32		Vector4s &operator=(const Vector4s &rhs);
33
34		Short4 x;
35		Short4 y;
36		Short4 z;
37		Short4 w;
38	};
39
40	class Vector4i
41	{
42	public:
43		Vector4i();
44		Vector4i(int x, int y, int z, int w);
45		Vector4i(const Vector4i &rhs);
46
47		Int4 &operator[](int i);
48		Vector4i &operator=(const Vector4i &rhs);
49
50		Int4 x;
51		Int4 y;
52		Int4 z;
53		Int4 w;
54	};
55
56	class Vector4u
57	{
58	public:
59		Vector4u();
60		Vector4u(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
61		Vector4u(const Vector4u &rhs);
62
63		UInt4 &operator[](int i);
64		Vector4u &operator=(const Vector4u &rhs);
65
66		UInt4 x;
67		UInt4 y;
68		UInt4 z;
69		UInt4 w;
70	};
71
72	class Vector4f
73	{
74	public:
75		Vector4f();
76		Vector4f(float x, float y, float z, float w);
77		Vector4f(const Vector4f &rhs);
78
79		Float4 &operator[](int i);
80		Vector4f &operator=(const Vector4f &rhs);
81
82		Float4 x;
83		Float4 y;
84		Float4 z;
85		Float4 w;
86	};
87
88	Float4 exponential2(RValue<Float4> x, bool pp = false);
89	Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false);
90	Float4 exponential(RValue<Float4> x, bool pp = false);
91	Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false);
92	Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false);
93	Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false);
94	Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false);
95	Float4 modulo(RValue<Float4> x, RValue<Float4> y);
96	Float4 sine_pi(RValue<Float4> x, bool pp = false);     // limited to [-pi, pi] range
97	Float4 cosine_pi(RValue<Float4> x, bool pp = false);   // limited to [-pi, pi] range
98	Float4 sine(RValue<Float4> x, bool pp = false);
99	Float4 cosine(RValue<Float4> x, bool pp = false);
100	Float4 tangent(RValue<Float4> x, bool pp = false);
101	Float4 arccos(RValue<Float4> x, bool pp = false);
102	Float4 arcsin(RValue<Float4> x, bool pp = false);
103	Float4 arctan(RValue<Float4> x, bool pp = false);
104	Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false);
105	Float4 sineh(RValue<Float4> x, bool pp = false);
106	Float4 cosineh(RValue<Float4> x, bool pp = false);
107	Float4 tangenth(RValue<Float4> x, bool pp = false);
108	Float4 arccosh(RValue<Float4> x, bool pp = false);  // Limited to x >= 1
109	Float4 arcsinh(RValue<Float4> x, bool pp = false);
110	Float4 arctanh(RValue<Float4> x, bool pp = false);  // Limited to ]-1, 1[ range
111
112	Float4 dot2(const Vector4f &v0, const Vector4f &v1);
113	Float4 dot3(const Vector4f &v0, const Vector4f &v1);
114	Float4 dot4(const Vector4f &v0, const Vector4f &v1);
115
116	void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
117	void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
118	void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
119	void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
120	void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
121	void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
122	void transpose2x4h(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
123	void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
124
125	class Register
126	{
127	public:
128		Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w)
129		{
130		}
131
132		Reference<Float4> &operator[](int i)
133		{
134			switch(i)
135			{
136			default:
137			case 0: return x;
138			case 1: return y;
139			case 2: return z;
140			case 3: return w;
141			}
142		}
143
144		Register &operator=(const Register &rhs)
145		{
146			x = rhs.x;
147			y = rhs.y;
148			z = rhs.z;
149			w = rhs.w;
150
151			return *this;
152		}
153
154		Register &operator=(const Vector4f &rhs)
155		{
156			x = rhs.x;
157			y = rhs.y;
158			z = rhs.z;
159			w = rhs.w;
160
161			return *this;
162		}
163
164		operator Vector4f()
165		{
166			Vector4f v;
167
168			v.x = x;
169			v.y = y;
170			v.z = z;
171			v.w = w;
172
173			return v;
174		}
175
176		Reference<Float4> x;
177		Reference<Float4> y;
178		Reference<Float4> z;
179		Reference<Float4> w;
180	};
181
182	template<int S, bool D = false>
183	class RegisterArray
184	{
185	public:
186		RegisterArray(bool dynamic = D) : dynamic(dynamic)
187		{
188			if(dynamic)
189			{
190				x = new Array<Float4>(S);
191				y = new Array<Float4>(S);
192				z = new Array<Float4>(S);
193				w = new Array<Float4>(S);
194			}
195			else
196			{
197				x = new Array<Float4>[S];
198				y = new Array<Float4>[S];
199				z = new Array<Float4>[S];
200				w = new Array<Float4>[S];
201			}
202		}
203
204		~RegisterArray()
205		{
206			if(dynamic)
207			{
208				delete x;
209				delete y;
210				delete z;
211				delete w;
212			}
213			else
214			{
215				delete[] x;
216				delete[] y;
217				delete[] z;
218				delete[] w;
219			}
220		}
221
222		Register operator[](int i)
223		{
224			if(dynamic)
225			{
226				return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
227			}
228			else
229			{
230				return Register(x[i][0], y[i][0], z[i][0], w[i][0]);
231			}
232		}
233
234		Register operator[](RValue<Int> i)
235		{
236			ASSERT(dynamic);
237
238			return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
239		}
240
241	private:
242		const bool dynamic;
243		Array<Float4> *x;
244		Array<Float4> *y;
245		Array<Float4> *z;
246		Array<Float4> *w;
247	};
248
249	class ShaderCore
250	{
251		typedef Shader::Control Control;
252
253	public:
254		void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false);
255		void neg(Vector4f &dst, const Vector4f &src);
256		void ineg(Vector4f &dst, const Vector4f &src);
257		void f2b(Vector4f &dst, const Vector4f &src);
258		void b2f(Vector4f &dst, const Vector4f &src);
259		void f2i(Vector4f &dst, const Vector4f &src);
260		void i2f(Vector4f &dst, const Vector4f &src);
261		void f2u(Vector4f &dst, const Vector4f &src);
262		void u2f(Vector4f &dst, const Vector4f &src);
263		void i2b(Vector4f &dst, const Vector4f &src);
264		void b2i(Vector4f &dst, const Vector4f &src);
265		void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
266		void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
267		void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
268		void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
269		void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
270		void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
271		void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
272		void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
273		void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false);
274		void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
275		void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
276		void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
277		void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
278		void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
279		void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
280		void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
281		void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
282		void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
283		void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false);
284		void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false);
285		void rsq(Vector4f &dst, const Vector4f &src, bool pp = false);
286		void len2(Float4 &dst, const Vector4f &src, bool pp = false);
287		void len3(Float4 &dst, const Vector4f &src, bool pp = false);
288		void len4(Float4 &dst, const Vector4f &src, bool pp = false);
289		void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
290		void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
291		void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
292		void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
293		void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
294		void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
295		void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
296		void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
297		void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
298		void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
299		void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
300		void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3);
301		void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
302		void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
303		void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
304		void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
305		void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
306		void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
307		void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
308		void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
309		void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false);
310		void exp2(Vector4f &dst, const Vector4f &src, bool pp = false);
311		void exp(Vector4f &dst, const Vector4f &src, bool pp = false);
312		void log2x(Vector4f &dst, const Vector4f &src, bool pp = false);
313		void log2(Vector4f &dst, const Vector4f &src, bool pp = false);
314		void log(Vector4f &dst, const Vector4f &src, bool pp = false);
315		void lit(Vector4f &dst, const Vector4f &src);
316		void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
317		void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
318		void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
319		void packHalf2x16(Vector4f &dst, const Vector4f &src);
320		void unpackHalf2x16(Vector4f &dst, const Vector4f &src);
321		void packSnorm2x16(Vector4f &dst, const Vector4f &src);
322		void packUnorm2x16(Vector4f &dst, const Vector4f &src);
323		void unpackSnorm2x16(Vector4f &dst, const Vector4f &src);
324		void unpackUnorm2x16(Vector4f &dst, const Vector4f &src);
325		void frc(Vector4f &dst, const Vector4f &src);
326		void trunc(Vector4f &dst, const Vector4f &src);
327		void floor(Vector4f &dst, const Vector4f &src);
328		void round(Vector4f &dst, const Vector4f &src);
329		void roundEven(Vector4f &dst, const Vector4f &src);
330		void ceil(Vector4f &dst, const Vector4f &src);
331		void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
332		void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
333		void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
334		void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
335		void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
336		void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
337		void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
338		void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
339		void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
340		void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
341		void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
342		void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
343		void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
344		void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
345		void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2);
346		void sgn(Vector4f &dst, const Vector4f &src);
347		void isgn(Vector4f &dst, const Vector4f &src);
348		void abs(Vector4f &dst, const Vector4f &src);
349		void iabs(Vector4f &dst, const Vector4f &src);
350		void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false);
351		void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false);
352		void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false);
353		void sincos(Vector4f &dst, const Vector4f &src, bool pp = false);
354		void cos(Vector4f &dst, const Vector4f &src, bool pp = false);
355		void sin(Vector4f &dst, const Vector4f &src, bool pp = false);
356		void tan(Vector4f &dst, const Vector4f &src, bool pp = false);
357		void acos(Vector4f &dst, const Vector4f &src, bool pp = false);
358		void asin(Vector4f &dst, const Vector4f &src, bool pp = false);
359		void atan(Vector4f &dst, const Vector4f &src, bool pp = false);
360		void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false);
361		void cosh(Vector4f &dst, const Vector4f &src, bool pp = false);
362		void sinh(Vector4f &dst, const Vector4f &src, bool pp = false);
363		void tanh(Vector4f &dst, const Vector4f &src, bool pp = false);
364		void acosh(Vector4f &dst, const Vector4f &src, bool pp = false);
365		void asinh(Vector4f &dst, const Vector4f &src, bool pp = false);
366		void atanh(Vector4f &dst, const Vector4f &src, bool pp = false);
367		void expp(Vector4f &dst, const Vector4f &src, unsigned short version);
368		void logp(Vector4f &dst, const Vector4f &src, unsigned short version);
369		void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
370		void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
371		void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
372		void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control);
373		void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2);
374		void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1);
375		void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index);
376		void all(Float4 &dst, const Vector4f &src);
377		void any(Float4 &dst, const Vector4f &src);
378		void not(Vector4f &dst, const Vector4f &src);
379		void or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
380		void xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
381		void and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
382		void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
383		void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1);
384
385	private:
386		void sgn(Float4 &dst, const Float4 &src);
387		void isgn(Float4 &dst, const Float4 &src);
388		void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
389		void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2);
390		void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2);
391		void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits);
392		void halfToFloatBits(Float4& dst, const Float4& halfBits);
393	};
394}
395
396#endif   // sw_ShaderCore_hpp
397