1/*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkNx_DEFINED
9#define SkNx_DEFINED
10
11#include "SkSafe_math.h"
12#include "SkScalar.h"
13#include "SkTypes.h"
14#include <limits>
15#include <type_traits>
16
17// Every single SkNx method wants to be fully inlined.  (We know better than MSVC).
18#define AI SK_ALWAYS_INLINE
19
20namespace {
21
22// The default SkNx<N,T> just proxies down to a pair of SkNx<N/2, T>.
23template <int N, typename T>
24struct SkNx {
25    typedef SkNx<N/2, T> Half;
26
27    Half fLo, fHi;
28
29    AI SkNx() = default;
30    AI SkNx(const Half& lo, const Half& hi) : fLo(lo), fHi(hi) {}
31
32    AI SkNx(T v) : fLo(v), fHi(v) {}
33
34    AI SkNx(T a, T b)           : fLo(a)  , fHi(b)   { static_assert(N==2, ""); }
35    AI SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { static_assert(N==4, ""); }
36    AI SkNx(T a, T b, T c, T d,  T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
37        static_assert(N==8, "");
38    }
39    AI SkNx(T a, T b, T c, T d,  T e, T f, T g, T h,
40            T i, T j, T k, T l,  T m, T n, T o, T p)
41        : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) {
42        static_assert(N==16, "");
43    }
44
45    AI T operator[](int k) const {
46        SkASSERT(0 <= k && k < N);
47        return k < N/2 ? fLo[k] : fHi[k-N/2];
48    }
49
50    AI static SkNx Load(const void* vptr) {
51        auto ptr = (const char*)vptr;
52        return { Half::Load(ptr), Half::Load(ptr + N/2*sizeof(T)) };
53    }
54    AI void store(void* vptr) const {
55        auto ptr = (char*)vptr;
56        fLo.store(ptr);
57        fHi.store(ptr + N/2*sizeof(T));
58    }
59
60    AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) {
61        auto ptr = (const char*)vptr;
62        Half al, bl, cl, dl,
63             ah, bh, ch, dh;
64        Half::Load4(ptr                  , &al, &bl, &cl, &dl);
65        Half::Load4(ptr + 4*N/2*sizeof(T), &ah, &bh, &ch, &dh);
66        *a = SkNx{al, ah};
67        *b = SkNx{bl, bh};
68        *c = SkNx{cl, ch};
69        *d = SkNx{dl, dh};
70    }
71    AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
72        auto ptr = (const char*)vptr;
73        Half al, bl, cl,
74             ah, bh, ch;
75        Half::Load3(ptr                  , &al, &bl, &cl);
76        Half::Load3(ptr + 3*N/2*sizeof(T), &ah, &bh, &ch);
77        *a = SkNx{al, ah};
78        *b = SkNx{bl, bh};
79        *c = SkNx{cl, ch};
80    }
81    AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
82        auto ptr = (char*)vptr;
83        Half::Store4(ptr,                   a.fLo, b.fLo, c.fLo, d.fLo);
84        Half::Store4(ptr + 4*N/2*sizeof(T), a.fHi, b.fHi, c.fHi, d.fHi);
85    }
86
87    AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
88    AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
89
90    AI SkNx    abs() const { return { fLo.   abs(), fHi.   abs() }; }
91    AI SkNx   sqrt() const { return { fLo.  sqrt(), fHi.  sqrt() }; }
92    AI SkNx  rsqrt() const { return { fLo. rsqrt(), fHi. rsqrt() }; }
93    AI SkNx  floor() const { return { fLo. floor(), fHi. floor() }; }
94    AI SkNx invert() const { return { fLo.invert(), fHi.invert() }; }
95
96    AI SkNx operator!() const { return { !fLo, !fHi }; }
97    AI SkNx operator-() const { return { -fLo, -fHi }; }
98    AI SkNx operator~() const { return { ~fLo, ~fHi }; }
99
100    AI SkNx operator<<(int bits) const { return { fLo << bits, fHi << bits }; }
101    AI SkNx operator>>(int bits) const { return { fLo >> bits, fHi >> bits }; }
102
103    AI SkNx operator+(const SkNx& y) const { return { fLo + y.fLo, fHi + y.fHi }; }
104    AI SkNx operator-(const SkNx& y) const { return { fLo - y.fLo, fHi - y.fHi }; }
105    AI SkNx operator*(const SkNx& y) const { return { fLo * y.fLo, fHi * y.fHi }; }
106    AI SkNx operator/(const SkNx& y) const { return { fLo / y.fLo, fHi / y.fHi }; }
107
108    AI SkNx operator&(const SkNx& y) const { return { fLo & y.fLo, fHi & y.fHi }; }
109    AI SkNx operator|(const SkNx& y) const { return { fLo | y.fLo, fHi | y.fHi }; }
110    AI SkNx operator^(const SkNx& y) const { return { fLo ^ y.fLo, fHi ^ y.fHi }; }
111
112    AI SkNx operator==(const SkNx& y) const { return { fLo == y.fLo, fHi == y.fHi }; }
113    AI SkNx operator!=(const SkNx& y) const { return { fLo != y.fLo, fHi != y.fHi }; }
114    AI SkNx operator<=(const SkNx& y) const { return { fLo <= y.fLo, fHi <= y.fHi }; }
115    AI SkNx operator>=(const SkNx& y) const { return { fLo >= y.fLo, fHi >= y.fHi }; }
116    AI SkNx operator< (const SkNx& y) const { return { fLo <  y.fLo, fHi <  y.fHi }; }
117    AI SkNx operator> (const SkNx& y) const { return { fLo >  y.fLo, fHi >  y.fHi }; }
118
119    AI SkNx saturatedAdd(const SkNx& y) const {
120        return { fLo.saturatedAdd(y.fLo), fHi.saturatedAdd(y.fHi) };
121    }
122    AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
123        return { fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi) };
124    }
125
126    AI static SkNx Min(const SkNx& x, const SkNx& y) {
127        return { Half::Min(x.fLo, y.fLo), Half::Min(x.fHi, y.fHi) };
128    }
129    AI static SkNx Max(const SkNx& x, const SkNx& y) {
130        return { Half::Max(x.fLo, y.fLo), Half::Max(x.fHi, y.fHi) };
131    }
132};
133
134// The N -> N/2 recursion bottoms out at N == 1, a scalar value.
135template <typename T>
136struct SkNx<1,T> {
137    T fVal;
138
139    AI SkNx() = default;
140    AI SkNx(T v) : fVal(v) {}
141
142    // Android complains against unused parameters, so we guard it
143    AI T operator[](int SkDEBUGCODE(k)) const {
144        SkASSERT(k == 0);
145        return fVal;
146    }
147
148    AI static SkNx Load(const void* ptr) {
149        SkNx v;
150        memcpy(&v, ptr, sizeof(T));
151        return v;
152    }
153    AI void store(void* ptr) const { memcpy(ptr, &fVal, sizeof(T)); }
154
155    AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) {
156        auto ptr = (const char*)vptr;
157        *a = Load(ptr + 0*sizeof(T));
158        *b = Load(ptr + 1*sizeof(T));
159        *c = Load(ptr + 2*sizeof(T));
160        *d = Load(ptr + 3*sizeof(T));
161    }
162    AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
163        auto ptr = (const char*)vptr;
164        *a = Load(ptr + 0*sizeof(T));
165        *b = Load(ptr + 1*sizeof(T));
166        *c = Load(ptr + 2*sizeof(T));
167    }
168    AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
169        auto ptr = (char*)vptr;
170        a.store(ptr + 0*sizeof(T));
171        b.store(ptr + 1*sizeof(T));
172        c.store(ptr + 2*sizeof(T));
173        d.store(ptr + 3*sizeof(T));
174    }
175
176    AI bool anyTrue() const { return fVal != 0; }
177    AI bool allTrue() const { return fVal != 0; }
178
179    AI SkNx    abs() const { return Abs(fVal); }
180    AI SkNx   sqrt() const { return Sqrt(fVal); }
181    AI SkNx  rsqrt() const { return T(1) / this->sqrt(); }
182    AI SkNx  floor() const { return Floor(fVal); }
183    AI SkNx invert() const { return T(1) / *this; }
184
185    AI SkNx operator!() const { return !fVal; }
186    AI SkNx operator-() const { return -fVal; }
187    AI SkNx operator~() const { return FromBits(~ToBits(fVal)); }
188
189    AI SkNx operator<<(int bits) const { return fVal << bits; }
190    AI SkNx operator>>(int bits) const { return fVal >> bits; }
191
192    AI SkNx operator+(const SkNx& y) const { return fVal + y.fVal; }
193    AI SkNx operator-(const SkNx& y) const { return fVal - y.fVal; }
194    AI SkNx operator*(const SkNx& y) const { return fVal * y.fVal; }
195    AI SkNx operator/(const SkNx& y) const { return fVal / y.fVal; }
196
197    AI SkNx operator&(const SkNx& y) const { return FromBits(ToBits(fVal) & ToBits(y.fVal)); }
198    AI SkNx operator|(const SkNx& y) const { return FromBits(ToBits(fVal) | ToBits(y.fVal)); }
199    AI SkNx operator^(const SkNx& y) const { return FromBits(ToBits(fVal) ^ ToBits(y.fVal)); }
200
201    AI SkNx operator==(const SkNx& y) const { return FromBits(fVal == y.fVal ? ~0 : 0); }
202    AI SkNx operator!=(const SkNx& y) const { return FromBits(fVal != y.fVal ? ~0 : 0); }
203    AI SkNx operator<=(const SkNx& y) const { return FromBits(fVal <= y.fVal ? ~0 : 0); }
204    AI SkNx operator>=(const SkNx& y) const { return FromBits(fVal >= y.fVal ? ~0 : 0); }
205    AI SkNx operator< (const SkNx& y) const { return FromBits(fVal <  y.fVal ? ~0 : 0); }
206    AI SkNx operator> (const SkNx& y) const { return FromBits(fVal >  y.fVal ? ~0 : 0); }
207
208    AI static SkNx Min(const SkNx& x, const SkNx& y) { return x.fVal < y.fVal ? x : y; }
209    AI static SkNx Max(const SkNx& x, const SkNx& y) { return x.fVal > y.fVal ? x : y; }
210
211    AI SkNx saturatedAdd(const SkNx& y) const {
212        static_assert(std::is_unsigned<T>::value, "");
213        T sum = fVal + y.fVal;
214        return sum < fVal ? std::numeric_limits<T>::max() : sum;
215    }
216
217    AI SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e; }
218
219private:
220    // Helper functions to choose the right float/double methods.  (In <cmath> madness lies...)
221    AI static float   Abs(float val) { return  ::fabsf(val); }
222    AI static float  Sqrt(float val) { return  ::sqrtf(val); }
223    AI static float Floor(float val) { return ::floorf(val); }
224
225    AI static double   Abs(double val) { return  ::fabs(val); }
226    AI static double  Sqrt(double val) { return  ::sqrt(val); }
227    AI static double Floor(double val) { return ::floor(val); }
228
229    // Helper functions for working with floats/doubles as bit patterns.
230    template <typename U>
231    AI static U ToBits(U v) { return v; }
232    AI static int32_t ToBits(float  v) { int32_t bits; memcpy(&bits, &v, sizeof(v)); return bits; }
233    AI static int64_t ToBits(double v) { int64_t bits; memcpy(&bits, &v, sizeof(v)); return bits; }
234
235    template <typename Bits>
236    AI static T FromBits(Bits bits) {
237        static_assert(std::is_pod<T   >::value &&
238                      std::is_pod<Bits>::value &&
239                      sizeof(T) <= sizeof(Bits), "");
240        T val;
241        memcpy(&val, &bits, sizeof(T));
242        return val;
243    }
244};
245
246// Allow scalars on the left or right of binary operators, and things like +=, &=, etc.
247#define V template <int N, typename T> AI static SkNx<N,T>
248    V operator+ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) +  y; }
249    V operator- (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) -  y; }
250    V operator* (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) *  y; }
251    V operator/ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) /  y; }
252    V operator& (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) &  y; }
253    V operator| (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) |  y; }
254    V operator^ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) ^  y; }
255    V operator==(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) == y; }
256    V operator!=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) != y; }
257    V operator<=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) <= y; }
258    V operator>=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) >= y; }
259    V operator< (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) <  y; }
260    V operator> (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) >  y; }
261
262    V operator+ (const SkNx<N,T>& x, T y) { return x +  SkNx<N,T>(y); }
263    V operator- (const SkNx<N,T>& x, T y) { return x -  SkNx<N,T>(y); }
264    V operator* (const SkNx<N,T>& x, T y) { return x *  SkNx<N,T>(y); }
265    V operator/ (const SkNx<N,T>& x, T y) { return x /  SkNx<N,T>(y); }
266    V operator& (const SkNx<N,T>& x, T y) { return x &  SkNx<N,T>(y); }
267    V operator| (const SkNx<N,T>& x, T y) { return x |  SkNx<N,T>(y); }
268    V operator^ (const SkNx<N,T>& x, T y) { return x ^  SkNx<N,T>(y); }
269    V operator==(const SkNx<N,T>& x, T y) { return x == SkNx<N,T>(y); }
270    V operator!=(const SkNx<N,T>& x, T y) { return x != SkNx<N,T>(y); }
271    V operator<=(const SkNx<N,T>& x, T y) { return x <= SkNx<N,T>(y); }
272    V operator>=(const SkNx<N,T>& x, T y) { return x >= SkNx<N,T>(y); }
273    V operator< (const SkNx<N,T>& x, T y) { return x <  SkNx<N,T>(y); }
274    V operator> (const SkNx<N,T>& x, T y) { return x >  SkNx<N,T>(y); }
275
276    V& operator<<=(SkNx<N,T>& x, int bits) { return (x = x << bits); }
277    V& operator>>=(SkNx<N,T>& x, int bits) { return (x = x >> bits); }
278
279    V& operator +=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x + y); }
280    V& operator -=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x - y); }
281    V& operator *=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x * y); }
282    V& operator /=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x / y); }
283    V& operator &=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x & y); }
284    V& operator |=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x | y); }
285    V& operator ^=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x ^ y); }
286
287    V& operator +=(SkNx<N,T>& x, T y) { return (x = x + SkNx<N,T>(y)); }
288    V& operator -=(SkNx<N,T>& x, T y) { return (x = x - SkNx<N,T>(y)); }
289    V& operator *=(SkNx<N,T>& x, T y) { return (x = x * SkNx<N,T>(y)); }
290    V& operator /=(SkNx<N,T>& x, T y) { return (x = x / SkNx<N,T>(y)); }
291    V& operator &=(SkNx<N,T>& x, T y) { return (x = x & SkNx<N,T>(y)); }
292    V& operator |=(SkNx<N,T>& x, T y) { return (x = x | SkNx<N,T>(y)); }
293    V& operator ^=(SkNx<N,T>& x, T y) { return (x = x ^ SkNx<N,T>(y)); }
294#undef V
295
296// SkNx<N,T> ~~> SkNx<N/2,T> + SkNx<N/2,T>
297template <int N, typename T>
298AI static void SkNx_split(const SkNx<N,T>& v, SkNx<N/2,T>* lo, SkNx<N/2,T>* hi) {
299    *lo = v.fLo;
300    *hi = v.fHi;
301}
302
303// SkNx<N/2,T> + SkNx<N/2,T> ~~> SkNx<N,T>
304template <int N, typename T>
305AI static SkNx<N*2,T> SkNx_join(const SkNx<N,T>& lo, const SkNx<N,T>& hi) {
306    return { lo, hi };
307}
308
309// A very generic shuffle.  Can reorder, duplicate, contract, expand...
310//    Sk4f v = { R,G,B,A };
311//    SkNx_shuffle<2,1,0,3>(v)         ~~> {B,G,R,A}
312//    SkNx_shuffle<2,1>(v)             ~~> {B,G}
313//    SkNx_shuffle<2,1,2,1,2,1,2,1>(v) ~~> {B,G,B,G,B,G,B,G}
314//    SkNx_shuffle<3,3,3,3>(v)         ~~> {A,A,A,A}
315template <int... Ix, int N, typename T>
316AI static SkNx<sizeof...(Ix),T> SkNx_shuffle(const SkNx<N,T>& v) {
317    return { v[Ix]... };
318}
319
320// Cast from SkNx<N, Src> to SkNx<N, Dst>, as if you called static_cast<Dst>(Src).
321template <typename Dst, typename Src, int N>
322AI static SkNx<N,Dst> SkNx_cast(const SkNx<N,Src>& v) {
323    return { SkNx_cast<Dst>(v.fLo), SkNx_cast<Dst>(v.fHi) };
324}
325template <typename Dst, typename Src>
326AI static SkNx<1,Dst> SkNx_cast(const SkNx<1,Src>& v) {
327    return static_cast<Dst>(v.fVal);
328}
329
330template <int N, typename T>
331AI static SkNx<N,T> SkNx_fma(const SkNx<N,T>& f, const SkNx<N,T>& m, const SkNx<N,T>& a) {
332    return f*m+a;
333}
334
335}  // namespace
336
337typedef SkNx<2,     float> Sk2f;
338typedef SkNx<4,     float> Sk4f;
339typedef SkNx<8,     float> Sk8f;
340typedef SkNx<16,    float> Sk16f;
341
342typedef SkNx<2,  SkScalar> Sk2s;
343typedef SkNx<4,  SkScalar> Sk4s;
344typedef SkNx<8,  SkScalar> Sk8s;
345typedef SkNx<16, SkScalar> Sk16s;
346
347typedef SkNx<4,   uint8_t> Sk4b;
348typedef SkNx<8,   uint8_t> Sk8b;
349typedef SkNx<16,  uint8_t> Sk16b;
350
351typedef SkNx<4,  uint16_t> Sk4h;
352typedef SkNx<8,  uint16_t> Sk8h;
353typedef SkNx<16, uint16_t> Sk16h;
354
355typedef SkNx<4,  int32_t> Sk4i;
356typedef SkNx<8,  int32_t> Sk8i;
357typedef SkNx<4, uint32_t> Sk4u;
358
359// Include platform specific specializations if available.
360#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
361    #include "../opts/SkNx_sse.h"
362#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
363    #include "../opts/SkNx_neon.h"
364#else
365
366AI static Sk4i Sk4f_round(const Sk4f& x) {
367    return { (int) lrintf (x[0]),
368             (int) lrintf (x[1]),
369             (int) lrintf (x[2]),
370             (int) lrintf (x[3]), };
371}
372
373#endif
374
375AI static void Sk4f_ToBytes(uint8_t p[16],
376                            const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
377    SkNx_cast<uint8_t>(SkNx_join(SkNx_join(a,b), SkNx_join(c,d))).store(p);
378}
379
380#undef AI
381
382#endif//SkNx_DEFINED
383