SkNx_sse.h revision 7792dbf7ea089b3bcb81792a3ecda8a6f8b421e7
1/*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkNx_sse_DEFINED
9#define SkNx_sse_DEFINED
10
// This file may assume that SSE2 and everything older is available, but must check
// SK_CPU_SSE_LEVEL before using anything more recent.
12#include <immintrin.h>
13
14template <>
15class SkNi<2, int32_t> {
16public:
17    SkNi(const __m128i& vec) : fVec(vec) {}
18
19    SkNi() {}
20    bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); }
21    bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); }
22
23private:
24    __m128i fVec;
25};
26
27template <>
28class SkNi<4, int32_t> {
29public:
30    SkNi(const __m128i& vec) : fVec(vec) {}
31
32    SkNi() {}
33    bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
34    bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
35
36private:
37    __m128i fVec;
38};
39
40template <>
41class SkNi<2, int64_t> {
42public:
43    SkNi(const __m128i& vec) : fVec(vec) {}
44
45    SkNi() {}
46    bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
47    bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
48
49private:
50    __m128i fVec;
51};
52
53
54template <>
55class SkNf<2, float> {
56    typedef SkNi<2, int32_t> Ni;
57public:
58    SkNf(const __m128& vec) : fVec(vec) {}
59
60    SkNf() {}
61    explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {}
62    static SkNf Load(const float vals[2]) {
63        return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
64    }
65    SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
66
67    void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
68
69    SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
70    SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
71    SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
72    SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
73
74    Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
75    Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); }
76    Ni operator  < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
77    Ni operator  > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
78    Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
79    Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
80
81    static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); }
82    static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); }
83
84    SkNf  sqrt() const { return _mm_sqrt_ps (fVec);  }
85    SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
86
87    SkNf       invert() const { return SkNf(1) / *this; }
88    SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
89
90    template <int k> float kth() const {
91        SkASSERT(0 <= k && k < 2);
92        union { __m128 v; float fs[4]; } pun = {fVec};
93        return pun.fs[k&1];
94    }
95
96private:
97    __m128 fVec;
98};
99
100template <>
101class SkNf<2, double> {
102    typedef SkNi<2, int64_t> Ni;
103public:
104    SkNf(const __m128d& vec) : fVec(vec) {}
105
106    SkNf() {}
107    explicit SkNf(double val)           : fVec( _mm_set1_pd(val) ) {}
108    static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); }
109    SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
110
111    void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
112
113    SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); }
114    SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); }
115    SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); }
116    SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); }
117
118    Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); }
119    Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); }
120    Ni operator  < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); }
121    Ni operator  > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); }
122    Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); }
123    Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); }
124
125    static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); }
126    static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); }
127
128    SkNf  sqrt() const { return _mm_sqrt_pd(fVec);  }
129    SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); }
130
131    SkNf       invert() const { return SkNf(1) / *this; }
132    SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); }
133
134    template <int k> double kth() const {
135        SkASSERT(0 <= k && k < 2);
136        union { __m128d v; double ds[2]; } pun = {fVec};
137        return pun.ds[k&1];
138    }
139
140private:
141    __m128d fVec;
142};
143
144template <>
145class SkNf<4, float> {
146    typedef SkNi<4, int32_t> Ni;
147public:
148    SkNf(const __m128& vec) : fVec(vec) {}
149
150    SkNf() {}
151    explicit SkNf(float val)           : fVec( _mm_set1_ps(val) ) {}
152    static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
153    SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
154
155    void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
156
157    SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
158    SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
159    SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
160    SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }
161
162    Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); }
163    Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); }
164    Ni operator  < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); }
165    Ni operator  > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); }
166    Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); }
167    Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); }
168
169    static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); }
170    static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); }
171
172    SkNf  sqrt() const { return _mm_sqrt_ps (fVec);  }
173    SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); }
174
175    SkNf       invert() const { return SkNf(1) / *this; }
176    SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
177
178    template <int k> float kth() const {
179        SkASSERT(0 <= k && k < 4);
180        union { __m128 v; float fs[4]; } pun = {fVec};
181        return pun.fs[k&3];
182    }
183
184protected:
185    __m128 fVec;
186};
187
188
189#endif//SkNx_sse_DEFINED
190