SkNx_sse.h revision 7792dbf7ea089b3bcb81792a3ecda8a6f8b421e7
1/* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#ifndef SkNx_sse_DEFINED 9#define SkNx_sse_DEFINED 10 11// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. 12#include <immintrin.h> 13 14template <> 15class SkNi<2, int32_t> { 16public: 17 SkNi(const __m128i& vec) : fVec(vec) {} 18 19 SkNi() {} 20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } 21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } 22 23private: 24 __m128i fVec; 25}; 26 27template <> 28class SkNi<4, int32_t> { 29public: 30 SkNi(const __m128i& vec) : fVec(vec) {} 31 32 SkNi() {} 33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } 34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } 35 36private: 37 __m128i fVec; 38}; 39 40template <> 41class SkNi<2, int64_t> { 42public: 43 SkNi(const __m128i& vec) : fVec(vec) {} 44 45 SkNi() {} 46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } 47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } 48 49private: 50 __m128i fVec; 51}; 52 53 54template <> 55class SkNf<2, float> { 56 typedef SkNi<2, int32_t> Ni; 57public: 58 SkNf(const __m128& vec) : fVec(vec) {} 59 60 SkNf() {} 61 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} 62 static SkNf Load(const float vals[2]) { 63 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); 64 } 65 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} 66 67 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } 68 69 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } 70 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } 71 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } 72 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } 73 74 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } 75 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } 76 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } 77 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } 78 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } 79 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } 80 81 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } 82 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } 83 84 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } 85 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } 86 87 SkNf invert() const { return SkNf(1) / *this; } 88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 89 90 template <int k> float kth() const { 91 SkASSERT(0 <= k && k < 2); 92 union { __m128 v; float fs[4]; } pun = {fVec}; 93 return pun.fs[k&1]; 94 } 95 96private: 97 __m128 fVec; 98}; 99 100template <> 101class SkNf<2, double> { 102 typedef SkNi<2, int64_t> Ni; 103public: 104 SkNf(const __m128d& vec) : fVec(vec) {} 105 106 SkNf() {} 107 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} 108 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } 109 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} 110 111 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } 112 113 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } 114 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } 115 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } 116 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } 117 118 Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd (fVec, o.fVec)); } 119 Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd(fVec, o.fVec)); } 120 Ni operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd (fVec, o.fVec)); } 121 Ni operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd (fVec, o.fVec)); } 122 Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd (fVec, o.fVec)); } 123 Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd (fVec, o.fVec)); } 124 125 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } 126 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } 127 128 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } 129 SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } 130 131 SkNf invert() const { return SkNf(1) / *this; } 132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } 133 134 template <int k> double kth() const { 135 SkASSERT(0 <= k && k < 2); 136 union { __m128d v; double ds[2]; } pun = {fVec}; 137 return pun.ds[k&1]; 138 } 139 140private: 141 __m128d fVec; 142}; 143 144template <> 145class SkNf<4, float> { 146 typedef SkNi<4, int32_t> Ni; 147public: 148 SkNf(const __m128& vec) : fVec(vec) {} 149 150 SkNf() {} 151 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} 152 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } 153 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} 154 155 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } 156 157 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } 158 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } 159 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } 160 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } 161 162 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps (fVec, o.fVec)); } 163 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } 164 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps (fVec, o.fVec)); } 165 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps (fVec, o.fVec)); } 166 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps (fVec, o.fVec)); } 167 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps (fVec, o.fVec)); } 168 169 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } 170 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } 171 172 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } 173 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } 174 175 SkNf invert() const { return SkNf(1) / *this; } 176 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 177 178 template <int k> float kth() const { 179 SkASSERT(0 <= k && k < 4); 180 union { __m128 v; float fs[4]; } pun = {fVec}; 181 return pun.fs[k&3]; 182 } 183 184protected: 185 __m128 fVec; 186}; 187 188 189#endif//SkNx_sse_DEFINED 190