// SkNx_neon.h revision c9adb05b64fa0bfadf9d1a782afcda470da68c9e
1c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein/* 2c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein * Copyright 2015 Google Inc. 3c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein * 4c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein * Use of this source code is governed by a BSD-style license that can be 5c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein * found in the LICENSE file. 6c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein */ 7c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 8c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#ifndef SkNx_neon_DEFINED 9c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#define SkNx_neon_DEFINED 10c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 11c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#include <arm_neon.h> 12c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 13c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 14c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNi<2, int32_t> { 15c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 16c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNi(int32x2_t vec) : fVec(vec) {} 17c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 18c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNi() {} 19c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool allTrue() const { return fVec[0] && fVec[1]; } 20c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool anyTrue() const { return fVec[0] || fVec[1]; } 21c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 22c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein int32x2_t fVec; 23c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 24c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 25c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 26c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNi<4, int32_t> { 27c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 28c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNi(int32x4_t vec) : fVec(vec) {} 29c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 30c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 
SkNi() {} 31c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool allTrue() const { return fVec[0] && fVec[1] && fVec[2] && fVec[3]; } 32c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool anyTrue() const { return fVec[0] || fVec[1] || fVec[2] || fVec[3]; } 33c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 34c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein int32x4_t fVec; 35c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 36c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 37c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 38c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNf<2, float> { 39c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein typedef SkNi<2, int32_t> Ni; 40c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 41c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(float32x2_t vec) : fVec(vec) {} 42c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 43c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf() {} 44c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} 45c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Load(const float vals[2]) { return vld1_f32(vals); } 46c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } 47c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 48c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein void store(float vals[2]) const { vst1_f32(vals, fVec); } 49c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 50c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf approxInvert() const { 51c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x2_t est0 = vrecpe_f32(fVec), 52c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmul_f32(vrecps_f32(est0, fVec), est0); 53c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 54c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 55c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf invert() const { 56c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x2_t est1 
= this->approxInvert().fVec, 57c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est2 = vmul_f32(vrecps_f32(est1, fVec), est1); 58c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est2; 59c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 60c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 61c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator + (const SkNf& o) const { return vadd_f32(fVec, o.fVec); } 62c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } 63c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } 64c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator / (const SkNf& o) const { 65c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #if defined(SK_CPU_ARM64) 66c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vdiv_f32(fVec, o.fVec); 67c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #else 68c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vmul_f32(fVec, o.invert().fVec); 69c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #endif 70c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 71c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 72c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(fVec, o.fVec)); } 73c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator < (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(fVec, o.fVec)); } 74c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator > (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(fVec, o.fVec)); } 75c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(fVec, o.fVec)); } 76c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(fVec, o.fVec)); } 77c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator != 
(const SkNf& o) const { 78c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); 79c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 80c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 81c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } 82c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } 83c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 84c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf rsqrt() const { 85c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x2_t est0 = vrsqrte_f32(fVec), 86c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); 87c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 88c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 89c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 90c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf sqrt() const { 91c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #if defined(SK_CPU_ARM64) 92c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vsqrt_f32(fVec); 93c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #else 94c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x2_t est1 = this->rsqrt().fVec, 95c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 
96c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); 97c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vmul_f32(fVec, est2); 98c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #endif 99c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 100c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 101c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float operator[] (int k) const { 102c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkASSERT(0 <= k && k < 2); 103c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return fVec[k]; 104c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 105c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 106c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 107c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x2_t fVec; 108c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 109c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 110c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#if defined(SK_CPU_ARM64) 111c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 112c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNi<2, int64_t> { 113c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 114c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNi(int64x2_t vec) : fVec(vec) {} 115c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 116c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNi() {} 117c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool allTrue() const { return fVec[0] && fVec[1]; } 118c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein bool anyTrue() const { return fVec[0] || fVec[1]; } 119c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 120c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein int64x2_t fVec; 121c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 122c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 123c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 124c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNf<2, double> { 
125c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein typedef SkNi<2, int64_t> Ni; 126c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 127c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(float64x2_t vec) : fVec(vec) {} 128c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 129c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf() {} 130c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {} 131c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Load(const double vals[2]) { return vld1q_f64(vals); } 132c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; } 133c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 134c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein void store(double vals[2]) const { vst1q_f64(vals, fVec); } 135c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 136c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); } 137c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); } 138c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); } 139c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); } 140c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 141c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator == (const SkNf& o) const { return vreinterpretq_s64_u64(vceqq_f64(fVec, o.fVec)); } 142c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator < (const SkNf& o) const { return vreinterpretq_s64_u64(vcltq_f64(fVec, o.fVec)); } 143c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator > (const SkNf& o) const { return vreinterpretq_s64_u64(vcgtq_f64(fVec, o.fVec)); } 144c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator <= (const SkNf& o) const { return 
vreinterpretq_s64_u64(vcleq_f64(fVec, o.fVec)); } 145c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator >= (const SkNf& o) const { return vreinterpretq_s64_u64(vcgeq_f64(fVec, o.fVec)); } 146c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator != (const SkNf& o) const { 147c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vreinterpretq_s64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); 148c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 149c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 150c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } 151c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } 152c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 153c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf sqrt() const { return vsqrtq_f64(fVec); } 154c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf rsqrt() const { 155c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float64x2_t est0 = vrsqrteq_f64(fVec), 156c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); 157c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 158c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 159c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 160c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf approxInvert() const { 161c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float64x2_t est0 = vrecpeq_f64(fVec), 162c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); 163c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 164c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 165c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 166c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf invert() const { 167c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float64x2_t est1 = 
this->approxInvert().fVec, 168c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), 169c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); 170c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est3; 171c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 172c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 173c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein double operator[] (int k) const { 174c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkASSERT(0 <= k && k < 2); 175c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return fVec[k]; 176c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 177c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 178c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 179c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float64x2_t fVec; 180c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 181c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#endif//defined(SK_CPU_ARM64) 182c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 183c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleintemplate <> 184c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinclass SkNf<4, float> { 185c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein typedef SkNi<4, int32_t> Ni; 186c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinpublic: 187c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(float32x4_t vec) : fVec(vec) {} 188c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t vec() const { return fVec; } 189c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 190c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf() {} 191c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} 192c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } 193c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 
194c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 195c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein void store(float vals[4]) const { vst1q_f32(vals, fVec); } 196c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 197c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf approxInvert() const { 198c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t est0 = vrecpeq_f32(fVec), 199c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 200c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 201c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 202c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf invert() const { 203c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t est1 = this->approxInvert().fVec, 204c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); 205c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est2; 206c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 207c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 208c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator + (const SkNf& o) const { return vaddq_f32(fVec, o.fVec); } 209c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } 210c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } 211c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf operator / (const SkNf& o) const { 212c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #if defined(SK_CPU_ARM64) 213c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vdivq_f32(fVec, o.fVec); 214c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #else 215c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vmulq_f32(fVec, o.invert().fVec); 216c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #endif 217c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 218c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 
219c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); } 220c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator < (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); } 221c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator > (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); } 222c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); } 223c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); } 224c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein Ni operator != (const SkNf& o) const { 225c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); 226c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 227c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 228c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } 229c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } 230c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 231c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf rsqrt() const { 232c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t est0 = vrsqrteq_f32(fVec), 233c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 234c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return est1; 235c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 236c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 237c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkNf sqrt() const { 238c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #if defined(SK_CPU_ARM64) 
239c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vsqrtq_f32(fVec); 240c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #else 241c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t est1 = this->rsqrt().fVec, 242c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 243c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); 244c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return vmulq_f32(fVec, est2); 245c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein #endif 246c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 247c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 248c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float operator[] (int k) const { 249c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein SkASSERT(0 <= k && k < 4); 250c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein return fVec[k]; 251c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein } 252c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 253c9adb05b64fa0bfadf9d1a782afcda470da68c9emtkleinprivate: 254c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein float32x4_t fVec; 255c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein}; 256c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein 257c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#endif//SkNx_neon_DEFINED 258