SkNx_neon.h revision c9adb05b64fa0bfadf9d1a782afcda470da68c9e
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
8c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#ifndef SkNx_neon_DEFINED
9c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#define SkNx_neon_DEFINED
10c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
11c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#include <arm_neon.h>
12c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
// Two 32-bit integer lanes held in a NEON int32x2_t.
// Within this file it is the result type of the SkNf<2, float> comparisons.
template <>
class SkNi<2, int32_t> {
public:
    // Leaves the lanes uninitialized.
    SkNi() {}

    // Wraps an existing NEON vector (implicit conversion is intentional).
    SkNi(int32x2_t vec) : fVec(vec) {}

    // True when both lanes are non-zero.
    bool allTrue() const {
        const int32_t lane0 = vget_lane_s32(fVec, 0);
        const int32_t lane1 = vget_lane_s32(fVec, 1);
        return lane0 && lane1;
    }

    // True when at least one lane is non-zero.
    bool anyTrue() const {
        const int32_t lane0 = vget_lane_s32(fVec, 0);
        const int32_t lane1 = vget_lane_s32(fVec, 1);
        return lane0 || lane1;
    }

private:
    int32x2_t fVec;
};
24c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
// Four 32-bit integer lanes held in a NEON int32x4_t.
// Within this file it is the result type of the SkNf<4, float> comparisons.
template <>
class SkNi<4, int32_t> {
public:
    // Leaves the lanes uninitialized.
    SkNi() {}

    // Wraps an existing NEON vector (implicit conversion is intentional).
    SkNi(int32x4_t vec) : fVec(vec) {}

    // True when all four lanes are non-zero.
    bool allTrue() const {
        const int32_t lane0 = vgetq_lane_s32(fVec, 0);
        const int32_t lane1 = vgetq_lane_s32(fVec, 1);
        const int32_t lane2 = vgetq_lane_s32(fVec, 2);
        const int32_t lane3 = vgetq_lane_s32(fVec, 3);
        return lane0 && lane1 && lane2 && lane3;
    }

    // True when at least one of the four lanes is non-zero.
    bool anyTrue() const {
        const int32_t lane0 = vgetq_lane_s32(fVec, 0);
        const int32_t lane1 = vgetq_lane_s32(fVec, 1);
        const int32_t lane2 = vgetq_lane_s32(fVec, 2);
        const int32_t lane3 = vgetq_lane_s32(fVec, 3);
        return lane0 || lane1 || lane2 || lane3;
    }

private:
    int32x4_t fVec;
};
36c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
// Two float lanes held in a NEON float32x2_t.
template <>
class SkNf<2, float> {
    typedef SkNi<2, int32_t> Ni;
public:
    // Implicit on purpose: the members below return raw NEON vectors and rely
    // on this conversion.
    SkNf(float32x2_t vec) : fVec(vec) {}

    // Leaves the lanes uninitialized.
    SkNf() {}
    // Broadcasts val to both lanes.
    explicit SkNf(float val)           : fVec(vdup_n_f32(val)) {}
    // Reads two consecutive floats starting at vals.
    static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
    // Lane 0 = a, lane 1 = b.
    SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }

    // Writes both lanes to vals[0..1].
    void store(float vals[2]) const { vst1_f32(vals, fVec); }

    // Approximate 1/x: the hardware reciprocal estimate refined by one
    // Newton step (vrecps).
    SkNf approxInvert() const {
        float32x2_t est0 = vrecpe_f32(fVec),
                    est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
        return est1;
    }
    // 1/x: approxInvert() refined by one more Newton step.
    SkNf invert() const {
        float32x2_t est1 = this->approxInvert().fVec,
                    est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vadd_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); }
    // Uses the divide instruction on ARM64; elsewhere multiplies by
    // o.invert(), so the quotient is only as accurate as that reciprocal.
    SkNf operator / (const SkNf& o) const {
    #if defined(SK_CPU_ARM64)
        return vdiv_f32(fVec, o.fVec);
    #else
        return vmul_f32(fVec, o.invert().fVec);
    #endif
    }

    // Lane-wise comparisons.  Each result lane is the NEON compare mask
    // reinterpreted as int32_t: non-zero where the comparison holds, zero
    // where it does not.
    Ni operator == (const SkNf& o) const { return vreinterpret_s32_u32(vceq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpret_s32_u32(vclt_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpret_s32_u32(vcgt_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpret_s32_u32(vcle_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpret_s32_u32(vcge_f32(fVec, o.fVec)); }
    // Bitwise complement of the equality mask.
    Ni operator != (const SkNf& o) const {
        return vreinterpret_s32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }

    // Approximate 1/sqrt(x): the hardware estimate refined by one Newton
    // step (vrsqrts).
    SkNf rsqrt() const {
        float32x2_t est0 = vrsqrte_f32(fVec),
                    est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
        return est1;
    }

    // sqrt(x).  ARM64 uses the hardware instruction; elsewhere it is computed
    // as x * (1/sqrt(x)).
    SkNf sqrt() const {
    #if defined(SK_CPU_ARM64)
        return vsqrt_f32(fVec);
    #else
        // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
        float32x2_t est1 = this->rsqrt().fVec,
                    est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
        // For x == +/-0 the reciprocal-sqrt estimate is +/-inf, and for
        // x == +inf it is 0, so x * est2 would be NaN in those lanes.  sqrt(x)
        // equals x for exactly those inputs, so pass them straight through.
        const float32x2_t zero = vdup_n_f32(0.0f);
        const float32x2_t inf  = vreinterpret_f32_u32(vdup_n_u32(0x7f800000u));
        const uint32x2_t passThrough = vorr_u32(vceq_f32(fVec, zero), vceq_f32(fVec, inf));
        return vbsl_f32(passThrough, fVec, vmul_f32(fVec, est2));
    #endif
    }

    // Returns lane k; k must be 0 or 1 (checked by SkASSERT).
    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float32x2_t fVec;
};
109c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
110c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#if defined(SK_CPU_ARM64)
// Two 64-bit integer lanes held in a NEON int64x2_t.
// Within this file it is the result type of the SkNf<2, double> comparisons.
template <>
class SkNi<2, int64_t> {
public:
    // Leaves the lanes uninitialized.
    SkNi() {}

    // Wraps an existing NEON vector (implicit conversion is intentional).
    SkNi(int64x2_t vec) : fVec(vec) {}

    // True when both lanes are non-zero.
    bool allTrue() const {
        const int64_t lane0 = vgetq_lane_s64(fVec, 0);
        const int64_t lane1 = vgetq_lane_s64(fVec, 1);
        return lane0 && lane1;
    }

    // True when at least one lane is non-zero.
    bool anyTrue() const {
        const int64_t lane0 = vgetq_lane_s64(fVec, 0);
        const int64_t lane1 = vgetq_lane_s64(fVec, 1);
        return lane0 || lane1;
    }

private:
    int64x2_t fVec;
};
122c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
// Two double lanes held in a NEON float64x2_t.
template <>
class SkNf<2, double> {
    typedef SkNi<2, int64_t> Ni;
public:
    // Implicit on purpose: the members below return raw NEON vectors and rely
    // on this conversion.
    SkNf(float64x2_t vec) : fVec(vec) {}

    // Leaves the lanes uninitialized.
    SkNf() {}

    // Broadcasts val to both lanes.
    explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}

    // Lane 0 = a, lane 1 = b.
    SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }

    // Reads two consecutive doubles starting at vals.
    static SkNf Load(const double vals[2]) {
        return vld1q_f64(vals);
    }

    // Writes both lanes to vals[0..1].
    void store(double vals[2]) const {
        vst1q_f64(vals, fVec);
    }

    // Lane-wise arithmetic.
    SkNf operator + (const SkNf& o) const {
        return vaddq_f64(fVec, o.fVec);
    }
    SkNf operator - (const SkNf& o) const {
        return vsubq_f64(fVec, o.fVec);
    }
    SkNf operator * (const SkNf& o) const {
        return vmulq_f64(fVec, o.fVec);
    }
    SkNf operator / (const SkNf& o) const {
        return vdivq_f64(fVec, o.fVec);
    }

    // Lane-wise comparisons.  Each result lane is the NEON compare mask
    // reinterpreted as int64_t: non-zero where the comparison holds, zero
    // where it does not.
    Ni operator == (const SkNf& o) const {
        const uint64x2_t mask = vceqq_f64(fVec, o.fVec);
        return vreinterpretq_s64_u64(mask);
    }
    Ni operator < (const SkNf& o) const {
        const uint64x2_t mask = vcltq_f64(fVec, o.fVec);
        return vreinterpretq_s64_u64(mask);
    }
    Ni operator > (const SkNf& o) const {
        const uint64x2_t mask = vcgtq_f64(fVec, o.fVec);
        return vreinterpretq_s64_u64(mask);
    }
    Ni operator <= (const SkNf& o) const {
        const uint64x2_t mask = vcleq_f64(fVec, o.fVec);
        return vreinterpretq_s64_u64(mask);
    }
    Ni operator >= (const SkNf& o) const {
        const uint64x2_t mask = vcgeq_f64(fVec, o.fVec);
        return vreinterpretq_s64_u64(mask);
    }
    // Complement of the equality mask.  The bitwise NOT is applied to the
    // same 128 bits viewed as 32-bit lanes, which yields the same bit pattern.
    Ni operator != (const SkNf& o) const {
        const uint64x2_t equal    = vceqq_f64(fVec, o.fVec);
        const uint32x4_t notEqual = vmvnq_u32(vreinterpretq_u32_u64(equal));
        return vreinterpretq_s64_u32(notEqual);
    }

    static SkNf Min(const SkNf& l, const SkNf& r) {
        return vminq_f64(l.fVec, r.fVec);
    }
    static SkNf Max(const SkNf& l, const SkNf& r) {
        return vmaxq_f64(l.fVec, r.fVec);
    }

    // Square root via the hardware instruction.
    SkNf sqrt() const {
        return vsqrtq_f64(fVec);
    }

    // Approximate 1/sqrt(x): the hardware estimate refined by one
    // vrsqrts step.
    SkNf rsqrt() const {
        const float64x2_t seed    = vrsqrteq_f64(fVec);
        const float64x2_t step    = vrsqrtsq_f64(fVec, vmulq_f64(seed, seed));
        const float64x2_t refined = vmulq_f64(step, seed);
        return refined;
    }

    // Approximate 1/x: the hardware estimate refined by one vrecps step.
    SkNf approxInvert() const {
        const float64x2_t seed    = vrecpeq_f64(fVec);
        const float64x2_t refined = vmulq_f64(vrecpsq_f64(seed, fVec), seed);
        return refined;
    }

    // 1/x: approxInvert() followed by two further vrecps refinement steps.
    SkNf invert() const {
        float64x2_t estimate = this->approxInvert().fVec;
        estimate = vmulq_f64(vrecpsq_f64(estimate, fVec), estimate);
        estimate = vmulq_f64(vrecpsq_f64(estimate, fVec), estimate);
        return estimate;
    }

    // Returns lane k; k must be 0 or 1 (checked by SkASSERT).
    double operator[] (int k) const {
        SkASSERT(0 <= k && k < 2);
        return fVec[k];
    }

private:
    float64x2_t fVec;
};
181c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#endif//defined(SK_CPU_ARM64)
182c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
// Four float lanes held in a NEON float32x4_t.
template <>
class SkNf<4, float> {
    typedef SkNi<4, int32_t> Ni;
public:
    // Implicit on purpose: the members below return raw NEON vectors and rely
    // on this conversion.
    SkNf(float32x4_t vec) : fVec(vec) {}
    // Exposes the underlying NEON vector.
    float32x4_t vec() const { return fVec; }

    // Leaves the lanes uninitialized.
    SkNf() {}
    // Broadcasts val to all four lanes.
    explicit SkNf(float val)           : fVec(vdupq_n_f32(val)) {}
    // Reads four consecutive floats starting at vals.
    static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
    // Lanes 0..3 = a, b, c, d.
    SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }

    // Writes all four lanes to vals[0..3].
    void store(float vals[4]) const { vst1q_f32(vals, fVec); }

    // Approximate 1/x: the hardware reciprocal estimate refined by one
    // Newton step (vrecps).
    SkNf approxInvert() const {
        float32x4_t est0 = vrecpeq_f32(fVec),
                    est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
        return est1;
    }
    // 1/x: approxInvert() refined by one more Newton step.
    SkNf invert() const {
        float32x4_t est1 = this->approxInvert().fVec,
                    est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
        return est2;
    }

    SkNf operator + (const SkNf& o) const { return vaddq_f32(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); }
    // Uses the divide instruction on ARM64; elsewhere multiplies by
    // o.invert(), so the quotient is only as accurate as that reciprocal.
    SkNf operator / (const SkNf& o) const {
    #if defined(SK_CPU_ARM64)
        return vdivq_f32(fVec, o.fVec);
    #else
        return vmulq_f32(fVec, o.invert().fVec);
    #endif
    }

    // Lane-wise comparisons.  Each result lane is the NEON compare mask
    // reinterpreted as int32_t: non-zero where the comparison holds, zero
    // where it does not.
    Ni operator == (const SkNf& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
    Ni operator  < (const SkNf& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
    Ni operator  > (const SkNf& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
    Ni operator <= (const SkNf& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
    Ni operator >= (const SkNf& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
    // Bitwise complement of the equality mask.
    Ni operator != (const SkNf& o) const {
        return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
    }

    static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
    static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }

    // Approximate 1/sqrt(x): the hardware estimate refined by one Newton
    // step (vrsqrts).
    SkNf rsqrt() const {
        float32x4_t est0 = vrsqrteq_f32(fVec),
                    est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
        return est1;
    }

    // sqrt(x).  ARM64 uses the hardware instruction; elsewhere it is computed
    // as x * (1/sqrt(x)).
    SkNf sqrt() const {
    #if defined(SK_CPU_ARM64)
        return vsqrtq_f32(fVec);
    #else
        // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
        float32x4_t est1 = this->rsqrt().fVec,
                    est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
        // For x == +/-0 the reciprocal-sqrt estimate is +/-inf, and for
        // x == +inf it is 0, so x * est2 would be NaN in those lanes.  sqrt(x)
        // equals x for exactly those inputs, so pass them straight through.
        const float32x4_t zero = vdupq_n_f32(0.0f);
        const float32x4_t inf  = vreinterpretq_f32_u32(vdupq_n_u32(0x7f800000u));
        const uint32x4_t passThrough = vorrq_u32(vceqq_f32(fVec, zero), vceqq_f32(fVec, inf));
        return vbslq_f32(passThrough, fVec, vmulq_f32(fVec, est2));
    #endif
    }

    // Returns lane k; k must be in [0, 4) (checked by SkASSERT).
    float operator[] (int k) const {
        SkASSERT(0 <= k && k < 4);
        return fVec[k];
    }

private:
    float32x4_t fVec;
};
256c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein
257c9adb05b64fa0bfadf9d1a782afcda470da68c9emtklein#endif//SkNx_neon_DEFINED
258