// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COMPLEX_NEON_H
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COMPLEX_NEON_H
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- float ----------
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet2cf
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf() {}
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f  v;
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef Packet2cf type;
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Vectorizable = 1,
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    AlignedOnScalar = 1,
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size = 2,
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAdd    = 1,
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSub    = 1,
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMul    = 1,
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasDiv    = 1,
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasNegate = 1,
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs    = 0,
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs2   = 0,
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMin    = 0,
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMax    = 0,
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSetLinear = 0
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t r64;
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  r64 = vld1_f32((float *)&from);
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vcombine_f32(r64, r64));
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4ui b = vreinterpretq_u32_f32(a.v);
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR)));
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f v1, v2;
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the real a with b
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vmulq_f32(v1, b.v);
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the imag a with b
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vmulq_f32(v2, b.v);
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Conjugate v2
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Swap real/imag elements in v2.
837faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  v2 = vrev64q_f32(v2);
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add and return the result
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vaddq_f32(v1, v2));
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { __pld((float *)addr); }
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> EIGEN_ALIGN16 x[2];
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1q_f32((float *)x, a.v);
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return x[0];
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a_lo, a_hi;
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f a_r128;
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_lo = vget_low_f32(a.v);
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_hi = vget_high_f32(a.v);
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_r128 = vcombine_f32(a_hi, a_lo);
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(a_r128);
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vrev64q_f32(a.v));
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a1, a2;
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> s;
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a1 = vget_low_f32(a.v);
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vget_high_f32(a.v);
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vadd_f32(a1, a2);
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1_f32((float *)&s, a2);
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return s;
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f sum1, sum2, sum;
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add the first two 64-bit float32x2_t of vecs[0]
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum = vaddq_f32(sum1, sum2);
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(sum);
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a1, a2, v1, v2, prod;
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> s;
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a1 = vget_low_f32(a.v);
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vget_high_f32(a.v);
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vdup_lane_f32(a1, 0);
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vdup_lane_f32(a1, 1);
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the real a with b
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vmul_f32(v1, a2);
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the imag a with b
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vmul_f32(v2, a2);
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Conjugate v2
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Swap real/imag elements in v2.
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vrev64_f32(v2);
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add v1, v2
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  prod = vadd_f32(v1, v2);
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1_f32((float *)&s, prod);
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return s;
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset>
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet2cf>
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if (Offset==1)
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      first.v = vextq_f32(first.v, second.v, 2);
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, false,true>
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return internal::pmul(a, pconj(b));
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,false>
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return internal::pmul(pconj(a), b);
222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,true>
226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return pconj(internal::pmul(a, b));
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // TODO optimize it for AltiVec
239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f s, rev_s;
241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // this computes the norm
243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  s = vmulq_f32(b.v, b.v);
2447faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  rev_s = vrev64q_f32(s);
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COMPLEX_NEON_H
254