1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra.
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
52b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COMPLEX_NEON_H
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COMPLEX_NEON_H
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline uint32x4_t p4ui_CONJ_XOR() {
192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// See bug 1325, clang fails to call vld1q_u64.
202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_COMP_CLANG
212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return ret;
232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else
242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return vld1q_u32( conj_XOR_DATA );
262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif
272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline uint32x2_t p2ui_CONJ_XOR() {
302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return vld1_u32( conj_XOR_DATA );
322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- float ----------
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet2cf
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf() {}
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f  v;
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<float> >  : default_packet_traits
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef Packet2cf type;
452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  typedef Packet2cf half;
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Vectorizable = 1,
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    AlignedOnScalar = 1,
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size = 2,
502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasHalfPacket = 0,
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAdd    = 1,
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSub    = 1,
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMul    = 1,
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasDiv    = 1,
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasNegate = 1,
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs    = 0,
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs2   = 0,
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMin    = 0,
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMax    = 0,
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSetLinear = 0
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t r64;
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  r64 = vld1_f32((float *)&from);
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vcombine_f32(r64, r64));
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4ui b = vreinterpretq_u32_f32(a.v);
812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f v1, v2;
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the real a with b
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vmulq_f32(v1, b.v);
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the imag a with b
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vmulq_f32(v2, b.v);
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Conjugate v2
972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Swap real/imag elements in v2.
997faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  v2 = vrev64q_f32(v2);
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add and return the result
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vaddq_f32(v1, v2));
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
1062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
1292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
1302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
1312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet4f res = pset1<Packet4f>(0.f);
1322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
1332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
1342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
1352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
1362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet2cf(res);
1372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
1382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
1392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
1402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
1412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
1422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
1432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
1442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
145615d816d068b4d0f5e8df601930b5f160bf7eda1Tim Murraytemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { EIGEN_ARM_PREFETCH((float *)addr); }
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> EIGEN_ALIGN16 x[2];
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1q_f32((float *)x, a.v);
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return x[0];
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a_lo, a_hi;
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f a_r128;
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_lo = vget_low_f32(a.v);
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_hi = vget_high_f32(a.v);
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a_r128 = vcombine_f32(a_hi, a_lo);
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(a_r128);
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(vrev64q_f32(a.v));
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a1, a2;
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> s;
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a1 = vget_low_f32(a.v);
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vget_high_f32(a.v);
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vadd_f32(a1, a2);
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1_f32((float *)&s, a2);
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return s;
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f sum1, sum2, sum;
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add the first two 64-bit float32x2_t of vecs[0]
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  sum = vaddq_f32(sum1, sum2);
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(sum);
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  float32x2_t a1, a2, v1, v2, prod;
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::complex<float> s;
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a1 = vget_low_f32(a.v);
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  a2 = vget_high_f32(a.v);
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vdup_lane_f32(a1, 0);
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vdup_lane_f32(a1, 1);
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the real a with b
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v1 = vmul_f32(v1, a2);
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Multiply the imag a with b
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vmul_f32(v2, a2);
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Conjugate v2
2122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Swap real/imag elements in v2.
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  v2 = vrev64_f32(v2);
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // Add v1, v2
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  prod = vadd_f32(v1, v2);
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  vst1_f32((float *)&s, prod);
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return s;
221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset>
224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet2cf>
225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if (Offset==1)
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      first.v = vextq_f32(first.v, second.v, 2);
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, false,true>
236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return internal::pmul(a, pconj(b));
243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,false>
247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return internal::pmul(pconj(a), b);
254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,true>
258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  { return padd(pmul(x,y),c); }
261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return pconj(internal::pmul(a, b));
265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
2702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // TODO optimize it for NEON
271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet4f s, rev_s;
273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // this computes the norm
275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  s = vmulq_f32(b.v, b.v);
2767faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  rev_s = vrev64q_f32(s);
277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
2812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DEVICE_FUNC inline void
2822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangptranspose(PacketBlock<Packet2cf,2>& kernel) {
2832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
2842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
2852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  kernel.packet[1].v = tmp;
2862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
2872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
2882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang//---------- double ----------
2892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
2902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
2912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// See bug 1325, clang fails to call vld1q_u64.
2922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_COMP_CLANG
2932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
2942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else
2952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
2962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
2972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif
2982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
2992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct Packet1cd
3002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd() {}
3022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
3032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d v;
3042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
3052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct packet_traits<std::complex<double> >  : default_packet_traits
3072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  typedef Packet1cd type;
3092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  typedef Packet1cd half;
3102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  enum {
3112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    Vectorizable = 1,
3122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    AlignedOnScalar = 0,
3132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    size = 1,
3142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasHalfPacket = 0,
3152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasAdd    = 1,
3172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasSub    = 1,
3182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasMul    = 1,
3192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasDiv    = 1,
3202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasNegate = 1,
3212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasAbs    = 0,
3222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasAbs2   = 0,
3232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasMin    = 0,
3242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasMax    = 0,
3252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    HasSetLinear = 0
3262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  };
3272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
3282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
3302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
3322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
3332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
3352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
3362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
3382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
3392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
3402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
3412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
3432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d v1, v2;
3452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Get the real values of a
3472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
3482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Get the imag values of a
3492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
3502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Multiply the real a with b
3512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v1 = vmulq_f64(v1, b.v);
3522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Multiply the imag a with b
3532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = vmulq_f64(v2, b.v);
3542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Conjugate v2
3552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
3562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Swap real/imag elements in v2.
3572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  v2 = preverse<Packet2d>(v2);
3582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // Add and return the result
3592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(vaddq_f64(v1, v2));
3602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
3632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
3652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
3672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
3692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
3712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
3732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
3752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
3772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
3802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
3822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
3832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ARM_PREFETCH((double *)addr); }
3852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
3872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d res = pset1<Packet2d>(0.0);
3892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
3902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
3912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(res);
3922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
3952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
3962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
3972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
3982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
3992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
4012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  std::complex<double> EIGEN_ALIGN16 res;
4032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  pstore<std::complex<double> >(&res, a);
4042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return res;
4062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
4072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
4092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
4112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
4132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
4152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<int Offset>
4172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct palign_impl<Offset,Packet1cd>
4182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
4202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
4212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    // FIXME is it sure we never have to align a Packet1cd?
4222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
4232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
4242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
4252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, false,true>
4272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
4292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  { return padd(pmul(x,y),c); }
4302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
4322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
4332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    return internal::pmul(a, pconj(b));
4342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
4352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
4362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,false>
4382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
4402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  { return padd(pmul(x,y),c); }
4412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
4432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
4442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    return internal::pmul(pconj(a), b);
4452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
4462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
4472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,true>
4492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
4512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  { return padd(pmul(x,y),c); }
4522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
4542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
4552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    return pconj(internal::pmul(a, b));
4562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
4572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
4582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
4602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  // TODO optimize it for NEON
4622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
4632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d s = pmul<Packet2d>(b.v, b.v);
4642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d rev_s = preverse<Packet2d>(s);
4652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
4672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
4682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
4702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  return Packet1cd(preverse(Packet2d(x.v)));
4722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
4732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
4742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
4752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
4762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
4772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
4782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  kernel.packet[1].v = tmp;
4792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
4802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // EIGEN_ARCH_ARM64
4812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
482c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
483c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
484c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
485c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
486c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COMPLEX_NEON_H
487