1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> 52b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org> 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COMPLEX_NEON_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COMPLEX_NEON_H 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline uint32x4_t p4ui_CONJ_XOR() { 192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// See bug 1325, clang fails to call vld1q_u64. 202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_COMP_CLANG 212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; 222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return ret; 232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; 252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vld1q_u32( conj_XOR_DATA ); 262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline uint32x2_t p2ui_CONJ_XOR() { 302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 }; 312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vld1_u32( conj_XOR_DATA ); 322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- float ---------- 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet2cf 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf() {} 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v; 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<float> > : default_packet_traits 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet2cf type; 452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet2cf half; 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 1, 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size = 2, 502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasHalfPacket = 0, 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 1, 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 1, 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 1, 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasDiv = 1, 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasNegate = 1, 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 0, 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 0, 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 0, 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 0, 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 0 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; }; 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t r64; 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath r64 = vld1_f32((float *)&from); 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vcombine_f32(r64, r64)); 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); } 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); } 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); } 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4ui b = vreinterpretq_u32_f32(a.v); 812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR()))); 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v1, v2; 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_re | a1_re | a2_re | a2_re | 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); 902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Get the imag values of a | a1_im | a1_im | a2_im | a2_im | 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the real a with b 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vmulq_f32(v1, b.v); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the imag a with b 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vmulq_f32(v2, b.v); 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Conjugate v2 972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR())); 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Swap real/imag elements in v2. 997faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez v2 = vrev64q_f32(v2); 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add and return the result 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vaddq_f32(v1, v2)); 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 1062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); } 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); } 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); } 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride) 1302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 1312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f res = pset1<Packet4f>(0.f); 1322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f32(std::real(from[0*stride]), res, 0); 1332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1); 1342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f32(std::real(from[1*stride]), res, 2); 1352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3); 1362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet2cf(res); 1372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 1382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride) 1402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 1412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1)); 1422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); 1432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 1442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 145615d816d068b4d0f5e8df601930b5f160bf7eda1Tim Murraytemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); } 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> EIGEN_ALIGN16 x[2]; 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1q_f32((float *)x, a.v); 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return x[0]; 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a_lo, a_hi; 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f a_r128; 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_lo = vget_low_f32(a.v); 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_hi = vget_high_f32(a.v); 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_r128 = vcombine_f32(a_hi, a_lo); 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(a_r128); 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vrev64q_f32(a.v)); 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a1, a2; 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> s; 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a1 = vget_low_f32(a.v); 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vget_high_f32(a.v); 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vadd_f32(a1, a2); 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1_f32((float *)&s, a2); 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f sum1, sum2, sum; 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add the first two 64-bit float32x2_t of vecs[0] 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v)); 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v)); 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum = vaddq_f32(sum1, sum2); 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(sum); 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a1, a2, v1, v2, prod; 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> s; 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a1 = vget_low_f32(a.v); 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vget_high_f32(a.v); 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_re | a1_re | a2_re | a2_re | 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vdup_lane_f32(a1, 0); 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_im | a1_im | a2_im | a2_im | 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vdup_lane_f32(a1, 1); 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the real a with b 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vmul_f32(v1, a2); 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the imag a with b 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vmul_f32(v2, a2); 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Conjugate v2 2122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR())); 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Swap real/imag elements in v2. 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vrev64_f32(v2); 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add v1, v2 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath prod = vadd_f32(v1, v2); 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1_f32((float *)&s, prod); 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet2cf> 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (Offset==1) 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath first.v = vextq_f32(first.v, second.v, 2); 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, false,true> 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(a, pconj(b)); 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,false> 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(pconj(a), b); 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,true> 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pconj(internal::pmul(a, b)); 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 2702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // TODO optimize it for NEON 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f s, rev_s; 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // this computes the norm 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s = vmulq_f32(b.v, b.v); 2767faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez rev_s = vrev64q_f32(s); 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DEVICE_FUNC inline void 2822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangptranspose(PacketBlock<Packet2cf,2>& kernel) { 2832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v)); 2842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v)); 2852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[1].v = tmp; 2862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 2872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang//---------- double ---------- 2892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG 2902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// See bug 1325, clang fails to call vld1q_u64. 2922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_COMP_CLANG 2932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000}; 2942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 2952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; 2962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA ); 2972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 2982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 2992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct Packet1cd 3002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd() {} 3022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} 3032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d v; 3042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 3052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct packet_traits<std::complex<double> > : default_packet_traits 3072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet1cd type; 3092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet1cd half; 3102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang enum { 3112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Vectorizable = 1, 3122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang AlignedOnScalar = 0, 3132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size = 1, 3142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasHalfPacket = 0, 3152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAdd = 1, 3172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSub = 1, 3182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMul = 1, 3192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasDiv = 1, 3202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasNegate = 1, 3212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAbs = 0, 3222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAbs2 = 0, 3232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMin = 0, 3242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMax = 0, 3252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSetLinear = 0 3262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }; 3272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 3282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; 3302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } 3322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } 3332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) 3352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } 3362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); } 3382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); } 3392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); } 3402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); } 3412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 3432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d v1, v2; 3452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Get the real values of a 3472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v1 = vdupq_lane_f64(vget_low_f64(a.v), 0); 3482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Get the imag values of a 3492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = vdupq_lane_f64(vget_high_f64(a.v), 0); 3502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Multiply the real a with b 3512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v1 = vmulq_f64(v1, b.v); 3522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Multiply the imag a with b 3532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = vmulq_f64(v2, b.v); 3542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Conjugate v2 3552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR)); 3562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Swap real/imag elements in v2. 3572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v2 = preverse<Packet2d>(v2); 3582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Add and return the result 3592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(vaddq_f64(v1, v2)); 3602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) 3632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); 3652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) 3672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); 3692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) 3712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); 3732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 3752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); 3772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); } 3802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } 3822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } 3832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); } 3852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) 3872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d res = pset1<Packet2d>(0.0); 3892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f64(std::real(from[0*stride]), res, 0); 3902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1); 3912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(res); 3922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride) 3952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); 3972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 3992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) 4012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::complex<double> EIGEN_ALIGN16 res; 4032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pstore<std::complex<double> >(&res, a); 4042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return res; 4062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } 4092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); } 4112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; } 4132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); } 4152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<int Offset> 4172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct palign_impl<Offset,Packet1cd> 4182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) 4202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 4212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // FIXME is it sure we never have to align a Packet1cd? 4222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... 4232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 4252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, false,true> 4272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 4292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return padd(pmul(x,y),c); } 4302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 4322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 4332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return internal::pmul(a, pconj(b)); 4342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 4362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,false> 4382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 4402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return padd(pmul(x,y),c); } 4412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 4432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 4442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return internal::pmul(pconj(a), b); 4452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 4472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,true> 4492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 4512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { return padd(pmul(x,y),c); } 4522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 4542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 4552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pconj(internal::pmul(a, b)); 4562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 4582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 4602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // TODO optimize it for NEON 4622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); 4632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d s = pmul<Packet2d>(b.v, b.v); 4642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d rev_s = preverse<Packet2d>(s); 4652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s))); 4672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) 4702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return Packet1cd(preverse(Packet2d(x.v))); 4722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel) 4752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v)); 4772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v)); 4782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[1].v = tmp; 4792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // EIGEN_ARCH_ARM64 4812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 482c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 483c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 484c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 485c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 486c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COMPLEX_NEON_H 487