1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COMPLEX_NEON_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COMPLEX_NEON_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- float ---------- 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet2cf 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf() {} 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v; 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<float> > : default_packet_traits 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet2cf type; 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 1, 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size = 2, 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 1, 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 1, 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 1, 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasDiv = 1, 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasNegate = 1, 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 0, 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 0, 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 0, 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 0, 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 0 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t r64; 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath r64 = vld1_f32((float *)&from); 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vcombine_f32(r64, r64)); 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); } 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); } 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); } 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4ui b = vreinterpretq_u32_f32(a.v); 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v1, v2; 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_re | a1_re | a2_re | a2_re | 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_im | a1_im | a2_im | a2_im | 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the real a with b 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vmulq_f32(v1, b.v); 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the imag a with b 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vmulq_f32(v2, b.v); 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Conjugate v2 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Swap real/imag elements in v2. 837faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez v2 = vrev64q_f32(v2); 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add and return the result 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vaddq_f32(v1, v2)); 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); } 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); } 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); } 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); } 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> EIGEN_ALIGN16 x[2]; 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1q_f32((float *)x, a.v); 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return x[0]; 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a_lo, a_hi; 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f a_r128; 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_lo = vget_low_f32(a.v); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_hi = vget_high_f32(a.v); 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a_r128 = vcombine_f32(a_hi, a_lo); 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(a_r128); 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vrev64q_f32(a.v)); 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a1, a2; 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> s; 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a1 = vget_low_f32(a.v); 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vget_high_f32(a.v); 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vadd_f32(a1, a2); 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1_f32((float *)&s, a2); 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f sum1, sum2, sum; 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add the first two 64-bit float32x2_t of vecs[0] 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v)); 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v)); 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum = vaddq_f32(sum1, sum2); 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(sum); 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float32x2_t a1, a2, v1, v2, prod; 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> s; 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a1 = vget_low_f32(a.v); 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath a2 = vget_high_f32(a.v); 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_re | a1_re | a2_re | a2_re | 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vdup_lane_f32(a1, 0); 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the real values of a | a1_im | a1_im | a2_im | a2_im | 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vdup_lane_f32(a1, 1); 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the real a with b 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v1 = vmul_f32(v1, a2); 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Multiply the imag a with b 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vmul_f32(v2, a2); 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Conjugate v2 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Swap real/imag elements in v2. 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v2 = vrev64_f32(v2); 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add v1, v2 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath prod = vadd_f32(v1, v2); 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vst1_f32((float *)&s, prod); 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet2cf> 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (Offset==1) 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath first.v = vextq_f32(first.v, second.v, 2); 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, false,true> 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(a, pconj(b)); 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,false> 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(pconj(a), b); 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,true> 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pconj(internal::pmul(a, b)); 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO optimize it for AltiVec 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f s, rev_s; 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // this computes the norm 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s = vmulq_f32(b.v, b.v); 2447faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez rev_s = vrev64q_f32(s); 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COMPLEX_NEON_H 254