1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_COMPLEX_SSE_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_COMPLEX_SSE_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- float ---------- 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet2cf 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf() {} 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {} 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128 v; 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<float> > : default_packet_traits 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet2cf type; 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 1, 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size = 2, 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 1, 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 1, 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 1, 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasDiv = 1, 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasNegate = 1, 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 0, 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 0, 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 0, 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 0, 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 0 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; }; 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_xor_ps(a.v,mask)); 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_xor_ps(a.v,mask)); 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO optimize it for SSE3 and 4 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v), 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_ps(_mm_movehdup_ps(a.v), 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b.v, 1, 0, 3, 2)))); 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// vec4f_swizzle1(b.v, 1, 0, 3, 2)))); 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000)); 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); } 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); } 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); } 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&real_ref(*from))); } 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&real_ref(*from))); } 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2cf res; 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #if EIGEN_GNUC_AT_MOST(4,2) 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // workaround annoying "may be used uninitialized in this function" warning with gcc 4.2 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from)); 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res.v = _mm_loadl_pi(res.v, (const __m64*)&from); 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_movelh_ps(res.v,res.v)); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); } 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&real_ref(*to), from.v); } 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&real_ref(*to), from.v); } 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #if EIGEN_GNUC_AT_MOST(4,3) 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares... 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // This workaround also fix invalid code generation with gcc 4.3 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN16 std::complex<float> res[2]; 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_store_ps((float*)res, a.v); 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res[0]; 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::complex<float> res; 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_storel_pi((__m64*)&res, a.v); 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); } 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v)))); 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v))); 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v)))); 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet2cf> 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second) 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (Offset==1) 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath first.v = _mm_movehl_ps(first.v, first.v); 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath first.v = _mm_movelh_ps(first.v, second.v); 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, false,true> 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(a, pconj(b)); 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b.v, 1, 0, 3, 2)))); 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,false> 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(pconj(a), b); 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet2cf, true,true> 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pconj(internal::pmul(a, b)); 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b.v, 1, 0, 3, 2)))); 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet4f, Packet2cf, false,false> 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(c, pmul(x,y)); } 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return Packet2cf(Eigen::internal::pmul(x, y.v)); } 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2cf, Packet4f, false,false> 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(c, pmul(x,y)); } 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return Packet2cf(Eigen::internal::pmul(x.v, y)); } 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO optimize it for SSE3 and 4 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b); 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128 s = _mm_mul_ps(b.v,b.v); 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1))))); 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x) 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//---------- double ---------- 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct Packet1cd 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd() {} 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {} 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128d v; 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<std::complex<double> > : default_packet_traits 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet1cd type; 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 0, 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size = 1, 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 1, 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 1, 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 1, 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasDiv = 1, 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasNegate = 1, 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 0, 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 0, 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 0, 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 0, 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 0 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; }; 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); } 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); } 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); } 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_xor_pd(a.v,mask)); 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO optimize it for SSE3 and 4 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), 282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec2d_swizzle1(b.v, 1, 0)))); 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); 286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec2d_swizzle1(b.v, 1, 0)), mask))); 289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); } 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); } 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); } 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// FIXME force unaligned load, this is a temporary fix 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); } 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); } 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); } 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); } 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// FIXME force unaligned store, this is a temporary fix 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } 310c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 311c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } 312c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 313c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) 314c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 315c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN16 double res[2]; 316c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_store_pd(res, a.v); 317c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return std::complex<double>(res[0],res[1]); 318c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 319c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 320c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } 321c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 322c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) 323c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 324c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(a); 325c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 326c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 327c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) 328c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 329c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return vecs[0]; 330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 332c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) 333c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 334c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(a); 335c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 336c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 337c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 338c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet1cd> 339c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 340c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) 341c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 342c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // FIXME is it sure we never have to align a Packet1cd? 343c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... 344c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 345c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 346c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 347c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet1cd, Packet1cd, false,true> 348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 349c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 350c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 351c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 352c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 353c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 354c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 355c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(a, pconj(b)); 356c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 357c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); 358c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), 359c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), 360c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec2d_swizzle1(b.v, 1, 0)))); 361c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 362c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 363c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 364c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 365c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,false> 366c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 367c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 368c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 369c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 370c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 371c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 372c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 373c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return internal::pmul(pconj(a), b); 374c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 375c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); 376c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), 377c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), 378c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec2d_swizzle1(b.v, 1, 0)), mask))); 379c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 380c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 381c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 382c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 383c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet1cd, Packet1cd, true,true> 384c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 385c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const 386c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(pmul(x,y),c); } 387c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 388c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const 389c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 390c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_VECTORIZE_SSE3 391c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pconj(internal::pmul(a, b)); 392c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 393c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); 394c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), 395c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), 396c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec2d_swizzle1(b.v, 1, 0)))); 397c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 398c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 399c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 400c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 401c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet2d, Packet1cd, false,false> 402c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 403c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const 404c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(c, pmul(x,y)); } 405c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 406c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const 407c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return Packet1cd(Eigen::internal::pmul(x, y.v)); } 408c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 409c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 410c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct conj_helper<Packet1cd, Packet2d, false,false> 411c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 412c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const 413c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return padd(c, pmul(x,y)); } 414c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 415c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const 416c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { return Packet1cd(Eigen::internal::pmul(x.v, y)); } 417c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 418c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 419c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) 420c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 421c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO optimize it for SSE3 and 4 422c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b); 423c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128d s = _mm_mul_pd(b.v,b.v); 424c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); 425c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 426c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 427c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x) 428c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 429c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return Packet1cd(preverse(x.v)); 430c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 431c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 432c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 433c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 434c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 435c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 436c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_COMPLEX_SSE_H 437