1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_GEOMETRY_SSE_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_GEOMETRY_SSE_H 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<class Derived, class OtherDerived> 19eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_product<Architecture::SSE, Derived, OtherDerived, float> 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 21eda03298de395cf6217486971e6529f92da8da79Miao Wang enum { 22eda03298de395cf6217486971e6529f92da8da79Miao Wang AAlignment = traits<Derived>::Alignment, 23eda03298de395cf6217486971e6529f92da8da79Miao Wang BAlignment = traits<OtherDerived>::Alignment, 24eda03298de395cf6217486971e6529f92da8da79Miao Wang ResAlignment = traits<Quaternion<float> >::Alignment 25eda03298de395cf6217486971e6529f92da8da79Miao Wang }; 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Quaternion<float> res; 292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); 30eda03298de395cf6217486971e6529f92da8da79Miao Wang __m128 a = _a.coeffs().template packet<AAlignment>(0); 31eda03298de395cf6217486971e6529f92da8da79Miao Wang __m128 b = _b.coeffs().template packet<BAlignment>(0); 322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); 332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); 34eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<float,Packet4f,ResAlignment>( 35eda03298de395cf6217486971e6529f92da8da79Miao Wang &res.x(), 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec4f_swizzle1(b,1,2,0,0))), 392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang _mm_xor_ps(mask,_mm_add_ps(s1,s2)))); 402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 45eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived> 46eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_conj<Architecture::SSE, Derived, float> 472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 48eda03298de395cf6217486971e6529f92da8da79Miao Wang enum { 49eda03298de395cf6217486971e6529f92da8da79Miao Wang ResAlignment = traits<Quaternion<float> >::Alignment 50eda03298de395cf6217486971e6529f92da8da79Miao Wang }; 512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static inline Quaternion<float> run(const QuaternionBase<Derived>& q) 522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Quaternion<float> res; 542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); 55eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); 562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return res; 572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename VectorLhs,typename VectorRhs> 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 64eda03298de395cf6217486971e6529f92da8da79Miao Wang enum { 65eda03298de395cf6217486971e6529f92da8da79Miao Wang ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment 66eda03298de395cf6217486971e6529f92da8da79Miao Wang }; 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline typename plain_matrix_type<VectorLhs>::type 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath run(const VectorLhs& lhs, const VectorRhs& rhs) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0); 712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0); 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typename plain_matrix_type<VectorLhs>::type res; 75eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 83eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived, class OtherDerived> 84eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_product<Architecture::SSE, Derived, OtherDerived, double> 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 86eda03298de395cf6217486971e6529f92da8da79Miao Wang enum { 87eda03298de395cf6217486971e6529f92da8da79Miao Wang BAlignment = traits<OtherDerived>::Alignment, 88eda03298de395cf6217486971e6529f92da8da79Miao Wang ResAlignment = traits<Quaternion<double> >::Alignment 89eda03298de395cf6217486971e6529f92da8da79Miao Wang }; 90eda03298de395cf6217486971e6529f92da8da79Miao Wang 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Quaternion<double> res; 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const double* a = _a.coeffs().data(); 98eda03298de395cf6217486971e6529f92da8da79Miao Wang Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); 99eda03298de395cf6217486971e6529f92da8da79Miao Wang Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2d a_xx = pset1<Packet2d>(a[0]); 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2d a_yy = pset1<Packet2d>(a[1]); 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2d a_zz = pset1<Packet2d>(a[2]); 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2d a_ww = pset1<Packet2d>(a[3]); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // two temporaries: 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet2d t1, t2; 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * t1 = ww*xy + yy*zw 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * t2 = zz*xy - xx*zw 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * res.xy = t1 +/- swap(t2) 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw)); 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef EIGEN_VECTORIZE_SSE3 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_UNUSED_VARIABLE(mask) 117eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else 119eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath /* 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * t1 = ww*zw - yy*xy 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * t2 = zz*zw + xx*xy 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2) 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy)); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef EIGEN_VECTORIZE_SSE3 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_UNUSED_VARIABLE(mask) 131eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else 133eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return res; 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 140eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived> 141eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_conj<Architecture::SSE, Derived, double> 1422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 143eda03298de395cf6217486971e6529f92da8da79Miao Wang enum { 144eda03298de395cf6217486971e6529f92da8da79Miao Wang ResAlignment = traits<Quaternion<double> >::Alignment 145eda03298de395cf6217486971e6529f92da8da79Miao Wang }; 1462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static inline Quaternion<double> run(const QuaternionBase<Derived>& q) 1472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 1482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Quaternion<double> res; 1492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const __m128d mask0 = _mm_setr_pd(-0.,-0.); 1502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const __m128d mask2 = _mm_setr_pd(-0.,0.); 151eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); 152eda03298de395cf6217486971e6529f92da8da79Miao Wang pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); 1532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return res; 1542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 1552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 1562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_GEOMETRY_SSE_H 162