// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_GEOMETRY_SSE_H
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_GEOMETRY_SSE_H
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<class Derived, class OtherDerived>
19eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_product<Architecture::SSE, Derived, OtherDerived, float>
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
21eda03298de395cf6217486971e6529f92da8da79Miao Wang  enum {
22eda03298de395cf6217486971e6529f92da8da79Miao Wang    AAlignment = traits<Derived>::Alignment,
23eda03298de395cf6217486971e6529f92da8da79Miao Wang    BAlignment = traits<OtherDerived>::Alignment,
24eda03298de395cf6217486971e6529f92da8da79Miao Wang    ResAlignment = traits<Quaternion<float> >::Alignment
25eda03298de395cf6217486971e6529f92da8da79Miao Wang  };
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Quaternion<float> res;
292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
30eda03298de395cf6217486971e6529f92da8da79Miao Wang    __m128 a = _a.coeffs().template packet<AAlignment>(0);
31eda03298de395cf6217486971e6529f92da8da79Miao Wang    __m128 b = _b.coeffs().template packet<BAlignment>(0);
322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
34eda03298de395cf6217486971e6529f92da8da79Miao Wang    pstoret<float,Packet4f,ResAlignment>(
35eda03298de395cf6217486971e6529f92da8da79Miao Wang              &res.x(),
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                                    _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                                               vec4f_swizzle1(b,1,2,0,0))),
392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang                         _mm_xor_ps(mask,_mm_add_ps(s1,s2))));
402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return res;
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
45eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived>
46eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_conj<Architecture::SSE, Derived, float>
472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
48eda03298de395cf6217486971e6529f92da8da79Miao Wang  enum {
49eda03298de395cf6217486971e6529f92da8da79Miao Wang    ResAlignment = traits<Quaternion<float> >::Alignment
50eda03298de395cf6217486971e6529f92da8da79Miao Wang  };
512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    Quaternion<float> res;
542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
55eda03298de395cf6217486971e6529f92da8da79Miao Wang    pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    return res;
572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename VectorLhs,typename VectorRhs>
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
64eda03298de395cf6217486971e6529f92da8da79Miao Wang  enum {
65eda03298de395cf6217486971e6529f92da8da79Miao Wang    ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
66eda03298de395cf6217486971e6529f92da8da79Miao Wang  };
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline typename plain_matrix_type<VectorLhs>::type
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  run(const VectorLhs& lhs, const VectorRhs& rhs)
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    typename plain_matrix_type<VectorLhs>::type res;
75eda03298de395cf6217486971e6529f92da8da79Miao Wang    pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return res;
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
83eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived, class OtherDerived>
84eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_product<Architecture::SSE, Derived, OtherDerived, double>
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
86eda03298de395cf6217486971e6529f92da8da79Miao Wang  enum {
87eda03298de395cf6217486971e6529f92da8da79Miao Wang    BAlignment = traits<OtherDerived>::Alignment,
88eda03298de395cf6217486971e6529f92da8da79Miao Wang    ResAlignment = traits<Quaternion<double> >::Alignment
89eda03298de395cf6217486971e6529f92da8da79Miao Wang  };
90eda03298de395cf6217486971e6529f92da8da79Miao Wang
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Quaternion<double> res;
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const double* a = _a.coeffs().data();
98eda03298de395cf6217486971e6529f92da8da79Miao Wang  Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
99eda03298de395cf6217486971e6529f92da8da79Miao Wang  Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2d a_xx = pset1<Packet2d>(a[0]);
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2d a_yy = pset1<Packet2d>(a[1]);
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2d a_zz = pset1<Packet2d>(a[2]);
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2d a_ww = pset1<Packet2d>(a[3]);
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // two temporaries:
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Packet2d t1, t2;
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  /*
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * t1 = ww*xy + yy*zw
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * t2 = zz*xy - xx*zw
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * res.xy = t1 +/- swap(t2)
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   */
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef EIGEN_VECTORIZE_SSE3
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_UNUSED_VARIABLE(mask)
117eda03298de395cf6217486971e6529f92da8da79Miao Wang  pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else
119eda03298de395cf6217486971e6529f92da8da79Miao Wang  pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  /*
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * t1 = ww*zw - yy*xy
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * t2 = zz*zw + xx*xy
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath   */
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef EIGEN_VECTORIZE_SSE3
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EIGEN_UNUSED_VARIABLE(mask)
131eda03298de395cf6217486971e6529f92da8da79Miao Wang  pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else
133eda03298de395cf6217486971e6529f92da8da79Miao Wang  pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return res;
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
140eda03298de395cf6217486971e6529f92da8da79Miao Wangtemplate<class Derived>
141eda03298de395cf6217486971e6529f92da8da79Miao Wangstruct quat_conj<Architecture::SSE, Derived, double>
1422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{
143eda03298de395cf6217486971e6529f92da8da79Miao Wang  enum {
144eda03298de395cf6217486971e6529f92da8da79Miao Wang    ResAlignment = traits<Quaternion<double> >::Alignment
145eda03298de395cf6217486971e6529f92da8da79Miao Wang  };
1462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
1472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  {
1482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    Quaternion<double> res;
1492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    const __m128d mask0 = _mm_setr_pd(-0.,-0.);
1502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    const __m128d mask2 = _mm_setr_pd(-0.,0.);
151eda03298de395cf6217486971e6529f92da8da79Miao Wang    pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
152eda03298de395cf6217486971e6529f92da8da79Miao Wang    pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
1532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    return res;
1542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
1552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang};
1562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_GEOMETRY_SSE_H
162