1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
5// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
12#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
13
14namespace Eigen {
15
16namespace internal {
17
/** \internal \returns the arcsin of \a a (coeff-wise)
  *
  * Generic fallback used when no packet-specific specialization exists.
  * The call is left unqualified after a using-declaration so that scalar
  * types providing their own asin() are found through argument-dependent
  * lookup, while built-in floating-point types still resolve to std::asin.
  */
template<typename Packet> inline static Packet pasin(Packet a)
{
  using std::asin;
  return asin(a);
}
20
21#ifdef EIGEN_VECTORIZE_SSE
22
23template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
24{
25  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
26  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);
27  _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5);
28
29  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
30
31  _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654);
32  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);
33
34  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
35  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
36  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
37  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
38  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);
39
40  Packet4f a = pabs(x);//got the absolute value
41
42  Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit
43
44  Packet4f z1,z2;//will need them during computation
45
46
47//will compute the two branches for asin
48//so first compare with half
49
50  Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take
51//both will be taken, and finally results will be merged
52//the branch for values >0.5
53
54    {
55//the core series expansion
56    z1=pmadd(p4f_minus_half,a,p4f_half);
57    Packet4f x1=psqrt(z1);
58    Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2);
59    Packet4f s2=pmadd(s1, z1, p4f_asin3);
60    Packet4f s3=pmadd(s2,z1, p4f_asin4);
61    Packet4f s4=pmadd(s3,z1, p4f_asin5);
62    Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
63    z1=pmadd(temp,x1,x1);
64    z1=padd(z1,z1);
65    z1=psub(p4f_pi_over_2,z1);
66    }
67
68    {
69//the core series expansion
70    Packet4f x2=a;
71    z2=pmul(x2,x2);
72    Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2);
73    Packet4f s2=pmadd(s1, z2, p4f_asin3);
74    Packet4f s3=pmadd(s2,z2, p4f_asin4);
75    Packet4f s4=pmadd(s3,z2, p4f_asin5);
76    Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
77    z2=pmadd(temp,x2,x2);
78    }
79
80/* select the correct result from the two branch evaluations */
81  z1  = _mm_and_ps(branch_mask, z1);
82  z2  = _mm_andnot_ps(branch_mask, z2);
83  Packet4f z  = _mm_or_ps(z1,z2);
84
85/* update the sign */
86  return _mm_xor_ps(z, sign_bit);
87}
88
89#endif // EIGEN_VECTORIZE_SSE
90
91} // end namespace internal
92
93} // end namespace Eigen
94
95#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
96