// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H

namespace Eigen {

namespace internal {

/** \internal \returns the arcsin of \a a (coeff-wise)
  *
  * Generic fallback: forwards to std::asin, so \a Packet must be a type for
  * which an overload of std::asin exists.
  */
template<typename Packet> inline static Packet pasin(Packet a) { return std::asin(a); }

#ifdef EIGEN_VECTORIZE_SSE

/** \internal \returns the arcsin of \a x (coeff-wise), SSE single-precision version
  *
  * Works on a = |x| and re-applies the sign of \a x at the end. Two ranges are
  * handled without branching, by building a per-lane mask:
  *  - a <= 0.5 : asin(a) ~= a + a*z*P(z)                with z = a*a
  *  - a >  0.5 : asin(a) ~= pi/2 - 2*(s + s*z*P(z))     with z = (1-a)/2, s = sqrt(z)
  * where P is the degree-4 polynomial whose coefficients are asin1..asin5.
  *
  * The reduced argument is selected per lane first, so that P is evaluated only
  * once for the whole packet.
  */
template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
{
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);

  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);

  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);

  // Polynomial coefficients, highest degree first.
  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);

  const Packet4f a = pabs(x);                              // absolute value
  const Packet4f sign_bit = _mm_and_ps(x, p4f_sign_mask);  // sign bit of each input lane

  // All-ones in the lanes where a > 0.5, all-zeros elsewhere.
  const Packet4f branch_mask = _mm_cmpgt_ps(a, p4f_half);

  // Reduced arguments for the a > 0.5 range: z = 0.5 - 0.5*a, s = sqrt(z).
  const Packet4f z_large = pmadd(p4f_minus_half, a, p4f_half);
  const Packet4f s_large = psqrt(z_large);

  // Reduced argument for the a <= 0.5 range: z = a*a (and s = a).
  const Packet4f z_small = pmul(a, a);

  // Per-lane select of (z, s) according to the range the lane falls in.
  const Packet4f z = _mm_or_ps(_mm_and_ps(branch_mask, z_large),
                               _mm_andnot_ps(branch_mask, z_small));
  const Packet4f s = _mm_or_ps(_mm_and_ps(branch_mask, s_large),
                               _mm_andnot_ps(branch_mask, a));

  // Horner evaluation of P(z), then s + s*z*P(z).
  Packet4f poly = pmadd(p4f_asin1, z, p4f_asin2);
  poly = pmadd(poly, z, p4f_asin3);
  poly = pmadd(poly, z, p4f_asin4);
  poly = pmadd(poly, z, p4f_asin5);
  poly = pmul(poly, z);  // plain mul by z so that the final step can be a madd
  const Packet4f core = pmadd(poly, s, s);

  // Only meaningful in the a > 0.5 lanes: pi/2 - 2*core.
  const Packet4f large_result = psub(p4f_pi_over_2, padd(core, core));

  /* select the correct result for each lane */
  const Packet4f result = _mm_or_ps(_mm_and_ps(branch_mask, large_result),
                                    _mm_andnot_ps(branch_mask, core));

  /* update the sign */
  return _mm_xor_ps(result, sign_bit);
}

#endif // EIGEN_VECTORIZE_SSE

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H