1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/* 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Copyright (c) 2011, Intel Corporation. All rights reserved. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Redistribution and use in source and binary forms, with or without modification, 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath are permitted provided that the following conditions are met: 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions of source code must retain the above copyright notice, this 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath list of conditions and the following disclaimer. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Redistributions in binary form must reproduce the above copyright notice, 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath this list of conditions and the following disclaimer in the documentation 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath and/or other materials provided with the distribution. 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Neither the name of Intel Corporation nor the names of its contributors may 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath be used to endorse or promote products derived from this software without 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath specific prior written permission. 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ******************************************************************************** 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Content : Eigen bindings to Intel(R) MKL 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ******************************************************************************** 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath*/ 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_ASSIGN_VML_H 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_ASSIGN_VML_H 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Op> struct vml_call 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ enum { IsSupported = 0 }; }; 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Dst, typename Src, typename UnaryOp> 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathclass vml_assign_traits 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath private: 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath DstHasDirectAccess = Dst::Flags & DirectAccessBit, 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SrcHasDirectAccess = Src::Flags & DirectAccessBit, 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(Dst::RowsAtCompileTime), 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : int(Dst::MaxRowsAtCompileTime), 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MaxSizeAtCompileTime = Dst::SizeAtCompileTime, 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MayEnableVml = MightEnableVml && LargeEnough, 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MayLinearize = MayEnableVml && MightLinearize 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath public: 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Traversal = MayLinearize ? LinearVectorizedTraversal 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : MayEnableVml ? InnerVectorizedTraversal 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : DefaultTraversal 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling, 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal > 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn> 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal> 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived1::Scalar Scalar; 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename Derived1::Index Index; 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // in case we want to (or have to) skip VML at runtime we can call: 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index innerSize = dst.innerSize(); 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Index outerSize = dst.outerSize(); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index outer = 0; outer < outerSize; ++outer) { 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath &(src.nestedExpression().coeffRef(0, outer)); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr ); 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling> 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal> 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src) 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // in case we want to (or have to) skip VML at runtime we can call: 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src); 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Macroses 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<typename Derived1, typename Derived2, typename UnaryOp> \ 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \ 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \ 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \ 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_MODE VML_HA 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_MODE VML_LA 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { IsSupported = 1 }; \ 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \ 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { IsSupported = 1 }; \ 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \ 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \ 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { IsSupported = 1 }; \ 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \ 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGENTYPE exponent = func.m_exponent; \ 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath (VMLTYPE*)dst, &vmlMode); \ 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// The vm*powx functions are not avaibale in the windows version of MKL. 2137faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez#ifndef _WIN32 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_ASSIGN_VML_H 225