/*
 Copyright (c) 2011, Intel Corporation. All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors may
   be used to endorse or promote products derived from this software without
   specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ********************************************************************************
 *   Content : Eigen bindings to Intel(R) MKL
 *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
 ********************************************************************************
*/
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
#ifndef EIGEN_ASSIGN_VML_H
#define EIGEN_ASSIGN_VML_H

namespace Eigen {

namespace internal {
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal
  * Describes whether (and how) a coefficient-wise unary functor \c Op maps onto
  * an Intel MKL VML routine.
  *
  * This primary template is the fallback: it only reports IsSupported = 0 and
  * has no run() member. Functors that VML can evaluate get an explicit
  * specialization (generated by the EIGEN_MKL_VML_DECLARE_* macros) that sets
  * IsSupported to 1 and provides a static run().
  */
template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Dst, typename Src, typename UnaryOp>
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathclass vml_assign_traits
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  private:
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    enum {
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SrcHasDirectAccess = Src::Flags & DirectAccessBit,
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                : int(Dst::RowsAtCompileTime),
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                    : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                    : int(Dst::MaxRowsAtCompileTime),
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                     && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MayEnableVml = MightEnableVml && LargeEnough,
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MayLinearize = MayEnableVml && MightLinearize
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    };
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  public:
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    enum {
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Traversal = MayLinearize ? LinearVectorizedTraversal
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                : MayEnableVml ? InnerVectorizedTraversal
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                : DefaultTraversal
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    };
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef typename Derived1::Scalar Scalar;
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef typename Derived1::Index Index;
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // in case we want to (or have to) skip VML at runtime we can call:
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Index innerSize = dst.innerSize();
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    const Index outerSize = dst.outerSize();
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for(Index outer = 0; outer < outerSize; ++outer) {
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                                                &(src.nestedExpression().coeffRef(0, outer));
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // in case we want to (or have to) skip VML at runtime we can call:
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Macros
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Partially specializes assign_impl for a CwiseUnaryOp source and the
 * Specialized kind, for one (TRAVERSAL, UNROLLING) combination. The generated
 * run() forwards to vml_assign_impl, which either uses VML or inherits the
 * BuiltIn assign_impl depending on vml_assign_traits. */
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
  template<typename Derived1, typename Derived2, typename UnaryOp> \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized>  {  \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
    } \
  };
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Accuracy mode passed to the mode-taking VML routines: VML_LA when
// EIGEN_FAST_MATH is defined to 1, VML_HA in every other case (including when
// EIGEN_FAST_MATH is not defined at all).
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define  EIGEN_MKL_VML_MODE VML_HA
#else
#define  EIGEN_MKL_VML_MODE VML_LA
#endif
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Declares the vml_call specialization for scalar_<EIGENOP>_op<EIGENTYPE>, backed
 * by a VML routine invoked as VMLOP(n, src, dst), i.e. without a mode argument.
 *   EIGENOP   - functor name fragment (e.g. square)
 *   VMLOP     - VML routine to call
 *   EIGENTYPE - scalar type on the Eigen side
 *   VMLTYPE   - matching MKL type; both pointers are cast to it
 * The functor argument of run() is unused. size is declared MKL_INT, the
 * element-count type of the VML interface, so that it is not squeezed through
 * a plain int first. */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,    \
                            MKL_INT size, const EIGENTYPE* src, EIGENTYPE* dst) { \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
    }                                                                            \
  };
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for VML routines that take the
 * accuracy mode as a trailing argument: VMLOP(n, src, dst, mode). The mode is
 * EIGEN_MKL_VML_MODE. The functor argument of run() is unused. size is declared
 * MKL_INT, the element-count type of the VML interface. */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,    \
                            MKL_INT size, const EIGENTYPE* src, EIGENTYPE* dst) { \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
    }                                                                            \
  };
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Declares the vml_call specialization for scalar_<EIGENOP>_op<EIGENTYPE> where
 * the functor carries a fixed exponent (read from func.m_exponent). VMLOP is
 * called with every argument passed by address:
 *   VMLOP(&n, src, &exponent, dst, &mode)
 * with mode = EIGEN_MKL_VML_MODE.
 * size is declared MKL_INT because its address is handed to the routine, which
 * expects a pointer to MKL_INT; an int would be the wrong pointee type whenever
 * MKL_INT is not int (ILP64 interface). */
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
                          MKL_INT size, const EIGENTYPE* src, EIGENTYPE* dst) {  \
      EIGENTYPE exponent = func.m_exponent;                                      \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
                        (VMLTYPE*)dst, &vmlMode);                                \
    }                                                                            \
  };
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL (VML routines
 * without a mode argument). VMLOP is the routine name without its type prefix;
 * vs / vd / vc / vz is pasted in front of it for
 * float / double / scomplex / dcomplex respectively. */

/* float and double */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

/* scomplex and dcomplex, cast to MKL_Complex8 / MKL_Complex16 */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

/* all four scalar types */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/* Convenience wrappers around EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA (VML routines
 * taking a mode argument). VMLOP is the routine name without its type prefix;
 * vms / vmd / vmc / vmz is pasted in front of it for
 * float / double / scomplex / dcomplex respectively. */

/* float and double */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

/* scomplex and dcomplex, cast to MKL_Complex8 / MKL_Complex16 */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

/* all four scalar types */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// The vm*powx functions are not avaibale in the windows version of MKL.
2137faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez#ifndef _WIN32
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_VML_H
225