1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra.
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_GENERIC_PACKET_MATH_H
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_GENERIC_PACKET_MATH_H
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen {
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal {
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal
  * \file GenericPacketMath.h
  *
  * Default implementation for types not supported by the vectorization.
  * In practice, these functions are provided to make it easier to write
  * generic vectorized code.
  */
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Debug hooks for packet loads/stores. Each macro defaults to an empty
// expansion unless it was already defined before this header is included.
// NOTE(review): presumably expanded inside the architecture-specific
// load/store routines -- no use is visible in this file; confirm.
#ifndef EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#ifndef EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_STORE
#endif

#ifndef EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_STORE
#endif
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal
  * Default capability flags inherited by every packet_traits<T>.
  * A flag set to 1 marks the operation as available by default, 0 as not
  * available unless a traits class overrides it.
  */
struct default_packet_traits
{
  enum {
    // basic arithmetic / element-wise operations: on by default
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasNegate = 1,
    HasAbs    = 1,
    HasAbs2   = 1,
    HasMin    = 1,
    HasMax    = 1,
    HasConj   = 1,
    HasSetLinear = 1,

    // division and transcendental functions: off by default
    HasDiv    = 0,
    HasSqrt   = 0,
    HasExp    = 0,
    HasLog    = 0,
    HasPow    = 0,

    HasSin    = 0,
    HasCos    = 0,
    HasTan    = 0,
    HasASin   = 0,
    HasACos   = 0,
    HasATan   = 0
  };
};
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> struct packet_traits : default_packet_traits
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef T type;
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    Vectorizable = 0,
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    size = 1,
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    AlignedOnScalar = 0
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  enum {
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAdd    = 0,
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSub    = 0,
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMul    = 0,
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasNegate = 0,
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs    = 0,
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasAbs2   = 0,
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMin    = 0,
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasMax    = 0,
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasConj   = 0,
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    HasSetLinear = 0
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  };
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath};
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a + b (coeff-wise)
  * Generic fallback: forwards to \c operator+ of \c Packet. */
template<typename Packet> inline Packet
padd(const Packet& a,
        const Packet& b) { return a+b; }
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a - b (coeff-wise)
  * Generic fallback: forwards to \c operator- of \c Packet. */
template<typename Packet> inline Packet
psub(const Packet& a,
        const Packet& b) { return a-b; }
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns -a (coeff-wise)
  * Generic fallback: forwards to unary \c operator- of \c Packet. */
template<typename Packet> inline Packet
pnegate(const Packet& a) { return -a; }
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns conj(a) (coeff-wise) */
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet
1097faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezpconj(const Packet& a) { return numext::conj(a); }
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a * b (coeff-wise)
  * Generic fallback: forwards to \c operator* of \c Packet. */
template<typename Packet> inline Packet
pmul(const Packet& a,
        const Packet& b) { return a*b; }
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a / b (coeff-wise)
  * Generic fallback: forwards to \c operator/ of \c Packet, so integer
  * packets get integer division. */
template<typename Packet> inline Packet
pdiv(const Packet& a,
        const Packet& b) { return a/b; }
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the min of \a a and \a b  (coeff-wise)
  * The name is parenthesized so that a function-like \c min macro
  * cannot expand here. */
template<typename Packet> inline Packet
pmin(const Packet& a,
        const Packet& b) { using std::min; return (min)(a, b); }
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the max of \a a and \a b  (coeff-wise)
  * The name is parenthesized so that a function-like \c max macro
  * cannot expand here. */
template<typename Packet> inline Packet
pmax(const Packet& a,
        const Packet& b) { using std::max; return (max)(a, b); }
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the absolute value of \a a
  * \c std::abs is brought into scope and the call is left unqualified so
  * that overloads found by argument-dependent lookup are also candidates. */
template<typename Packet> inline Packet
pabs(const Packet& a) { using std::abs; return abs(a); }
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the bitwise and of \a a and \a b
  * Generic fallback: forwards to \c operator& of \c Packet. */
template<typename Packet> inline Packet
pand(const Packet& a, const Packet& b) { return a & b; }
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the bitwise or of \a a and \a b
  * Generic fallback: forwards to \c operator| of \c Packet. */
template<typename Packet> inline Packet
por(const Packet& a, const Packet& b) { return a | b; }
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the bitwise xor of \a a and \a b
  * Generic fallback: forwards to \c operator^ of \c Packet. */
template<typename Packet> inline Packet
pxor(const Packet& a, const Packet& b) { return a ^ b; }
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the bitwise andnot of \a a and \a b, i.e. \c a \c & \c ~b
  * Every bit set in \a b is cleared in the result. The complement must be the
  * bitwise \c ~ : a logical \c ! would collapse \a b to 0 or 1 and the result
  * would only ever keep the lowest bit of \a a. */
template<typename Packet> inline Packet
pandnot(const Packet& a, const Packet& b) { return a & (~b); }
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpload(const typename unpacket_traits<Packet>::type* from) { return *from; }
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from, (un-aligned load) */
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
1597faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \internal \returns a packet with elements of \a *from duplicated.
1607faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
1617faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
1627faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * Currently, this function is only used for scalar * complex products.
1637faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez */
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpset1(const typename unpacket_traits<Packet>::type& a) { return a; }
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar> inline typename packet_traits<Scalar>::type
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathplset(const Scalar& a) { return a; }
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned
  * Generic fallback: a plain assignment through \a to. */
template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
{ (*to) = from; }
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal copy the packet \a from to \a *to, (un-aligned store)
  * Generic fallback: a plain assignment through \a to. */
template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
{ (*to) = from; }
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal tries to do cache prefetching of \a addr
  * The hint is only issued on compilers known to provide
  * \c __builtin_prefetch (GCC-compatible front ends); elsewhere this is a
  * no-op. It never reads or writes through \a addr itself. */
template<typename Scalar> inline void prefetch(const Scalar* addr)
{
#if defined(__GNUC__) || defined(__clang__)
  __builtin_prefetch(addr);
#else
  // No prefetch intrinsic assumed here; reference the parameter so the
  // build stays free of unused-parameter warnings.
  (void)addr;
#endif
}
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the first element of a packet */
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; }
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i]
  * Generic fallback: returns \c vecs[0] unchanged. */
template<typename Packet> inline Packet
preduxp(const Packet* vecs) { return vecs[0]; }
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the sum of the elements of \a a*/
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; }
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the product of the elements of \a a*/
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; }
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the min of the elements of \a a*/
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; }
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the max of the elements of \a a*/
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; }
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns the reversed elements of \a a
  * Generic fallback: returns \a a unchanged. */
template<typename Packet> inline Packet preverse(const Packet& a)
{ return a; }
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns \a a with real and imaginary part flipped (for complex type only)
  * The result is built as \c Packet(imag(a), real(a)); \c real and \c imag are
  * called unqualified. */
template<typename Packet> inline Packet pcplxflip(const Packet& a)
{
  // FIXME: uncomment the following in case we drop the internal imag and real functions.
//   using std::imag;
//   using std::real;
  return Packet(imag(a),real(a));
}
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/**************************
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Special math functions
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************/
232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the sine of \a a (coeff-wise) */
234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2357faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket psin(const Packet& a) { using std::sin; return sin(a); }
236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the cosine of \a a (coeff-wise) */
238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2397faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pcos(const Packet& a) { using std::cos; return cos(a); }
240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the tan of \a a (coeff-wise) */
242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2437faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket ptan(const Packet& a) { using std::tan; return tan(a); }
244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the arc sine of \a a (coeff-wise) */
246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2477faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pasin(const Packet& a) { using std::asin; return asin(a); }
248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the arc cosine of \a a (coeff-wise) */
250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2517faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pacos(const Packet& a) { using std::acos; return acos(a); }
252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the exp of \a a (coeff-wise) */
254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2557faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pexp(const Packet& a) { using std::exp; return exp(a); }
256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the log of \a a (coeff-wise) */
258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2597faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket plog(const Packet& a) { using std::log; return log(a); }
260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the square-root of \a a (coeff-wise) */
262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
2637faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/***************************************************************************
266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* The following functions might not have to be overwritten for vectorized types
267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/
268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet>
272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  pstore(to, pset1<Packet>(a));
275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal \returns a * b + c (coeff-wise)
  * Composed from \c pmul followed by \c padd, so it inherits whatever
  * overloads/specializations of those two apply to \c Packet. */
template<typename Packet> inline Packet
pmadd(const Packet&  a,
         const Packet&  b,
         const Packet&  c)
{ return padd(pmul(a, b),c); }
283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from.
285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet, int LoadMode>
287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if(LoadMode == Aligned)
290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return pload<Packet>(from);
291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else
292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return ploadu<Packet>(from);
293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy the packet \a from to \a *to.
296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Packet, int LoadMode>
298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void pstoret(Scalar* to, const Packet& from)
299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if(LoadMode == Aligned)
301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    pstore(to, from);
302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else
303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    pstoreu(to, from);
304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/** \internal default implementation of palign() allowing partial specialization
  * The default \c run leaves both arguments untouched. */
template<int Offset,typename PacketType>
struct palign_impl
{
  // by default data are aligned, so there is nothing to be done :)
  static inline void run(PacketType&, const PacketType&) {}
};
313c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
3147faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
3157faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * of \a first and \a Offset first elements of \a second.
3167faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *
3177faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * This function is currently only used to optimize matrix-vector products on unligned matrices.
3187faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
3197faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * at the position \a Offset. For instance, for packets of 4 elements, we have:
3207faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *  Input:
3217faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *  - first = {f0,f1,f2,f3}
3227faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *  - second = {s0,s1,s2,s3}
3237faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  * Output:
3247faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *   - if Offset==0 then {f0,f1,f2,f3}
3257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *   - if Offset==1 then {f1,f2,f3,s0}
3267faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *   - if Offset==2 then {f2,f3,s0,s1}
3277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  *   - if Offset==3 then {f3,s0,s1,s3}
3287faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez  */
329c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset,typename PacketType>
330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void palign(PacketType& first, const PacketType& second)
331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
332c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  palign_impl<Offset,PacketType>::run(first,second);
333c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
334c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
335c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/***************************************************************************
336c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Fast complex products (GCC generates a function call which is very slow)
337c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/
338c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
339c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
340c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
341c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
342c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
343c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
344c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
345c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal
346c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
347c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen
348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
349c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_GENERIC_PACKET_MATH_H
350c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
351