1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_GENERIC_PACKET_MATH_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_GENERIC_PACKET_MATH_H 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \file GenericPacketMath.h 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Default implementation for types not supported by the vectorization. 
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * In practice these functions are provided to make easier the writing 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * of generic vectorized code. 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */ 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_DEBUG_ALIGNED_LOAD 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_DEBUG_ALIGNED_LOAD 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_DEBUG_UNALIGNED_LOAD 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_DEBUG_UNALIGNED_LOAD 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_DEBUG_ALIGNED_STORE 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_DEBUG_ALIGNED_STORE 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_DEBUG_UNALIGNED_STORE 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_DEBUG_UNALIGNED_STORE 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct default_packet_traits 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 1, 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 1, 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 1, 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan 
Kamath HasNegate = 1, 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 1, 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 1, 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 1, 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 1, 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasConj = 1, 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 1, 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasDiv = 0, 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSqrt = 0, 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasExp = 0, 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasLog = 0, 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasPow = 0, 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSin = 0, 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasCos = 0, 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasTan = 0, 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasASin = 0, 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasACos = 0, 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasATan = 0 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> struct packet_traits : default_packet_traits 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef T type; 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 0, 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size = 1, 
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 0 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAdd = 0, 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSub = 0, 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMul = 0, 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasNegate = 0, 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs = 0, 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasAbs2 = 0, 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMin = 0, 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasMax = 0, 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasConj = 0, 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSetLinear = 0 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a + b (coeff-wise) */ 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpadd(const Packet& a, 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b) { return a+b; } 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a - b (coeff-wise) */ 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpsub(const Packet& a, 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b) { return a-b; } 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal 
\returns -a (coeff-wise) */ 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpnegate(const Packet& a) { return -a; } 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns conj(a) (coeff-wise) */ 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 1097faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandezpconj(const Packet& a) { return numext::conj(a); } 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a * b (coeff-wise) */ 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpmul(const Packet& a, 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b) { return a*b; } 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a / b (coeff-wise) */ 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpdiv(const Packet& a, 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b) { return a/b; } 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the min of \a a and \a b (coeff-wise) */ 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpmin(const Packet& a, 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b) { using std::min; return (min)(a, b); } 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 
/** \internal \returns the max of \a a and \a b  (coeff-wise) */
// The parentheses around (max) keep a function-like `max` macro from expanding.
template<typename Packet> inline Packet
pmax(const Packet& a,
        const Packet& b) { using std::max; return (max)(a, b); }

/** \internal \returns the absolute value of \a a */
// `using std::abs` + unqualified call: picks the std overload for built-in
// types and lets argument-dependent lookup find abs() for other types.
template<typename Packet> inline Packet
pabs(const Packet& a) { using std::abs; return abs(a); }

/** \internal \returns the bitwise and of \a a and \a b */
template<typename Packet> inline Packet
pand(const Packet& a, const Packet& b) { return a & b; }

/** \internal \returns the bitwise or of \a a and \a b */
template<typename Packet> inline Packet
por(const Packet& a, const Packet& b) { return a | b; }

/** \internal \returns the bitwise xor of \a a and \a b */
template<typename Packet> inline Packet
pxor(const Packet& a, const Packet& b) { return a ^ b; }
Kamath 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the bitwise andnot of \a a and \a b */ 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpandnot(const Packet& a, const Packet& b) { return a & (!b); } 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpload(const typename unpacket_traits<Packet>::type* from) { return *from; } 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from, (un-aligned load) */ 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1597faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \internal \returns a packet with elements of \a *from duplicated. 1607faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and 1617faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} 1627faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * Currently, this function is only used for scalar * complex products. 
1637faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez */ 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpset1(const typename unpacket_traits<Packet>::type& a) { return a; } 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar> inline typename packet_traits<Scalar>::type 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathplset(const Scalar& a) { return a; } 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from) 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ (*to) = from; } 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy the packet \a from to \a *to, (un-aligned store) */ 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from) 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ (*to) = from; } 
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal tries to do cache prefetching of \a addr */ 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar> inline void prefetch(const Scalar* addr) 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#if !defined(_MSC_VER) 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath__builtin_prefetch(addr); 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the first element of a packet */ 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpreduxp(const Packet* vecs) { return vecs[0]; } 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the sum of the elements of \a a*/ 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a) 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the product 
of the elements of \a a*/ 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the min of the elements of \a a*/ 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the max of the elements of \a a*/ 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the reversed elements of \a a*/ 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet preverse(const Packet& a) 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a; } 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet pcplxflip(const Packet& a) 2227faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez{ 2237faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez // FIXME: uncomment the following in case we drop the internal imag and 
real functions. 2247faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez// using std::imag; 2257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez// using std::real; 2267faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez return Packet(imag(a),real(a)); 2277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez} 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/************************** 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Special math functions 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************/ 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the sine of \a a (coeff-wise) */ 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2357faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket psin(const Packet& a) { using std::sin; return sin(a); } 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the cosine of \a a (coeff-wise) */ 238c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2397faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pcos(const Packet& a) { using std::cos; return cos(a); } 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the tan of \a a (coeff-wise) */ 242c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2437faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket ptan(const Packet& a) { using std::tan; return tan(a); } 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 
245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the arc sine of \a a (coeff-wise) */ 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2477faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pasin(const Packet& a) { using std::asin; return asin(a); } 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 249c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the arc cosine of \a a (coeff-wise) */ 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2517faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pacos(const Packet& a) { using std::acos; return acos(a); } 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 253c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the exp of \a a (coeff-wise) */ 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2557faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket pexp(const Packet& a) { using std::exp; return exp(a); } 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the log of \a a (coeff-wise) */ 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 2597faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket plog(const Packet& a) { using std::log; return log(a); } 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns the square-root of \a a (coeff-wise) */ 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 
2637faaa9f3f0df9d23790277834d426c3d992ac3baCarlos HernandezPacket psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* The following functions might not have to be overwritten for vectorized types 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 269c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ 270c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pstore(to, pset1<Packet>(a)); 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a * b + c (coeff-wise) */ 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet> inline Packet 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathpmadd(const Packet& a, 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& b, 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const Packet& c) 
282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return padd(pmul(a, b),c); } 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal \returns a packet version of \a *from. 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ 286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Packet, int LoadMode> 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline Packet ploadt(const typename unpacket_traits<Packet>::type* from) 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(LoadMode == Aligned) 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pload<Packet>(from); 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return ploadu<Packet>(from); 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal copy the packet \a from to \a *to. 
296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar, typename Packet, int LoadMode> 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void pstoret(Scalar* to, const Packet& from) 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(LoadMode == Aligned) 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pstore(to, from); 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pstoreu(to, from); 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal default implementation of palign() allowing partial specialization */ 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset,typename PacketType> 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 310c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // by default data are aligned, so there is nothing to be done :) 311c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static inline void run(PacketType&, const PacketType&) {} 312c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 313c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3147faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements 3157faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * of \a first and \a Offset first elements of \a second. 
3167faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * 3177faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * This function is currently only used to optimize matrix-vector products on unligned matrices. 3187faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * It takes 2 packets that represent a contiguous memory array, and returns a packet starting 3197faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * at the position \a Offset. For instance, for packets of 4 elements, we have: 3207faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * Input: 3217faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - first = {f0,f1,f2,f3} 3227faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - second = {s0,s1,s2,s3} 3237faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * Output: 3247faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - if Offset==0 then {f0,f1,f2,f3} 3257faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - if Offset==1 then {f1,f2,f3,s0} 3267faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - if Offset==2 then {f2,f3,s0,s1} 3277faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez * - if Offset==3 then {f3,s0,s1,s3} 3287faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez */ 329c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset,typename PacketType> 330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void palign(PacketType& first, const PacketType& second) 331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 332c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath palign_impl<Offset,PacketType>::run(first,second); 333c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 334c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 335c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*************************************************************************** 336c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath* Fast complex products (GCC 
generates a function call which is very slow) 337c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath***************************************************************************/ 338c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 339c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) 340c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 341c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 342c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) 343c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 344c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 345c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 346c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 347c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 349c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_GENERIC_PACKET_MATH_H 350c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 351