1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 42b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Copyright (C) 2008-2016 Konstantinos Margaritis <markos@freevec.org> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_PACKET_MATH_ALTIVEC_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_PACKET_MATH_ALTIVEC_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD 222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD 262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector float Packet4f; 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector int Packet4i; 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector unsigned int Packet4ui; 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector __bool int Packet4bi; 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector short int Packet8i; 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef __vector unsigned char Packet16uc; 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// We don't want to write the same code all the time, but we need to reuse the constants 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// and it doesn't really work to declare them global, so we define macros instead 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ 452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X)) 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i p4i_##NAME = vec_splat_s32(X) 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f p4f_##NAME = pset1<Packet4f>(X) 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i p4i_##NAME = pset1<Packet4i>(X) 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ 572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d p2d_##NAME = pset1<Packet2d>(X) 582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ 602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2l p2l_##NAME = pset1<Packet2l>(X) 612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ 632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X)) 642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define DST_CHAN 1 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// These constants are endian-agnostic 702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} 712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} 722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} 732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} 742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} 752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} 762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef __VSX__ 772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} 782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; 812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; 822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; 842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; 852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Mask alignment 872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __PPC64__ 882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_MASK_ALIGNMENT 0xfffffff0 912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) 942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Handle endianness properly while loading constants 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Define global static constants: 972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); 992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 1002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; 1012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 1022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; 1032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; 1042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; 1052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 1062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_FORWARD = p16uc_REVERSE32; 1072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; 1082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; 1092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; 1102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; 1112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // _BIG_ENDIAN 1122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; 1142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; 1152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16; //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; 1162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16; //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; 1172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; 1192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 1212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; 1222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 1232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; 1242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // _BIG_ENDIAN 1252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC 1272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR); 1282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 1292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); 1302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<float> : default_packet_traits 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet4f type; 1352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet4f half; 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 1, 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath size=4, 1402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasHalfPacket = 1, 1412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAdd = 1, 1432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSub = 1, 1442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMul = 1, 1452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasDiv = 1, 1462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMin = 1, 1472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMax = 1, 1482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAbs = 1, 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasSin = 0, 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasCos = 0, 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath HasLog = 0, 1522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasExp = 1, 1532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 1542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSqrt = 1, 1552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if !EIGEN_COMP_CLANG 1562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRsqrt = 1, 1572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 1582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRsqrt = 0, 1592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 1602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 1612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSqrt = 0, 1622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRsqrt = 0, 1632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 1642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRound = 1, 1652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasFloor = 1, 1662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasCeil = 1, 1672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasNegate = 1, 1682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasBlend = 1 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> struct packet_traits<int> : default_packet_traits 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Packet4i type; 1742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet4i half; 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath enum { 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Vectorizable = 1, 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath AlignedOnScalar = 1, 1782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size = 4, 1792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasHalfPacket = 0, 1802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAdd = 1, 1822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSub = 1, 1832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMul = 1, 1842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasDiv = 0, 1852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasBlend = 1 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath }; 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; 1912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; 1922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 1932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) 1942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 1952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang union { 1962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet16uc v; 1972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang unsigned char n[16]; 1982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } vt; 1992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vt.v = v; 2002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (int i=0; i< 16; i++) 2012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang s << (int)vt.n[i] << ", "; 2022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return s; 2032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 2042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline std::ostream & operator <<(std::ostream & s, const Packet4f & v) 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath union { 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v; 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath float n[4]; 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } vt; 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vt.v = v; 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline std::ostream & operator <<(std::ostream & s, const Packet4i & v) 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath union { 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i v; 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int n[4]; 221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } vt; 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vt.v = v; 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath union { 230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4ui v; 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath unsigned int n[4]; 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } vt; 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vt.v = v; 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; 235c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s; 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Need to define them first or we get specialization after instantiation errors 2392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) 240c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 2412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_LOAD 2422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 2432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_vsx_ld(0, from); 2442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 2452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_ld(0, from); 2462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 248c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) 2502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 2512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_LOAD 2522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 2532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_vsx_ld(0, from); 2542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 2552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_ld(0, from); 2562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) 2602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 2612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 2622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 2632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st(from, 0, to); 2642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 2652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_st(from, 0, to); 2662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 2672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) 2702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 2712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 2722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 2732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st(from, 0, to); 2742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 2752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_st(from, 0, to); 2762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 2772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { 2802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f v = {from, from, from, from}; 2812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return v; 2822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { 2852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4i v = {from, from, from, from}; 2862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return v; 2872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 2882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void 2892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangpbroadcast4<Packet4f>(const float *a, 2902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) 2912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 2922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = pload<Packet4f>(a); 2932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a0 = vec_splat(a3, 0); 2942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a1 = vec_splat(a3, 1); 2952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a2 = vec_splat(a3, 2); 2962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = vec_splat(a3, 3); 2972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 2982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void 2992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangpbroadcast4<Packet4i>(const int *a, 3002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) 3012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = pload<Packet4i>(a); 3032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a0 = vec_splat(a3, 0); 3042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a1 = vec_splat(a3, 1); 3052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a2 = vec_splat(a3, 2); 3062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = vec_splat(a3, 3); 3072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3087faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez 3092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) 3102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float EIGEN_ALIGN16 af[4]; 3122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[0] = from[0*stride]; 3132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[1] = from[1*stride]; 3142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[2] = from[2*stride]; 3152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[3] = from[3*stride]; 3162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pload<Packet4f>(af); 3172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride) 3192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang int EIGEN_ALIGN16 ai[4]; 3212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ai[0] = from[0*stride]; 3222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ai[1] = from[1*stride]; 3232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ai[2] = from[2*stride]; 3242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ai[3] = from[3*stride]; 3252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pload<Packet4i>(ai); 3262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride) 328c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 3292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float EIGEN_ALIGN16 af[4]; 3302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pstore<float>(af, from); 3312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[0*stride] = af[0]; 3322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[1*stride] = af[1]; 3332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[2*stride] = af[2]; 3342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[3*stride] = af[3]; 3352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 3362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride) 3372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 3382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang int EIGEN_ALIGN16 ai[4]; 3392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pstore<int>((int *)ai, from); 3402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[0*stride] = ai[0]; 3412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[1*stride] = ai[1]; 3422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[2*stride] = ai[2]; 3432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[3*stride] = ai[3]; 3442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 345c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; } 3472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return pset1<Packet4i>(a) + p4i_COUNTDOWN; } 348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; } 3502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; } 351c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; } 3532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; } 354c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; } 3562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; } 357c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } 3592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } 360c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); } 3622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; } 363c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 364c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) 365c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 3662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef __VSX__ // VSX actually provides a div instruction 3672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f t, y_0, y_1; 368c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 369c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Altivec does not offer a divide instruction, we have to do a reciprocal approximation 370c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath y_0 = vec_re(b); 371c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 372c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Do one Newton-Raphson iteration to get the needed accuracy 373c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath t = vec_nmsub(y_0, b, p4f_ONE); 374c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath y_1 = vec_madd(y_0, t, y_0); 375c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_madd(a, y_1, p4f_MZERO); 3772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 3782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_div(a, b); 3792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 380c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 381c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 382c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/) 383c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ eigen_assert(false && "packet integer division are not supported by AltiVec"); 384c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pset1<Packet4i>(0); 385c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 386c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 387c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for some weird raisons, it has to be overloaded for packet of integers 3882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } 3892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } 390c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 391c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } 392c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } 393c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 394c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } 395c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } 396c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 397c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } 398c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } 399c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 400c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } 401c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } 402c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 403c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } 404c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } 405c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 406c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } 407c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } 408c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); } 4102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); } 4112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); } 412c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 414c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) 415c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 416c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DEBUG_ALIGNED_LOAD 417c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc MSQ, LSQ; 418c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc mask; 419c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword 420c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword 421c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mask = vec_lvsl(0, from); // create the permute mask 422c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data 423c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 424c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 425c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) 426c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 427c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DEBUG_ALIGNED_LOAD 428c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html 429c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc MSQ, LSQ; 430c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc mask; 431c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword 432c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword 433c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mask = vec_lvsl(0, from); // create the permute mask 434c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data 435c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 4362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 4372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX 4382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) 4392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_UNALIGNED_LOAD 4412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from)); 4422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) 4442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_UNALIGNED_LOAD 4462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from)); 4472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 449c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 450c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) 451c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 452c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f p; 4532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4f>(from); 4542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang else p = ploadu<Packet4f>(from); 4552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_perm(p, p, p16uc_DUPLICATE32_HI); 456c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 457c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from) 458c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 459c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i p; 4602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet4i>(from); 4612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang else p = ploadu<Packet4i>(from); 4622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_perm(p, p, p16uc_DUPLICATE32_HI); 463c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 464c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 4652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 466c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) 467c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 468c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DEBUG_UNALIGNED_STORE 469c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html 470c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Warning: not thread safe! 471c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc MSQ, LSQ, edges; 472c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc edgeAlign, align; 473c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 474c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword 475c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword 476c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath edgeAlign = vec_lvsl(0, to); // permute map to extract edges 477c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath edges=vec_perm(LSQ,MSQ,edgeAlign); // extract the edges 478c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath align = vec_lvsr( 0, to ); // permute map to misalign data 479c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ) 480c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ) 481c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first 482c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part 483c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 484c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) 485c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 486c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_DEBUG_UNALIGNED_STORE 487c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html 488c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Warning: not thread safe! 489c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc MSQ, LSQ, edges; 490c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet16uc edgeAlign, align; 491c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 492c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword 493c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword 494c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath edgeAlign = vec_lvsl(0, to); // permute map to extract edges 495c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges 496c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath align = vec_lvsr( 0, to ); // permute map to misalign data 497c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ) 498c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ) 499c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first 500c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part 501c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 5022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 5032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX 5042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) 5052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 5072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to)); 5082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) 5102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 5122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); 5132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 515c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 5162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); } 5172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_PPC_PREFETCH(addr); } 518c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 5192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; } 5202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; } 521c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 5222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) 5232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); 5252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) 5272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); } 529c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 530c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } 531c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } 532c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 533c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) 534c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 535c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f b, sum; 5362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang b = vec_sld(a, a, 8); 5372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum = a + b; 5382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang b = vec_sld(sum, sum, 4); 5392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum += b; 540c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(sum); 541c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 542c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 543c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) 544c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 545c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f v[4], sum[4]; 546c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 547c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // It's easier and faster to transpose then add as columns 548c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation 549c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Do the transpose, first set of moves 550c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[0] = vec_mergeh(vecs[0], vecs[2]); 551c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[1] = vec_mergel(vecs[0], vecs[2]); 552c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[2] = vec_mergeh(vecs[1], vecs[3]); 553c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[3] = vec_mergel(vecs[1], vecs[3]); 554c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the resulting vectors 555c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[0] = vec_mergeh(v[0], v[2]); 556c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[1] = vec_mergel(v[0], v[2]); 557c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[2] = vec_mergeh(v[1], v[3]); 558c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[3] = vec_mergel(v[1], v[3]); 559c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 560c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Now do the summation: 561c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Lines 0+1 5622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[0] = sum[0] + sum[1]; 563c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Lines 2+3 5642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[1] = sum[2] + sum[3]; 565c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add the results 5662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[0] = sum[0] + sum[1]; 567c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 568c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return sum[0]; 569c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 570c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 571c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) 572c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 573c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i sum; 574c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum = vec_sums(a, p4i_ZERO); 5752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 576c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum = vec_sld(sum, p4i_ZERO, 12); 5772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 5782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum = vec_sld(p4i_ZERO, sum, 4); 5792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 580c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(sum); 581c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 582c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 583c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) 584c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 585c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i v[4], sum[4]; 586c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 587c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // It's easier and faster to transpose then add as columns 588c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation 589c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Do the transpose, first set of moves 590c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[0] = vec_mergeh(vecs[0], vecs[2]); 591c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[1] = vec_mergel(vecs[0], vecs[2]); 592c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[2] = vec_mergeh(vecs[1], vecs[3]); 593c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v[3] = vec_mergel(vecs[1], vecs[3]); 594c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Get the resulting vectors 595c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[0] = vec_mergeh(v[0], v[2]); 596c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[1] = vec_mergel(v[0], v[2]); 597c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[2] = vec_mergeh(v[1], v[3]); 598c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath sum[3] = vec_mergel(v[1], v[3]); 599c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 600c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Now do the summation: 601c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Lines 0+1 6022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[0] = sum[0] + sum[1]; 603c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Lines 2+3 6042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[1] = sum[2] + sum[3]; 605c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Add the results 6062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum[0] = sum[0] + sum[1]; 607c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 608c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return sum[0]; 609c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 610c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 611c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Other reduction functions: 612c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// mul 613c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) 614c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 615c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f prod; 6162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang prod = pmul(a, vec_sld(a, a, 8)); 6172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pfirst(pmul(prod, vec_sld(prod, prod, 4))); 618c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 619c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 620c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) 621c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 622c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN16 int aux[4]; 623c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pstore(aux, a); 624c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return aux[0] * aux[1] * aux[2] * aux[3]; 625c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 626c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 627c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// min 628c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) 629c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 630c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f b, res; 631c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = vec_min(a, vec_sld(a, a, 8)); 632c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = vec_min(b, vec_sld(b, b, 4)); 633c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(res); 634c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 635c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 636c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) 637c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 638c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i b, res; 639c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = vec_min(a, vec_sld(a, a, 8)); 640c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = vec_min(b, vec_sld(b, b, 4)); 641c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(res); 642c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 643c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 644c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// max 645c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) 646c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 647c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4f b, res; 648c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = vec_max(a, vec_sld(a, a, 8)); 649c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = vec_max(b, vec_sld(b, b, 4)); 650c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(res); 651c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 652c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 653c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) 654c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 655c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet4i b, res; 656c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b = vec_max(a, vec_sld(a, a, 8)); 657c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath res = vec_max(b, vec_sld(b, b, 4)); 658c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return pfirst(res); 659c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 660c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 661c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 662c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet4f> 663c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 664c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) 665c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 6662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 6672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang switch (Offset % 4) { 6682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 1: 6692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 4); break; 6702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 2: 6712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 8); break; 6722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 3: 6732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 12); break; 6742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 6762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang switch (Offset % 4) { 6772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 1: 6782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 12); break; 6792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 2: 6802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 8); break; 6812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 3: 6822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 4); break; 6832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 685c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 686c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 687c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 688c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<int Offset> 689c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstruct palign_impl<Offset,Packet4i> 690c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 691c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) 692c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 6932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 6942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang switch (Offset % 4) { 6952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 1: 6962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 4); break; 6972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 2: 6982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 8); break; 6992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 3: 7002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(first, second, 12); break; 7012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 7032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang switch (Offset % 4) { 7042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 1: 7052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 12); break; 7062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 2: 7072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 8); break; 7082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case 3: 7092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = vec_sld(second, first, 4); break; 7102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 712c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 713c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 714c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 7152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DEVICE_FUNC inline void 7162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangptranspose(PacketBlock<Packet4f,4>& kernel) { 7172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4f t0, t1, t2, t3; 7182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); 7192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); 7202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); 7212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); 7222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[0] = vec_mergeh(t0, t2); 7232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[1] = vec_mergel(t0, t2); 7242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[2] = vec_mergeh(t1, t3); 7252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[3] = vec_mergel(t1, t3); 7262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DEVICE_FUNC inline void 7292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangptranspose(PacketBlock<Packet4i,4>& kernel) { 7302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4i t0, t1, t2, t3; 7312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); 7322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t1 = vec_mergel(kernel.packet[0], kernel.packet[2]); 7332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]); 7342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t3 = vec_mergel(kernel.packet[1], kernel.packet[3]); 7352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[0] = vec_mergeh(t0, t2); 7362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[1] = vec_mergel(t0, t2); 7372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[2] = vec_mergeh(t1, t3); 7382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[3] = vec_mergel(t1, t3); 7392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { 7422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; 7432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE))); 7442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_sel(elsePacket, thenPacket, mask); 7452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) { 7482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] }; 7492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE))); 7502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_sel(elsePacket, thenPacket, mask); 7512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang//---------- double ---------- 7552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 7562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypedef __vector double Packet2d; 7572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypedef __vector unsigned long long Packet2ul; 7582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypedef __vector long long Packet2l; 7592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#if EIGEN_COMP_CLANG 7602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypedef Packet2ul Packet2bl; 7612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 7622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtypedef __vector __bool long Packet2bl; 7632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 7642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2l p2l_ONE = { 1, 1 }; 7662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO); 7672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2d p2d_ONE = { 1.0, 1.0 }; 7682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO); 7692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2d p2d_MZERO = { -0.0, -0.0 }; 7702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 7722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8)); 7732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 7742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstatic Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8)); 7752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 7762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<int index> Packet2d vec_splat_dbl(Packet2d& a); 7782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a) 7802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 7812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI)); 7822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a) 7852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 7862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO)); 7872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 7882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct packet_traits<double> : default_packet_traits 7902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 7912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet2d type; 7922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef Packet2d half; 7932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang enum { 7942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Vectorizable = 1, 7952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang AlignedOnScalar = 1, 7962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size=2, 7972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasHalfPacket = 1, 7982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAdd = 1, 8002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSub = 1, 8012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMul = 1, 8022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasDiv = 1, 8032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMin = 1, 8042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasMax = 1, 8052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasAbs = 1, 8062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSin = 0, 8072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasCos = 0, 8082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasLog = 0, 8092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasExp = 1, 8102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasSqrt = 1, 8112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRsqrt = 1, 8122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasRound = 1, 8132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasFloor = 1, 8142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasCeil = 1, 8152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasNegate = 1, 8162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang HasBlend = 1 8172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }; 8182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 8192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; 8212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline std::ostream & operator <<(std::ostream & s, const Packet2l & v) 8232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang union { 8252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2l v; 8262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang int64_t n[2]; 8272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } vt; 8282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vt.v = v; 8292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang s << vt.n[0] << ", " << vt.n[1]; 8302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return s; 8312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wanginline std::ostream & operator <<(std::ostream & s, const Packet2d & v) 8342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang union { 8362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d v; 8372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang double n[2]; 8382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } vt; 8392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vt.v = v; 8402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang s << vt.n[0] << ", " << vt.n[1]; 8412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return s; 8422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Need to define them first or we get specialization after instantiation errors 8452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) 8462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_LOAD 8482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 8492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_vsx_ld(0, from); 8502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 8512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_ld(0, from); 8522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 8532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) 8562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 8582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef __VSX__ 8592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st(from, 0, to); 8602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 8612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_st(from, 0, to); 8622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 8632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { 8662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d v = {from, from}; 8672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return v; 8682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void 8712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangpbroadcast4<Packet2d>(const double *a, 8722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3) 8732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a1 = pload<Packet2d>(a); 8752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a0 = vec_splat_dbl<0>(a1); 8762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a1 = vec_splat_dbl<1>(a1); 8772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = pload<Packet2d>(a+2); 8782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a2 = vec_splat_dbl<0>(a3); 8792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang a3 = vec_splat_dbl<1>(a3); 8802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) 8832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang double EIGEN_ALIGN16 af[2]; 8852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[0] = from[0*stride]; 8862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang af[1] = from[1*stride]; 8872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pload<Packet2d>(af); 8882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride) 8902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang double EIGEN_ALIGN16 af[2]; 8922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang pstore<double>(af, from); 8932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[0*stride] = af[0]; 8942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang to[1*stride] = af[1]; 8952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; } 8982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; } 9002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; } 9022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; } 9042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } 9062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); } 9082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); } 9092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// for some weird raisons, it has to be overloaded for packet of integers 9112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } 9122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } 9142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } 9162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } 9182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); } 9202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); } 9222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); } 9242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); } 9262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); } 9272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); } 9282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) 9302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_LOAD 9322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from)); 9332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) 9362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d p; 9382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if((std::ptrdiff_t(from) % 16) == 0) p = pload<Packet2d>(from); 9392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang else p = ploadu<Packet2d>(from); 9402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_splat_dbl<0>(p); 9412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) 9442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang EIGEN_DEBUG_ALIGNED_STORE 9462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to)); 9472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); } 9502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; } 9522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) 9542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64)); 9562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); } 9582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) 9602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d b, sum; 9622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8)); 9632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum = a + b; 9642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pfirst<Packet2d>(sum); 9652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) 9682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d v[2], sum; 9702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8)); 9712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8)); 9722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 9742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8)); 9752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 9762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8)); 9772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 9782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return sum; 9802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Other reduction functions: 9822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// mul 9832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) 9842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8)))); 9862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// min 9892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) 9902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8)))); 9922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 9942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// max 9952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) 9962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 9972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8)))); 9982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 9992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 10002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<int Offset> 10012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct palign_impl<Offset,Packet2d> 10022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 10032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) 10042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 10052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (Offset == 1) 10062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifdef _BIG_ENDIAN 10072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8)); 10082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#else 10092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8)); 10102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif 10112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 10122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 10132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 10142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DEVICE_FUNC inline void 10152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangptranspose(PacketBlock<Packet2d,2>& kernel) { 10162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2d t0, t1; 10172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI); 10182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO); 10192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[0] = t0; 10202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang kernel.packet[1] = t1; 10212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 10222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 10232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { 10242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; 10252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)); 10262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return vec_sel(elsePacket, thenPacket, mask); 10272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 10282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // __VSX__ 1029c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 1030c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1031c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 1032c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 1033c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_PACKET_MATH_ALTIVEC_H 1034