1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. Eigen itself is part of the KDE project. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "main.h" 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// using namespace Eigen; 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int size) 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<size; ++i) 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!ei_isApprox(a[i],b[i])) return false; 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return true; 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define CHECK_CWISE(REFOP, POP) { \ 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<PacketSize; ++i) \ 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[i] = REFOP(data1[i], data1[i+PacketSize]); \ 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, POP(ei_pload(data1), ei_pload(data1+PacketSize))); \ 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REF_ADD(a,b) ((a)+(b)) 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REF_SUB(a,b) ((a)-(b)) 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REF_MUL(a,b) ((a)*(b)) 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REF_DIV(a,b) ((a)/(b)) 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace std { 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> const complex<float>& min(const complex<float>& a, const complex<float>& b) 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a.real() < b.real() ? a : b; } 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<> const complex<float>& max(const complex<float>& a, const complex<float>& b) 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ return a.real() < b.real() ? b : a; } 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Scalar> void packetmath() 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename ei_packet_traits<Scalar>::type Packet; 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const int PacketSize = ei_packet_traits<Scalar>::size; 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const int size = PacketSize*4; 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN_128 Scalar data1[ei_packet_traits<Scalar>::size*4]; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN_128 Scalar data2[ei_packet_traits<Scalar>::size*4]; 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN_128 Packet packets[PacketSize*2]; 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_ALIGN_128 Scalar ref[ei_packet_traits<Scalar>::size*4]; 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<size; ++i) 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath data1[i] = ei_random<Scalar>(); 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath data2[i] = ei_random<Scalar>(); 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, ei_pload(data1)); 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(data1, data2, PacketSize) && "aligned load/store"); 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int offset=0; offset<PacketSize; ++offset) 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, ei_ploadu(data1+offset)); 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(data1+offset, data2, PacketSize) && "ei_ploadu"); 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int offset=0; offset<PacketSize; ++offset) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstoreu(data2+offset, ei_pload(data1)); 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(data1, data2+offset, PacketSize) && "ei_pstoreu"); 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int offset=0; offset<PacketSize; ++offset) 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath packets[0] = ei_pload(data1); 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath packets[1] = ei_pload(data1+PacketSize); 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (offset==0) ei_palign<0>(packets[0], packets[1]); 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (offset==1) ei_palign<1>(packets[0], packets[1]); 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (offset==2) ei_palign<2>(packets[0], packets[1]); 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (offset==3) ei_palign<3>(packets[0], packets[1]); 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, packets[0]); 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<PacketSize; ++i) 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[i] = data1[i+offset]; 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<Scalar, PacketSize, 1> Vector; 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(REF_ADD, ei_padd); 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(REF_SUB, ei_psub); 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(REF_MUL, ei_pmul); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifndef EIGEN_VECTORIZE_ALTIVEC 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!ei_is_same_type<Scalar,int>::ret) 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(REF_DIV, ei_pdiv); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(std::min, ei_pmin); 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CHECK_CWISE(std::max, ei_pmax); 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<PacketSize; ++i) 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[i] = data1[0]; 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, ei_pset1(data1[0])); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(ref, data2, PacketSize) && "ei_pset1"); 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(ei_isApprox(data1[0], ei_pfirst(ei_pload(data1))) && "ei_pfirst"); 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[0] = 0; 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<PacketSize; ++i) 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[0] += data1[i]; 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(ei_isApprox(ref[0], ei_predux(ei_pload(data1))) && "ei_predux"); 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int j=0; j<PacketSize; ++j) 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[j] = 0; 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<PacketSize; ++i) 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ref[j] += data1[i+j*PacketSize]; 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath packets[j] = ei_pload(data1+j*PacketSize); 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ei_pstore(data2, ei_preduxp(packets)); 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VERIFY(areApprox(ref, data2, PacketSize) && "ei_preduxp"); 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid test_eigen2_packetmath() 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(int i = 0; i < g_repeat; i++) { 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CALL_SUBTEST_1( packetmath<float>() ); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CALL_SUBTEST_2( packetmath<double>() ); 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CALL_SUBTEST_3( packetmath<int>() ); 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CALL_SUBTEST_4( packetmath<std::complex<float> >() ); 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 133