1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// g++ -DNDEBUG -O3 -I.. benchLLT.cpp -o benchLLT && ./benchLLT 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// options: 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DEIGEN_DONT_VECTORIZE 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -msse2 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DREPEAT=100 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DTRIES=10 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DSCALAR=double 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <iostream> 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Core> 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Cholesky> 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <bench/BenchUtil.h> 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace Eigen; 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef REPEAT 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REPEAT 10000 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef TRIES 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define TRIES 10 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef float Scalar; 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename MatrixType> 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath__attribute__ ((noinline)) void benchLLT(const MatrixType& m) 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int rows = m.rows(); 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int cols = m.cols(); 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int cost = 0; 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int j=0; j<rows; ++j) 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int r = std::max(rows - j -1,0); 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath cost += 2*(r*j+r+j); 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int repeats = (REPEAT*1000)/(rows*rows); 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename MatrixType::Scalar Scalar; 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MatrixType a = MatrixType::Random(rows,cols); 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath SquareMatrixType covMat = a * a.adjoint(); 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BenchTimer timerNoSqrt, timerSqrt; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar acc = 0; 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int r = internal::random<int>(0,covMat.rows()-1); 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int c = internal::random<int>(0,covMat.cols()-1); 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int t=0; t<TRIES; ++t) 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerNoSqrt.start(); 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<repeats; ++k) 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LDLT<SquareMatrixType> cholnosqrt(covMat); 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath acc += cholnosqrt.matrixL().coeff(r,c); 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerNoSqrt.stop(); 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int t=0; t<TRIES; ++t) 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerSqrt.start(); 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<repeats; ++k) 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath LLT<SquareMatrixType> chol(covMat); 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath acc += chol.matrixL().coeff(r,c); 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerSqrt.stop(); 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (MatrixType::RowsAtCompileTime==Dynamic) 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "dyn "; 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "fixed "; 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << covMat.rows() << " \t" 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath << (timerNoSqrt.value() * REPEAT) / repeats << "s " 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath << "(" << 1e-6 * cost*repeats/timerNoSqrt.value() << " MFLOPS)\t" 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath << (timerSqrt.value() * REPEAT) / repeats << "s " 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath << "(" << 1e-6 * cost*repeats/timerSqrt.value() << " MFLOPS)\n"; 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef BENCH_GSL 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (MatrixType::RowsAtCompileTime==Dynamic) 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerSqrt.reset(); 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols()); 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols()); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eiToGsl(covMat, &gslCovMat); 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int t=0; t<TRIES; ++t) 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerSqrt.start(); 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<repeats; ++k) 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath gsl_matrix_memcpy(gslCopy,gslCovMat); 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath gsl_linalg_cholesky_decomp(gslCopy); 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath acc += gsl_matrix_get(gslCopy,r,c); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timerSqrt.stop(); 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " | \t" 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath << timerSqrt.value() * REPEAT / repeats << "s"; 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath gsl_matrix_free(gslCovMat); 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "\n"; 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // make sure the compiler does not optimize too much 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (acc==123) 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << acc; 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char* argv[]) 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0}; 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "size no sqrt standard"; 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #ifdef BENCH_GSL 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// std::cout << " GSL (standard + double + ATLAS) "; 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #endif 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "\n"; 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint i=0; dynsizes[i]>0; ++i) 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i])); 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,2,2>()); 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,3,3>()); 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,4,4>()); 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,5,5>()); 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,6,6>()); 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,7,7>()); 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,8,8>()); 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,12,12>()); 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath benchLLT(Matrix<Scalar,16,16>()); 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return 0; 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 143