1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// g++ -DNDEBUG -O3 -I.. benchLLT.cpp  -o benchLLT && ./benchLLT
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// options:
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -DEIGEN_DONT_VECTORIZE
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -msse2
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -DREPEAT=100
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -DTRIES=10
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//  -DSCALAR=double
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <iostream>
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Core>
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Cholesky>
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <bench/BenchUtil.h>
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace Eigen;
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef REPEAT
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define REPEAT 10000
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef TRIES
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define TRIES 10
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef float Scalar;
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate <typename MatrixType>
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int rows = m.rows();
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int cols = m.cols();
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int cost = 0;
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (int j=0; j<rows; ++j)
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int r = std::max(rows - j -1,0);
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    cost += 2*(r*j+r+j);
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int repeats = (REPEAT*1000)/(rows*rows);
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef typename MatrixType::Scalar Scalar;
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MatrixType a = MatrixType::Random(rows,cols);
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  SquareMatrixType covMat =  a * a.adjoint();
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  BenchTimer timerNoSqrt, timerSqrt;
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Scalar acc = 0;
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int r = internal::random<int>(0,covMat.rows()-1);
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int c = internal::random<int>(0,covMat.cols()-1);
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (int t=0; t<TRIES; ++t)
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timerNoSqrt.start();
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (int k=0; k<repeats; ++k)
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      LDLT<SquareMatrixType> cholnosqrt(covMat);
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      acc += cholnosqrt.matrixL().coeff(r,c);
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timerNoSqrt.stop();
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (int t=0; t<TRIES; ++t)
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timerSqrt.start();
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (int k=0; k<repeats; ++k)
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      LLT<SquareMatrixType> chol(covMat);
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      acc += chol.matrixL().coeff(r,c);
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timerSqrt.stop();
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (MatrixType::RowsAtCompileTime==Dynamic)
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "dyn   ";
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "fixed ";
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << covMat.rows() << " \t"
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath            << (timerNoSqrt.value() * REPEAT) / repeats << "s "
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath            << "(" << 1e-6 * cost*repeats/timerNoSqrt.value() << " MFLOPS)\t"
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath            << (timerSqrt.value() * REPEAT) / repeats << "s "
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath            << "(" << 1e-6 * cost*repeats/timerSqrt.value() << " MFLOPS)\n";
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #ifdef BENCH_GSL
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (MatrixType::RowsAtCompileTime==Dynamic)
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timerSqrt.reset();
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    eiToGsl(covMat, &gslCovMat);
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (int t=0; t<TRIES; ++t)
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      timerSqrt.start();
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      for (int k=0; k<repeats; ++k)
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      {
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        gsl_matrix_memcpy(gslCopy,gslCovMat);
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        gsl_linalg_cholesky_decomp(gslCopy);
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        acc += gsl_matrix_get(gslCopy,r,c);
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      }
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      timerSqrt.stop();
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << " | \t"
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              << timerSqrt.value() * REPEAT / repeats << "s";
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    gsl_matrix_free(gslCovMat);
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #endif
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "\n";
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // make sure the compiler does not optimize too much
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (acc==123)
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << acc;
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char* argv[])
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "size            no sqrt                           standard";
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   #ifdef BENCH_GSL
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   std::cout << "       GSL (standard + double + ATLAS)  ";
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   #endif
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "\n";
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (uint i=0; dynsizes[i]>0; ++i)
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,2,2>());
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,3,3>());
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,4,4>());
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,5,5>());
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,6,6>());
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,7,7>());
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,8,8>());
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,12,12>());
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  benchLLT(Matrix<Scalar,16,16>());
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return 0;
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
143