1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//g++-4.4 -DNOMTL  -Wl,-rpath /usr/local/lib/oski -L /usr/local/lib/oski/ -l oski -l oski_util -l oski_util_Tid  -DOSKI -I ~/Coding/LinearAlgebra/mtl4/  spmv.cpp  -I .. -O2 -DNDEBUG -lrt  -lm -l oski_mat_CSC_Tid  -loskilt && ./a.out r200000 c200000 n100 t1 p1
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Scalar type selected for this benchmark.
// NOTE(review): defined ahead of the Bench*.h includes, so it is presumably
// consumed by BenchSparseUtil.h to pick the element type -- confirm there.
#define SCALAR double
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <iostream>
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <algorithm>
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "BenchTimer.h"
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "BenchSparseUtil.h"
10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Shorthand for BENCH that fixes the timer and iteration-count arguments to the
// identifiers `t`, `tries` and `repeats`; all three must be in scope wherever
// the macro is expanded (main() declares them). BENCH itself is not defined in
// this file -- presumably it comes from BenchTimer.h (confirm). The expansion
// already ends with ';', so a call site may omit its own semicolon.
#define SPMV_BENCH(CODE) BENCH(t,tries,repeats,CODE);
12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #ifdef MKL
14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #include "mkl_types.h"
16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #include "mkl_spblas.h"
17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// template<typename Lhs,typename Rhs,typename Res>
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// void mkl_multiply(const Lhs& lhs, const Rhs& rhs, Res& res)
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// {
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   char n = 'N';
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   float alpha = 1;
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   char matdescra[6];
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   matdescra[0] = 'G';
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   matdescra[1] = 0;
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   matdescra[2] = 0;
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   matdescra[3] = 'C';
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   mkl_scscmm(&n, lhs.rows(), rhs.cols(), lhs.cols(), &alpha, matdescra,
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//              lhs._valuePtr(), lhs._innerIndexPtr(), lhs.outerIndexPtr(),
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//              pntre, b, &ldb, &beta, c, &ldc);
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// //   mkl_somatcopy('C', 'T', lhs.rows(), lhs.cols(), 1,
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// //                 lhs._valuePtr(), lhs.rows(), DST, dst_stride);
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// }
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #endif
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char *argv[])
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int size = 10000;
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int rows = size;
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int cols = size;
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int nnzPerCol = 40;
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int tries = 2;
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int repeats = 2;
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  bool need_help = false;
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for(int i = 1; i < argc; i++)
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(argv[i][0] == 'r')
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      rows = atoi(argv[i]+1);
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else if(argv[i][0] == 'c')
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      cols = atoi(argv[i]+1);
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else if(argv[i][0] == 'n')
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      nnzPerCol = atoi(argv[i]+1);
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else if(argv[i][0] == 't')
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      tries = atoi(argv[i]+1);
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else if(argv[i][0] == 'p')
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      repeats = atoi(argv[i]+1);
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      need_help = true;
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if(need_help)
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << argv[0] << " r<nb rows> c<nb columns> n<non zeros per column> t<nb tries> p<nb repeats>\n";
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    return 1;
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "SpMV " << rows << " x " << cols << " with " << nnzPerCol << " non zeros per column. (" << repeats << " repeats, and " << tries << " tries)\n\n";
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  EigenSparseMatrix sm(rows,cols);
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  DenseVector dv(cols), res(rows);
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  dv.setRandom();
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  BenchTimer t;
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  while (nnzPerCol>=4)
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "nnz: " << nnzPerCol << "\n";
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    sm.setZero();
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    fillMatrix2(nnzPerCol, rows, cols, sm);
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // dense matrices
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifdef DENSEMATRIX
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      DenseMatrix dm(rows,cols), (rows,cols);
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      eiToDense(sm, dm);
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(res = dm * sm);
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "Dense       " << t.value()/repeats << "\t";
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
1027faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez      SPMV_BENCH(res = dm.transpose() * sm);
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << endl;
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // eigen sparse matrices
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(res.noalias() += sm * dv; )
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "Eigen       " << t.value()/repeats << "\t";
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(res.noalias() += sm.transpose() * dv; )
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << endl;
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // CSparse
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifdef CSPARSE
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "CSparse \n";
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      cs *csm;
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      eiToCSparse(sm, csm);
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//       BENCH();
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//       timer.stop();
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//       std::cout << "   a * b:\t" << timer.value() << endl;
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//       BENCH( { m3 = cs_sorted_multiply2(m1, m2); cs_spfree(m3); } );
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//       std::cout << "   a * b:\t" << timer.value() << endl;
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifdef OSKI
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_matrix_t om;
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_vecview_t ov, ores;
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_Init();
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      om = oski_CreateMatCSC(sm._outerIndexPtr(), sm._innerIndexPtr(), sm._valuePtr(), rows, cols,
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath                             SHARE_INPUTMAT, 1, INDEX_ZERO_BASED);
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      ov = oski_CreateVecView(dv.data(), cols, STRIDE_UNIT);
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      ores = oski_CreateVecView(res.data(), rows, STRIDE_UNIT);
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH( oski_MatMult(om, OP_NORMAL, 1, ov, 0, ores) );
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "OSKI        " << t.value()/repeats << "\t";
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH( oski_MatMult(om, OP_TRANS, 1, ov, 0, ores) );
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << "\n";
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      // tune
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t.reset();
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t.start();
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_SetHintMatMult(om, OP_NORMAL, 1.0, SYMBOLIC_VEC, 0.0, SYMBOLIC_VEC, ALWAYS_TUNE_AGGRESSIVELY);
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_TuneMat(om);
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      t.stop();
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      double tuning = t.value();
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH( oski_MatMult(om, OP_NORMAL, 1, ov, 0, ores) );
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "OSKI tuned  " << t.value()/repeats << "\t";
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH( oski_MatMult(om, OP_TRANS, 1, ov, 0, ores) );
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << "\t(" << tuning <<  ")\n";
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_DestroyMat(om);
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_DestroyVecView(ov);
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_DestroyVecView(ores);
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      oski_Close();
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifndef NOUBLAS
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      using namespace boost::numeric;
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      UblasMatrix um(rows,cols);
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      eiToUblas(sm, um);
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      boost::numeric::ublas::vector<Scalar> uv(cols), ures(rows);
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Map<Matrix<Scalar,Dynamic,1> >(&uv[0], cols) = dv;
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Map<Matrix<Scalar,Dynamic,1> >(&ures[0], rows) = res;
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(ublas::axpy_prod(um, uv, ures, true));
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "ublas       " << t.value()/repeats << "\t";
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(ublas::axpy_prod(boost::numeric::ublas::trans(um), uv, ures, true));
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << endl;
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // GMM++
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifndef NOGMM
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      GmmSparse gm(rows,cols);
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      eiToGmm(sm, gm);
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::vector<Scalar> gv(cols), gres(rows);
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Map<Matrix<Scalar,Dynamic,1> >(&gv[0], cols) = dv;
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      Map<Matrix<Scalar,Dynamic,1> >(&gres[0], rows) = res;
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(gmm::mult(gm, gv, gres));
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "GMM++       " << t.value()/repeats << "\t";
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(gmm::mult(gmm::transposed(gm), gv, gres));
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << endl;
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    // MTL4
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #ifndef NOMTL
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      MtlSparse mm(rows,cols);
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      eiToMtl(sm, mm);
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      mtl::dense_vector<Scalar> mv(cols, 1.0);
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      mtl::dense_vector<Scalar> mres(rows, 1.0);
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(mres = mm * mv);
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "MTL4        " << t.value()/repeats << "\t";
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      SPMV_BENCH(mres = trans(mm) * mv);
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << t.value()/repeats << endl;
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    #endif
221c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "\n";
223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if(nnzPerCol==1)
225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      break;
226c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nnzPerCol -= nnzPerCol/2;
227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return 0;
230c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
234