1// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
2// possible options:
3//    -DEIGEN_DONT_VECTORIZE
4//    -msse2
5
6// #define EIGEN_DEFAULT_TO_ROW_MAJOR
7#define _FLOAT
8
9#include <iostream>
10
11#include <Eigen/Core>
12#include "BenchTimer.h"
13
14// include the BLAS headers
15extern "C" {
16#include <cblas.h>
17}
18#include <string>
19
20#ifdef _FLOAT
21typedef float Scalar;
22#define CBLAS_GEMM cblas_sgemm
23#else
24typedef double Scalar;
25#define CBLAS_GEMM cblas_dgemm
26#endif
27
28
29typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
30void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31void check_product(int M, int N, int K);
32void check_product(void);
33
34int main(int argc, char *argv[])
35{
36  // disable SSE exceptions
37  #ifdef __GNUC__
38  {
39    int aux;
40    asm(
41    "stmxcsr   %[aux]           \n\t"
42    "orl       $32832, %[aux]   \n\t"
43    "ldmxcsr   %[aux]           \n\t"
44    : : [aux] "m" (aux));
45  }
46  #endif
47
48  int nbtries=1, nbloops=1, M, N, K;
49
50  if (argc==2)
51  {
52    if (std::string(argv[1])=="check")
53      check_product();
54    else
55      M = N = K = atoi(argv[1]);
56  }
57  else if ((argc==3) && (std::string(argv[1])=="auto"))
58  {
59    M = N = K = atoi(argv[2]);
60    nbloops = 1000000000/(M*M*M);
61    if (nbloops<1)
62      nbloops = 1;
63    nbtries = 6;
64  }
65  else if (argc==4)
66  {
67    M = N = K = atoi(argv[1]);
68    nbloops = atoi(argv[2]);
69    nbtries = atoi(argv[3]);
70  }
71  else if (argc==6)
72  {
73    M = atoi(argv[1]);
74    N = atoi(argv[2]);
75    K = atoi(argv[3]);
76    nbloops = atoi(argv[4]);
77    nbtries = atoi(argv[5]);
78  }
79  else
80  {
81    std::cout << "Usage: " << argv[0] << " size  \n";
82    std::cout << "Usage: " << argv[0] << " auto size\n";
83    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85    std::cout << "Usage: " << argv[0] << " check\n";
86    std::cout << "Options:\n";
87    std::cout << "    size       unique size of the 2 matrices (integer)\n";
88    std::cout << "    auto       automatically set the number of repetitions and tries\n";
89    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
90    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
91    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
92    std::cout << "    check      check eigen product using cblas as a reference\n";
93    exit(1);
94  }
95
96  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97
98  if (!(std::string(argv[1])=="auto"))
99    std::cout << M << " x " << N << " x " << K << "\n";
100
101  Scalar alpha, beta;
102  MyMatrix ma(M,K), mb(K,N), mc(M,N);
103  ma = MyMatrix::Random(M,K);
104  mb = MyMatrix::Random(K,N);
105  mc = MyMatrix::Random(M,N);
106
107  Eigen::BenchTimer timer;
108
109  // we simply compute c += a*b, so:
110  alpha = 1;
111  beta = 1;
112
113  // bench cblas
114  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115  if (!(std::string(argv[1])=="auto"))
116  {
117    timer.reset();
118    for (uint k=0 ; k<nbtries ; ++k)
119    {
120        timer.start();
121        for (uint j=0 ; j<nbloops ; ++j)
122              #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124              #else
125              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126              #endif
127        timer.stop();
128    }
129    if (!(std::string(argv[1])=="auto"))
130      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131    else
132        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133  }
134
135  // clear
136  ma = MyMatrix::Random(M,K);
137  mb = MyMatrix::Random(K,N);
138  mc = MyMatrix::Random(M,N);
139
140  // eigen
141//   if (!(std::string(argv[1])=="auto"))
142  {
143      timer.reset();
144      for (uint k=0 ; k<nbtries ; ++k)
145      {
146          timer.start();
147          bench_eigengemm(mc, ma, mb, nbloops);
148          timer.stop();
149      }
150      if (!(std::string(argv[1])=="auto"))
151        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152      else
153        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154  }
155
156  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158
159
160  return 0;
161}
162
163using namespace Eigen;
164
165void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166{
167  for (uint j=0 ; j<nbloops ; ++j)
168      mc.noalias() += ma * mb;
169}
170
171#define MYVERIFY(A,M) if (!(A)) { \
172    std::cout << "FAIL: " << M << "\n"; \
173  }
174void check_product(int M, int N, int K)
175{
176  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177  ma = MyMatrix::Random(M,K);
178  mb = MyMatrix::Random(K,N);
179  maT = ma.transpose();
180  mbT = mb.transpose();
181  mc = MyMatrix::Random(M,N);
182
183  MyMatrix::Scalar eps = 1e-4;
184
185  meigen = mref = mc;
186  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187  meigen += ma * mb;
188  MYVERIFY(meigen.isApprox(mref, eps),". * .");
189
190  meigen = mref = mc;
191  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192  meigen += maT.transpose() * mb;
193  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194
195  meigen = mref = mc;
196  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197  meigen += (maT.transpose()) * (mbT.transpose());
198  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199
200  meigen = mref = mc;
201  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202  meigen += ma * mbT.transpose();
203  MYVERIFY(meigen.isApprox(mref, eps),". * T");
204}
205
206void check_product(void)
207{
208  int M, N, K;
209  for (uint i=0; i<1000; ++i)
210  {
211    M = internal::random<int>(1,64);
212    N = internal::random<int>(1,768);
213    K = internal::random<int>(1,768);
214    M = (0 + M) * 1;
215    std::cout << M << " x " << N << " x " << K << "\n";
216    check_product(M, N, K);
217  }
218}
219
220