1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// possible options:
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//    -DEIGEN_DONT_VECTORIZE
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//    -msse2
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #define EIGEN_DEFAULT_TO_ROW_MAJOR
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _FLOAT
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>

#include <Eigen/Core>
#include "BenchTimer.h"

// include the BLAS headers
extern "C" {
#include <cblas.h>
}
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef _FLOAT
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef float Scalar;
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define CBLAS_GEMM cblas_sgemm
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef double Scalar;
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define CBLAS_GEMM cblas_dgemm
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(int M, int N, int K);
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(void);
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char *argv[])
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // disable SSE exceptions
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #ifdef __GNUC__
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    int aux;
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    asm(
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    "stmxcsr   %[aux]           \n\t"
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    "orl       $32832, %[aux]   \n\t"
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    "ldmxcsr   %[aux]           \n\t"
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    : : [aux] "m" (aux));
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  #endif
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int nbtries=1, nbloops=1, M, N, K;
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (argc==2)
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if (std::string(argv[1])=="check")
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      check_product();
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      M = N = K = atoi(argv[1]);
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else if ((argc==3) && (std::string(argv[1])=="auto"))
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    M = N = K = atoi(argv[2]);
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbloops = 1000000000/(M*M*M);
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if (nbloops<1)
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      nbloops = 1;
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbtries = 6;
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else if (argc==4)
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    M = N = K = atoi(argv[1]);
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbloops = atoi(argv[2]);
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbtries = atoi(argv[3]);
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else if (argc==6)
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    M = atoi(argv[1]);
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    N = atoi(argv[2]);
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    K = atoi(argv[3]);
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbloops = atoi(argv[4]);
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    nbtries = atoi(argv[5]);
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  else
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Usage: " << argv[0] << " size  \n";
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Usage: " << argv[0] << " auto size\n";
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Usage: " << argv[0] << " check\n";
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "Options:\n";
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    size       unique size of the 2 matrices (integer)\n";
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    auto       automatically set the number of repetitions and tries\n";
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << "    check      check eigen product using cblas as a reference\n";
93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    exit(1);
94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (!(std::string(argv[1])=="auto"))
99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << M << " x " << N << " x " << K << "\n";
100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Scalar alpha, beta;
102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MyMatrix ma(M,K), mb(K,N), mc(M,N);
103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  ma = MyMatrix::Random(M,K);
104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mb = MyMatrix::Random(K,N);
105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mc = MyMatrix::Random(M,N);
106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  Eigen::BenchTimer timer;
108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // we simply compute c += a*b, so:
110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  alpha = 1;
111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  beta = 1;
112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // bench cblas
114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  if (!(std::string(argv[1])=="auto"))
116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    timer.reset();
118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    for (uint k=0 ; k<nbtries ; ++k)
119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    {
120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        timer.start();
121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        for (uint j=0 ; j<nbloops ; ++j)
122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              #else
125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath              #endif
127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        timer.stop();
128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    }
129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    if (!(std::string(argv[1])=="auto"))
130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    else
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // clear
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  ma = MyMatrix::Random(M,K);
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mb = MyMatrix::Random(K,N);
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mc = MyMatrix::Random(M,N);
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  // eigen
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath//   if (!(std::string(argv[1])=="auto"))
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      timer.reset();
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      for (uint k=0 ; k<nbtries ; ++k)
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      {
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          timer.start();
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          bench_eigengemm(mc, ma, mb, nbloops);
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath          timer.stop();
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      }
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      if (!(std::string(argv[1])=="auto"))
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      else
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  return 0;
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace Eigen;
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (uint j=0 ; j<nbloops ; ++j)
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath      mc.noalias() += ma * mb;
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
// Prints "FAIL: <MSG>" on std::cout when COND is false. The do/while(0)
// wrapper makes the macro a single statement, so it can be followed by ';'
// and used safely as the body of an if/else.
#define MYVERIFY(COND,MSG) do { if (!(COND)) { \
    std::cout << "FAIL: " << MSG << "\n"; \
  } } while (0)
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(int M, int N, int K)
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  ma = MyMatrix::Random(M,K);
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mb = MyMatrix::Random(K,N);
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  maT = ma.transpose();
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mbT = mb.transpose();
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  mc = MyMatrix::Random(M,N);
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MyMatrix::Scalar eps = 1e-4;
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen = mref = mc;
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen += ma * mb;
188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MYVERIFY(meigen.isApprox(mref, eps),". * .");
189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen = mref = mc;
191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen += maT.transpose() * mb;
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen = mref = mc;
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen += (maT.transpose()) * (mbT.transpose());
198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen = mref = mc;
201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  meigen += ma * mbT.transpose();
203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  MYVERIFY(meigen.isApprox(mref, eps),". * T");
204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(void)
207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  int M, N, K;
209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  for (uint i=0; i<1000; ++i)
210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  {
211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    M = internal::random<int>(1,64);
212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    N = internal::random<int>(1,768);
213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    K = internal::random<int>(1,768);
214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    M = (0 + M) * 1;
215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    std::cout << M << " x " << N << " x " << K << "\n";
216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath    check_product(M, N, K);
217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath  }
218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
220