1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// possible options: 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -DEIGEN_DONT_VECTORIZE 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// -msse2 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// #define EIGEN_DEFAULT_TO_ROW_MAJOR 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define _FLOAT 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <iostream> 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Core> 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "BenchTimer.h" 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// include the BLAS headers 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathextern "C" { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <cblas.h> 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <string> 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef _FLOAT 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef float Scalar; 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define CBLAS_GEMM cblas_sgemm 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef double Scalar; 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define CBLAS_GEMM cblas_dgemm 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtypedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix; 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(int M, int N, int K); 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(void); 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char *argv[]) 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // disable SSE exceptions 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef __GNUC__ 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int aux; 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath asm( 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath "stmxcsr %[aux] \n\t" 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath "orl $32832, %[aux] \n\t" 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath "ldmxcsr %[aux] \n\t" 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath : : [aux] "m" (aux)); 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int nbtries=1, nbloops=1, M, N, K; 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (argc==2) 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (std::string(argv[1])=="check") 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_product(); 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = N = K = atoi(argv[1]); 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if ((argc==3) && (std::string(argv[1])=="auto")) 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = N = K = atoi(argv[2]); 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbloops = 1000000000/(M*M*M); 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (nbloops<1) 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbloops = 1; 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbtries = 6; 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (argc==4) 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = N = K = atoi(argv[1]); 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbloops = atoi(argv[2]); 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbtries = atoi(argv[3]); 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if (argc==6) 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = atoi(argv[1]); 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath N = atoi(argv[2]); 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath K = atoi(argv[3]); 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbloops = atoi(argv[4]); 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbtries = atoi(argv[5]); 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Usage: " << argv[0] << " size \n"; 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Usage: " << argv[0] << " auto size\n"; 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Usage: " << argv[0] << " check\n"; 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "Options:\n"; 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " size unique size of the 2 matrices (integer)\n"; 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " auto automatically set the number of repetitions and tries\n"; 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " nbloops number of times the GEMM routines is executed\n"; 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " nbtries number of times the loop is benched (return the best try)\n"; 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << " check check eigen product using cblas as a reference\n"; 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath exit(1); 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double nbmad = double(M) * double(N) * double(K) * double(nbloops); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!(std::string(argv[1])=="auto")) 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << M << " x " << N << " x " << K << "\n"; 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar alpha, beta; 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MyMatrix ma(M,K), mb(K,N), mc(M,N); 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ma = MyMatrix::Random(M,K); 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mb = MyMatrix::Random(K,N); 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mc = MyMatrix::Random(M,N); 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Eigen::BenchTimer timer; 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // we simply compute c += a*b, so: 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath alpha = 1; 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath beta = 1; 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // bench cblas 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!(std::string(argv[1])=="auto")) 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.reset(); 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint k=0 ; k<nbtries ; ++k) 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.start(); 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint j=0 ; j<nbloops ; ++j) 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N); 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M); 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.stop(); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!(std::string(argv[1])=="auto")) 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // clear 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ma = MyMatrix::Random(M,K); 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mb = MyMatrix::Random(K,N); 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mc = MyMatrix::Random(M,N); 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // eigen 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// if (!(std::string(argv[1])=="auto")) 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.reset(); 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint k=0 ; k<nbtries ; ++k) 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.start(); 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath bench_eigengemm(mc, ma, mb, nbloops); 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath timer.stop(); 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (!(std::string(argv[1])=="auto")) 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return 0; 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace Eigen; 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint j=0 ; j<nbloops ; ++j) 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mc.noalias() += ma * mb; 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define MYVERIFY(A,M) if (!(A)) { \ 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "FAIL: " << M << "\n"; \ 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(int M, int N, int K) 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ma = MyMatrix::Random(M,K); 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mb = MyMatrix::Random(K,N); 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath maT = ma.transpose(); 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mbT = mb.transpose(); 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath mc = MyMatrix::Random(M,N); 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MyMatrix::Scalar eps = 1e-4; 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen = mref = mc; 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen += ma * mb; 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MYVERIFY(meigen.isApprox(mref, eps),". * ."); 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen = mref = mc; 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); 192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen += maT.transpose() * mb; 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MYVERIFY(meigen.isApprox(mref, eps),"T * ."); 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen = mref = mc; 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen += (maT.transpose()) * (mbT.transpose()); 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MYVERIFY(meigen.isApprox(mref, eps),"T * T"); 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen = mref = mc; 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath meigen += ma * mbT.transpose(); 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath MYVERIFY(meigen.isApprox(mref, eps),". * T"); 204c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 205c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_product(void) 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int M, N, K; 209c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (uint i=0; i<1000; ++i) 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = internal::random<int>(1,64); 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath N = internal::random<int>(1,768); 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath K = internal::random<int>(1,768); 214c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath M = (0 + M) * 1; 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << M << " x " << N << " x " << K << "\n"; 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_product(M, N, K); 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 220