1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This file is part of Eigen, a lightweight C++ template library 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// for linear algebra. 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// This Source Code Form is subject to the terms of the Mozilla 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Public License v. 2.0. If a copy of the MPL was not distributed 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifndef EIGEN_PARALLELIZER_H 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define EIGEN_PARALLELIZER_H 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace Eigen { 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 15c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 16c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 17c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \internal */ 18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void manage_multi_threading(Action action, int* v) 19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static EIGEN_UNUSED int m_maxThreads = -1; 21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(action==SetAction) 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_internal_assert(v!=0); 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath m_maxThreads = *v; 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if(action==GetAction) 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_internal_assert(v!=0); 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifdef EIGEN_HAS_OPENMP 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(m_maxThreads>0) 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath *v = m_maxThreads; 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath *v = omp_get_max_threads(); 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath *v = 1; 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_internal_assert(false); 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** Must be call first when calling Eigen from multiple threads */ 48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void initParallel() 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int nbt; 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::manage_multi_threading(GetAction, &nbt); 52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::ptrdiff_t l1, l2; 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::manage_caching_sizes(GetAction, &l1, &l2); 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** \returns the max number of threads reserved for Eigen 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa setNbThreads */ 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline int nbThreads() 59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int ret; 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::manage_multi_threading(GetAction, &ret); 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return ret; 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/** Sets the max number of threads reserved for Eigen 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * \sa nbThreads */ 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathinline void setNbThreads(int v) 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath internal::manage_multi_threading(SetAction, &v); 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathnamespace internal { 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename Index> struct GemmParallelInfo 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {} 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int volatile sync; 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int volatile users; 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index rhs_start; 82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index rhs_length; 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}; 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<bool Condition, typename Functor, typename Index> 86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // TODO when EIGEN_USE_BLAS is defined, 89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // we should still enable OMP for other scalar types 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS) 91c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // FIXME the transpose variable is only needed to properly split 92c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // the matrix product when multithreading is enabled. This is a temporary 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // fix to support row-major destination matrices. This whole 94c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // parallelizer mechanism has to be redisigned anyway. 95c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath EIGEN_UNUSED_VARIABLE(transpose); 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath func(0,rows, 0,cols); 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#else 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Dynamically check whether we should enable or disable OpenMP. 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // The conditions are: 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // - the max number of threads we can create is greater than 1 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // - we are not already in a parallel code 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // - the sizes are large enough 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 1- are we already in a parallel session? 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp? 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if((!Condition) || (omp_get_num_threads()>1)) 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return func(0,rows, 0,cols); 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index size = transpose ? cols : rows; 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 2- compute the maximal number of threads from the size of the product: 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // FIXME this has to be fine tuned 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index max_threads = std::max<Index>(1,size / 32); 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // 3 - compute the number of threads we are going to use 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index threads = std::min<Index>(nbThreads(), max_threads); 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(threads==1) 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return func(0,rows, 0,cols); 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Eigen::initParallel(); 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath func.initParallelSession(); 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(transpose) 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::swap(rows,cols); 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index blockCols = (cols / threads) & ~Index(0x3); 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index blockRows = (rows / threads) & ~Index(0x7); 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads]; 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #pragma omp parallel for schedule(static,1) num_threads(threads) 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(Index i=0; i<threads; ++i) 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index r0 = i*blockRows; 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows; 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index c0 = i*blockCols; 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols; 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath info[i].rhs_start = c0; 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath info[i].rhs_length = actualBlockCols; 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(transpose) 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath func(0, cols, r0, actualBlockRows, info); 147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath func(r0, actualBlockRows, 0,cols, info); 149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath delete[] info; 152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace internal 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} // end namespace Eigen 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif // EIGEN_PARALLELIZER_H 160