12b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This file is part of Eigen, a lightweight C++ template library 22b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// for linear algebra. 32b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// 42b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Mehdi Goli Codeplay Software Ltd. 52b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Ralph Potter Codeplay Software Ltd. 62b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Luke Iwanski Codeplay Software Ltd. 72b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Cummins Chris PhD student at The University of Edinburgh. 82b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Contact: <eigen@codeplay.com> 92b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// 102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This Source Code Form is subject to the terms of the Mozilla 112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Public License v. 2.0. If a copy of the MPL was not distributed 122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/***************************************************************** 152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * TensorSyclRun.h 162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * 172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * \brief: 182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * Schedule_kernel invoke an specialised version of kernel struct. The 192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * specialisation is based on the data dimension in sycl buffer 202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * 212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang*****************************************************************/ 222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace Eigen { 272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace TensorSycl { 282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// The run function in tensor sycl convert the expression tree to a buffer 292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// based expression tree; 302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// creates the expression tree for the device with accessor to buffers; 312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// construct the kernel and submit it to the sycl queue. 322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate <typename Expr, typename Dev> 332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid run(Expr &expr, Dev &dev) { 342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); 352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); 362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (needs_assign) { 372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr; 382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto functors = internal::extractFunctors(evaluator); 392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; 412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang dev.m_queue.submit([&](cl::sycl::handler &cgh) { 422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // create a tuple of accessors from Evaluator 442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); 452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; 462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t GRange=range; 472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (tileSize>GRange) tileSize=GRange; 482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang else if(GRange>tileSize){ 492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t xMode = GRange % tileSize; 502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (xMode != 0) GRange += (tileSize - xMode); 512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // run the kernel 532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { 542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr; 552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); 562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice()); 572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (itemID.get_global_linear_id() < range) { 582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id())); 592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }); 612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }); 622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang dev.m_queue.throw_asynchronous(); 632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang evaluator.cleanup(); 662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} // namespace TensorSycl 682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} // namespace Eigen 692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP 71