12b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This file is part of Eigen, a lightweight C++ template library
22b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// for linear algebra.
32b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang//
42b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Mehdi Goli    Codeplay Software Ltd.
52b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Ralph Potter  Codeplay Software Ltd.
62b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Luke Iwanski  Codeplay Software Ltd.
72b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Cummins Chris PhD student at The University of Edinburgh.
82b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Contact: <eigen@codeplay.com>
92b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang//
102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This Source Code Form is subject to the terms of the Mozilla
112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Public License v. 2.0. If a copy of the MPL was not distributed
122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/*****************************************************************
152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * TensorSyclRun.h
162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang *
172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * \brief:
182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * Schedule_kernel invoke an specialised version of kernel struct. The
192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang * specialisation is based on the data dimension in sycl buffer
202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang *
212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang*****************************************************************/
222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace Eigen {
272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace TensorSycl {
282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// The run function in tensor sycl convert the expression tree to a buffer
292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// based expression tree;
302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// creates the expression tree for the device with accessor to buffers;
312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang/// construct the kernel and submit it to the sycl queue.
322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate <typename Expr, typename Dev>
332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid run(Expr &expr, Dev &dev) {
342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  if (needs_assign) {
372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    typedef  typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    auto functors = internal::extractFunctors(evaluator);
392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    dev.m_queue.submit([&](cl::sycl::handler &cgh) {
422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      // create a tuple of accessors from Evaluator
442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      size_t GRange=range;
472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      if (tileSize>GRange) tileSize=GRange;
482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      else if(GRange>tileSize){
492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        size_t xMode = GRange % tileSize;
502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        if (xMode != 0) GRange += (tileSize - xMode);
512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      }
522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      // run the kernel
532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        typedef  typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        if (itemID.get_global_linear_id() < range) {
582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang          device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang        }
602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang      });
612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    });
622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang    dev.m_queue.throw_asynchronous();
632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  }
642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang  evaluator.cleanup();
662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}
672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}  // namespace TensorSycl
682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}  // namespace Eigen
692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang
702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
71