/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/computation_placer.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/pool.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"

// Forward declaration so this header does not need the full Eigen headers.
namespace Eigen {
struct ThreadPoolDevice;
}

namespace xla {

// Options to configure the backend when it is created.
class BackendOptions {
 public:
  // Set the platform backing the backend, or nullptr for the default platform.
  BackendOptions& set_platform(perftools::gputools::Platform* platform);
  // Returns the platform set above; nullptr means "use the default platform".
  perftools::gputools::Platform* platform() const;

  // Sets the thread pool size for parallel execution of an individual operator.
  // The default value of -1 will result in initializing the thread pool with
  // the number of threads equal to the number of cores in the system.
  BackendOptions& set_intra_op_parallelism_threads(int num_threads);
  int intra_op_parallelism_threads() const;

 private:
  perftools::gputools::Platform* platform_ = nullptr;
  int intra_op_parallelism_threads_ = -1;
};

// Class which encapsulates an XLA backend. It includes everything necessary
// to compile and execute computations on a particular platform.
//
// It also offers a pooling API for creation/use of initialized streams:
//
//   StreamPtr stream = backend->BorrowStream().ConsumeValueOrDie();
class Backend {
 public:
  // Pool-managed smart pointer for borrowed streams (see Pool::SmartPtr).
  using StreamPtr = Pool<perftools::gputools::Stream>::SmartPtr;

  // Creates a new backend.
  static StatusOr<std::unique_ptr<Backend>> CreateBackend(
      const BackendOptions& options);

  // Creates a backend for the default platform. The default platform is defined
  // in PlatformUtil.
  static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();

  ~Backend();

  // Accessors for the various objects.
  perftools::gputools::Platform* platform() const { return platform_; }
  Compiler* compiler() const { return compiler_; }
  DeviceMemoryAllocator* memory_allocator() const {
    return memory_allocator_.get();
  }
  TransferManager* transfer_manager() const { return transfer_manager_; }
  ComputationPlacer* computation_placer() const { return computation_placer_; }

  // Returns the number of devices of the platform type which are visible. Not
  // all of these devices may be usable by XLA.
  int device_count() const { return stream_executors_.size(); }

  // Returns the device ordinal number of the default device.
  int default_device_ordinal() const;

  // Returns stream executors of all supported devices for this backend. The
  // executors are ordered by the device ordinal.
  const std::vector<perftools::gputools::StreamExecutor*>& stream_executors()
      const {
    return stream_executors_;
  }

  // Returns the stream executor for the given device ordinal.
  StatusOr<perftools::gputools::StreamExecutor*> stream_executor(
      int device_ordinal) const;

  // Returns the stream executor for the default device ordinal. This stream
  // executor can only be used when the number of computations is 1 (replication
  // can be > 1).
  perftools::gputools::StreamExecutor* default_stream_executor() const {
    // stream_executors_[0] is documented below as the default executor.
    CHECK(!stream_executors_.empty());
    return stream_executors_[0];
  }

  // Borrows a stream for use by the caller, either by grabbing it from an
  // internal pool, or by constructing/initializating it, and returns the result
  // to the caller.
  StatusOr<StreamPtr> BorrowStream(int device_ordinal);
  StatusOr<StreamPtr> BorrowStream(
      perftools::gputools::StreamExecutor* executor);

  // Returns a function to borrow a stream, as `BorrowStream` above does.
  // Purely for convenience, the caller could rather make this anonymous
  // function itself.
  std::function<StatusOr<StreamPtr>(int)> StreamBorrower() {
    return [this](int device_ordinal) { return BorrowStream(device_ordinal); };
  }

  // Returns whether the given device ordinal of the backend is supported.
  bool device_ordinal_supported(int device_ordinal) const {
    return (device_ordinal >= 0 && device_ordinal < device_count() &&
            stream_executors_[device_ordinal] != nullptr);
  }

  // Return a string identifier for the given device, eg: "GPU:3".
  string device_name(int device_ordinal) const {
    return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal);
  }

  // Returns true if the devices with the given ordinals are equivalent from
  // XLA's perspective. That is, an executable compiled for one device would
  // be equivalent to an executable compiled for the other.
  StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);

  // For the host platform, returns the threadpool to use when scheduling
  // parallel operators. For other platforms, returns NULL.
  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;

  // For the host platform, returns the configured eigen threadpool device to be
  // used for scheduling work. For other platforms, returns NULL.
  const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
  // Presumably the threadpool backing the Eigen device above (host platform
  // only) — defined in backend.cc; confirm there.
  tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const;

  // Resets the devices associated with this backend.
  Status ResetDevices();

 private:
  struct EigenThreadPoolWrapper;
  // Construction goes through the static CreateBackend/CreateDefaultBackend
  // factories above; the constructor itself is private and copying is disabled.
  Backend(perftools::gputools::Platform* platform, Compiler* compiler,
          tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
              stream_executors,
          TransferManager* transfer_manager,
          ComputationPlacer* computation_placer,
          int intra_op_parallelism_threads);
  Backend(const Backend&) = delete;
  Backend& operator=(const Backend&) = delete;

  perftools::gputools::Platform* platform_;
  Compiler* compiler_;
  TransferManager* transfer_manager_;
  ComputationPlacer* computation_placer_;

  // Vector of stream executors. stream_executors_[0] is the default executor.
  std::vector<perftools::gputools::StreamExecutor*> stream_executors_;

  // Guards stream_pools_ (see GUARDED_BY annotation below).
  tensorflow::mutex mu_;

  // Mapping from stream executor to stream pools, used by `BorrowStream` above.
  std::map<perftools::gputools::StreamExecutor*,
           Pool<perftools::gputools::Stream>>
      stream_pools_ GUARDED_BY(mu_);

  // The default memory allocator to use.
  std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;

  // For the CPU backend, a threadpool for scheduling parallel operators.
  std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_;

  // For the CPU backend, an Eigen threadpool device for use by Eigen code.
  std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_