1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 17#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 18 19#include <map> 20#include <memory> 21#include <string> 22#include <vector> 23 24#include "tensorflow/compiler/xla/service/compiler.h" 25#include "tensorflow/compiler/xla/service/computation_placer.h" 26#include "tensorflow/compiler/xla/service/device_memory_allocator.h" 27#include "tensorflow/compiler/xla/service/pool.h" 28#include "tensorflow/compiler/xla/service/transfer_manager.h" 29#include "tensorflow/compiler/xla/statusor.h" 30#include "tensorflow/compiler/xla/types.h" 31#include "tensorflow/core/lib/gtl/array_slice.h" 32#include "tensorflow/core/lib/strings/strcat.h" 33#include "tensorflow/core/platform/mutex.h" 34#include "tensorflow/core/platform/stream_executor_no_cuda.h" 35#include "tensorflow/core/platform/thread_annotations.h" 36 37namespace Eigen { 38struct ThreadPoolDevice; 39} 40 41namespace xla { 42 43// Options to configure the backend when it is created. 44class BackendOptions { 45 public: 46 // Set the platform backing the backend, or nullptr for the default platform. 47 BackendOptions& set_platform(perftools::gputools::Platform* platform); 48 perftools::gputools::Platform* platform() const; 49 50 // Sets the thread pool size for parallel execution of an individual operator. 51 // The default value of -1 will result in initializing the thread pool with 52 // the number of threads equal to the number of cores in the system. 53 BackendOptions& set_intra_op_parallelism_threads(int num_threads); 54 int intra_op_parallelism_threads() const; 55 56 private: 57 perftools::gputools::Platform* platform_ = nullptr; 58 int intra_op_parallelism_threads_ = -1; 59}; 60 61// Class which encapsulates an XLA backend. It includes everything necessary 62// to compile and execute computations on a particular platform. 63// 64// It also offers a pooling API for creation/use of initialized streams: 65// 66// StreamPtr stream = backend->BorrowStream().ConsumeValueOrDie(); 67class Backend { 68 public: 69 using StreamPtr = Pool<perftools::gputools::Stream>::SmartPtr; 70 71 // Creates a new backend. 72 static StatusOr<std::unique_ptr<Backend>> CreateBackend( 73 const BackendOptions& options); 74 75 // Creates a backend for the default platform. The default platform is defined 76 // in PlatformUtil. 77 static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend(); 78 79 ~Backend(); 80 81 // Accessors for the various objects. 82 perftools::gputools::Platform* platform() const { return platform_; } 83 Compiler* compiler() const { return compiler_; } 84 DeviceMemoryAllocator* memory_allocator() const { 85 return memory_allocator_.get(); 86 } 87 TransferManager* transfer_manager() const { return transfer_manager_; } 88 ComputationPlacer* computation_placer() const { return computation_placer_; } 89 90 // Returns the number of devices of the platform type which are visible. Not 91 // all of these devices may be usable by XLA. 92 int device_count() const { return stream_executors_.size(); } 93 94 // Returns the device ordinal number of the default device. 95 int default_device_ordinal() const; 96 97 // Returns stream executors of all supported devices for this backend. The 98 // executors are ordered by the device ordinal. 99 const std::vector<perftools::gputools::StreamExecutor*>& stream_executors() 100 const { 101 return stream_executors_; 102 } 103 104 // Returns the stream executor for the given device ordinal. 105 StatusOr<perftools::gputools::StreamExecutor*> stream_executor( 106 int device_ordinal) const; 107 108 // Returns the stream executor for the default device ordinal. This stream 109 // executor can only be used when the number of computations is 1 (replication 110 // can be > 1). 111 perftools::gputools::StreamExecutor* default_stream_executor() const { 112 CHECK(!stream_executors_.empty()); 113 return stream_executors_[0]; 114 } 115 116 // Borrows a stream for use by the caller, either by grabbing it from an 117 // internal pool, or by constructing/initializating it, and returns the result 118 // to the caller. 119 StatusOr<StreamPtr> BorrowStream(int device_ordinal); 120 StatusOr<StreamPtr> BorrowStream( 121 perftools::gputools::StreamExecutor* executor); 122 123 // Returns a function to borrow a stream, as `BorrowStream` above does. 124 // Purely for convenience, the caller could rather make this anonymous 125 // function itself. 126 std::function<StatusOr<StreamPtr>(int)> StreamBorrower() { 127 return [this](int device_ordinal) { return BorrowStream(device_ordinal); }; 128 } 129 130 // Returns whether the given device ordinal of the backend is supported. 131 bool device_ordinal_supported(int device_ordinal) const { 132 return (device_ordinal >= 0 && device_ordinal < device_count() && 133 stream_executors_[device_ordinal] != nullptr); 134 } 135 136 // Return a string identifier for the given device, eg: "GPU:3". 137 string device_name(int device_ordinal) const { 138 return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal); 139 } 140 141 // Returns true if the devices with the given ordinals are equivalent from 142 // XLA's perspective. That is, an executable compiled for one device would 143 // be equivalent to an executable compiled for the other. 144 StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b); 145 146 // For the host platform, returns the threadpool to use when scheduling 147 // parallel operators. For other platforms, returns NULL. 148 tensorflow::thread::ThreadPool* inter_op_thread_pool() const; 149 150 // For the host platform, returns the configured eigen threadpool device to be 151 // used for scheduling work. For other platforms, returns NULL. 152 const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const; 153 tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const; 154 155 // Resets the devices associated with this backend. 156 Status ResetDevices(); 157 158 private: 159 struct EigenThreadPoolWrapper; 160 Backend(perftools::gputools::Platform* platform, Compiler* compiler, 161 tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*> 162 stream_executors, 163 TransferManager* transfer_manager, 164 ComputationPlacer* computation_placer, 165 int intra_op_parallelism_threads); 166 Backend(const Backend&) = delete; 167 Backend& operator=(const Backend&) = delete; 168 169 perftools::gputools::Platform* platform_; 170 Compiler* compiler_; 171 TransferManager* transfer_manager_; 172 ComputationPlacer* computation_placer_; 173 174 // Vector of stream executors. stream_executors_[0] is the default executor. 175 std::vector<perftools::gputools::StreamExecutor*> stream_executors_; 176 177 tensorflow::mutex mu_; 178 179 // Mapping from stream executor to stream pools, used by `BorrowStream` above. 180 std::map<perftools::gputools::StreamExecutor*, 181 Pool<perftools::gputools::Stream>> 182 stream_pools_ GUARDED_BY(mu_); 183 184 // The default memory allocator to use. 185 std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_; 186 187 // For the CPU backend, a threadpool for scheduling parallel operators. 188 std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_; 189 190 // For the CPU backend, an Eigen threadpool device for use by Eigen code. 191 std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_; 192}; 193 194} // namespace xla 195 196#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 197