backend.h revision 61197393ab39929e945e9adf1378659a5c2bbab1
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
141e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins==============================================================================*/ 151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ 181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <map> 201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <memory> 211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <string> 221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <vector> 231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/compiler.h" 251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/device_memory_allocator.h" 2661197393ab39929e945e9adf1378659a5c2bbab1A. 
Unique TensorFlower#include "tensorflow/compiler/xla/service/pool.h" 271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/transfer_manager.h" 281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/statusor.h" 291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/types.h" 301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/gtl/array_slice.h" 311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/strings/strcat.h" 321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/mutex.h" 331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/stream_executor_no_cuda.h" 341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/thread_annotations.h" 351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace Eigen { 371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsclass ThreadPoolDevice; 381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins} 391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace xla { 411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins 421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Class which encapsulates an XLA backend. It includes everything necessary 431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// to compile and execute computations on a particular platform. 441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// It also offers a pooling API for creation/use of initialized streams: 461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// 4761197393ab39929e945e9adf1378659a5c2bbab1A. 
//   StreamPtr stream = backend->BorrowStream().ConsumeValueOrDie();
class Backend {
 public:
  // Smart pointer that returns a borrowed stream to its pool on destruction.
  using StreamPtr = Pool<perftools::gputools::Stream>::SmartPtr;

  // The number of streams we create for the pool at initialization time.
  static constexpr int kInitialStreamsToPool = 8;

  // Creates a new backend for the given platform with the given number of
  // replicas. A replica_count of -1 means to use the flag value.
  static StatusOr<std::unique_ptr<Backend>> CreateBackend(
      perftools::gputools::Platform* platform, int64 replica_count = -1);

  // Creates a backend for the default platform. The default platform is
  // defined in PlatformUtil.
  static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();

  ~Backend();

  // Accessors for the various objects. Raw pointers returned here are not
  // owned by the caller.
  perftools::gputools::Platform* platform() const { return platform_; }
  Compiler* compiler() const { return compiler_; }
  DeviceMemoryAllocator* memory_allocator() const {
    return memory_allocator_.get();
  }
  TransferManager* transfer_manager() const { return transfer_manager_; }

  // Returns the number of devices of the platform type which are visible. Not
  // all of these devices may be usable by XLA.
  int device_count() const { return stream_executors_.size(); }

  // Returns the device ordinal number of the default device.
  int default_device_ordinal() const;

  // Returns stream executors of all supported devices for this backend. The
  // executors are ordered by the device ordinal.
  const std::vector<perftools::gputools::StreamExecutor*>& stream_executors()
      const {
    return stream_executors_;
  }

  // Returns the replicas for the default stream executor.
  //
  // When the number of replicas is R, the first R stream executors are
  // assigned to the replicas of the default stream executor.
  std::vector<perftools::gputools::StreamExecutor*> Replicas() const;

  // Returns the replicas for the given device_ordinal. The given device
  // ordinal is considered to be the first device ordinal among the replicas.
  // Returns an error status if the stream executor for the given device
  // ordinal does not exist or if there are not enough stream executors for the
  // replicas.
  StatusOr<std::vector<perftools::gputools::StreamExecutor*>> Replicas(
      int device_ordinal) const;

  // Returns the stream executor for the given device ordinal.
  StatusOr<perftools::gputools::StreamExecutor*> stream_executor(
      int device_ordinal) const;

  // Returns the stream executor for the default device ordinal (the first
  // entry in stream_executors_). CHECK-fails if there are no executors.
  perftools::gputools::StreamExecutor* default_stream_executor() const {
    CHECK(!stream_executors_.empty());
    return stream_executors_[0];
  }

  // Primes the internal pool of streams for BorrowStream with n initialized
  // stream instances.
  tensorflow::Status PoolStreams(int n,
                                 perftools::gputools::StreamExecutor* executor);

  // Borrows a stream for use by the caller, either by grabbing it from an
  // internal pool (keyed by executor; see stream_pools_ below), or by
  // constructing/initializing it, and returns the result to the caller.
  StatusOr<StreamPtr> BorrowStream(
      perftools::gputools::StreamExecutor* executor);

  // Returns whether the given device ordinal of the backend is supported,
  // i.e. it is in range and a stream executor exists for it.
  bool device_ordinal_supported(int device_ordinal) const {
    return (device_ordinal >= 0 && device_ordinal < device_count() &&
            stream_executors_[device_ordinal] != nullptr);
  }

  // Returns a string identifier for the given device, eg: "GPU:3".
  string device_name(int device_ordinal) const {
    return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal);
  }

  // Returns true if the devices with the given ordinals are equivalent from
  // XLA's perspective. That is, an executable compiled for one device would
  // be equivalent to an executable compiled for the other.
  StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);

  // For the host platform, returns the threadpool to use when scheduling
  // parallel operators. For other platforms, returns NULL.
  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;

  // For the host platform, returns the configured eigen threadpool device to
  // be used for scheduling work. For other platforms, returns NULL.
  const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;

  // Resets the devices associated with this backend.
  Status ResetDevices();

 private:
  struct EigenThreadPoolWrapper;  // Defined in the .cc file.

  Backend(int64 replica_count, perftools::gputools::Platform* platform,
          Compiler* compiler,
          tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
              stream_executors,
          TransferManager* transfer_manager);
  Backend(const Backend&) = delete;
  Backend& operator=(const Backend&) = delete;

  // Platform/compiler/transfer-manager singletons; not owned by this class.
  perftools::gputools::Platform* platform_;
  Compiler* compiler_;
  TransferManager* transfer_manager_;
  int64 replica_count_ = -1;

  // Vector of stream executors. stream_executors_[0] is the default executor.
  std::vector<perftools::gputools::StreamExecutor*> stream_executors_;

  // Mapping from stream executor to stream pools, used by `BorrowStream` above.
  std::map<perftools::gputools::StreamExecutor*,
           Pool<perftools::gputools::Stream>>
      stream_pools_;

  // The default memory allocator to use.
  std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;

  // For the CPU backend, a threadpool for scheduling parallel operators.
  std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_;

  // For the CPU backend, an Eigen threadpool device for use by Eigen code.
  std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_