/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <map>
201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <memory>
211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <string>
221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <vector>
231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/compiler.h"
251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
2661197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower#include "tensorflow/compiler/xla/service/pool.h"
271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/service/transfer_manager.h"
281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/statusor.h"
291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/types.h"
301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/gtl/array_slice.h"
311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/strings/strcat.h"
321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/mutex.h"
331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/stream_executor_no_cuda.h"
341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/thread_annotations.h"
351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace Eigen {
371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsclass ThreadPoolDevice;
381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}
391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace xla {
411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// Class which encapsulates an XLA backend. It includes everything necessary
431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// to compile and execute computations on a particular platform.
441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins//
451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// It also offers a pooling API for creation/use of initialized streams:
461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins//
4761197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower//    StreamPtr stream = backend->BorrowStream().ConsumeValueOrDie();
481e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsclass Backend {
491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins public:
5061197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  using StreamPtr = Pool<perftools::gputools::Stream>::SmartPtr;
5161197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower
521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // The number of streams we create for the pool at initialization time.
531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  static constexpr int kInitialStreamsToPool = 8;
541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
551e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Creates a new backend for the given platform with the given number of
561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // replicas. A value of -1 means to use the flag value.
571e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  static StatusOr<std::unique_ptr<Backend>> CreateBackend(
581e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      perftools::gputools::Platform* platform, int64 replica_count = -1);
591e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
601e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Creates a backend for the default platform. The default platform is defined
611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // in PlatformUtil.
621e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();
631e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
641e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  ~Backend();
651e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
661e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Accessors for the various objects.
671e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  perftools::gputools::Platform* platform() const { return platform_; }
681e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Compiler* compiler() const { return compiler_; }
691e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  DeviceMemoryAllocator* memory_allocator() const {
701e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return memory_allocator_.get();
711e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
721e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  TransferManager* transfer_manager() const { return transfer_manager_; }
731e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
741e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns the number of devices of the platform type which are visible. Not
751e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // all of these devices may be usable by XLA.
761e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  int device_count() const { return stream_executors_.size(); }
771e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
781e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns the device ordinal number of the default device.
791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  int default_device_ordinal() const;
801e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
811e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns stream executors of all supported devices for this backend. The
821e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // executors are ordered by the device ordinal.
831e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  const std::vector<perftools::gputools::StreamExecutor*>& stream_executors()
841e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      const {
851e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return stream_executors_;
861e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
871e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
881e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns the replicas for the default stream executor.
891e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  //
901e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // When the number of replicas is R, the first R stream executors are assigned
911e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // to the replicas of the default stream executor.
921e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::vector<perftools::gputools::StreamExecutor*> Replicas() const;
931e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
941e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns the replicas for the given device_ordinal. The given device ordinal
951e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // is considered to be the first device ordinal among the replicas. Returns an
961e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // error status if the stream executor for the given given device ordinal does
971e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // not exist or if there are not enough stream executors for the replicas.
981e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  StatusOr<std::vector<perftools::gputools::StreamExecutor*>> Replicas(
991e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      int device_ordinal) const;
1001e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1011e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Return the stream executor for the given device ordinal.
1021e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  StatusOr<perftools::gputools::StreamExecutor*> stream_executor(
1031e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      int device_ordinal) const;
1041e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1051e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Return the stream executor for the default device ordinal.
1061e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  perftools::gputools::StreamExecutor* default_stream_executor() const {
1071e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    CHECK(!stream_executors_.empty());
1081e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return stream_executors_[0];
1091e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
1101e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
11161197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  // Primes the internal pool of streams for BorrowStream with n initialized
11261197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  // stream instances.
1131e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  tensorflow::Status PoolStreams(int n,
1141e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins                                 perftools::gputools::StreamExecutor* executor);
1151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
11661197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  // Borrows a stream for use by the caller, either by grabbing it from an
1171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // internal pool, or by constructing/initializating it, and returns the result
1181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // to the caller.
11961197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  StatusOr<StreamPtr> BorrowStream(
1201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      perftools::gputools::StreamExecutor* executor);
1211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns whether the given device ordinal of the backend is supported.
1231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  bool device_ordinal_supported(int device_ordinal) const {
1241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return (device_ordinal >= 0 && device_ordinal < device_count() &&
1251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins            stream_executors_[device_ordinal] != nullptr);
1261e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
1271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Return a string identifier for the given device, eg: "GPU:3".
1291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  string device_name(int device_ordinal) const {
1301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal);
1311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
1321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Returns true if the devices with the given ordinals are equivalent from
1341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // XLA's perspective. That is, an executable compiled for one device would
1351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // be equivalent to an executable compiled for the other.
1361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);
1371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // For the host platform, returns the threadpool to use when scheduling
1391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // parallel operators. For other platforms, returns NULL.
1401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;
1411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // For the host platform, returns the configured eigen threadpool device to be
1431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // used for scheduling work. For other platforms, returns NULL.
1441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
1451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
14699e1b19ceba32b8354dddc2841b81864c9ba96bbJacques Pienaar  // Resets the devices associated with this backend.
14799e1b19ceba32b8354dddc2841b81864c9ba96bbJacques Pienaar  Status ResetDevices();
14899e1b19ceba32b8354dddc2841b81864c9ba96bbJacques Pienaar
1491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins private:
1501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  struct EigenThreadPoolWrapper;
1511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Backend(int64 replica_count, perftools::gputools::Platform* platform,
1521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          Compiler* compiler,
1531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
1541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins              stream_executors,
1551e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          TransferManager* transfer_manager);
1561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Backend(const Backend&) = delete;
1571e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Backend& operator=(const Backend&) = delete;
1581e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1591e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  perftools::gputools::Platform* platform_;
1601e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Compiler* compiler_;
1611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  TransferManager* transfer_manager_;
1621e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  int64 replica_count_ = -1;
1631e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1641e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // Vector of stream executors. stream_executors_[0] is the default executor.
1651e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::vector<perftools::gputools::StreamExecutor*> stream_executors_;
1661e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
16761197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower  // Mapping from stream executor to stream pools, used by `BorrowStream` above.
1681e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::map<perftools::gputools::StreamExecutor*,
16961197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower           Pool<perftools::gputools::Stream>>
17061197393ab39929e945e9adf1378659a5c2bbab1A. Unique TensorFlower      stream_pools_;
1711e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1721e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // The default memory allocator to use.
1731e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;
1741e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1751e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // For the CPU backend, a threadpool for scheduling parallel operators.
1761e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_;
1771e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1781e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // For the CPU backend, an Eigen threadpool device for use by Eigen code.
1791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
1801e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins};
1811e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1821e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}  // namespace xla
1831e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1841e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
185