gpu_transfer_manager.cc revision 70062d11bf11d6579bfdbc87c3350a0074a12ae8
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h"

#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "llvm/IR/DataLayout.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h"
#include "tensorflow/compiler/xla/service/gpu/infeed_manager.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"

namespace se = ::perftools::gputools;

namespace xla {

// TODO(b/30467474) Once the GPU infeed implementation settles, consider
// folding the CPU and GPU infeed implementations back into a generic
// one if possible.
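// The pointer size below is derived from GpuCompiler's LLVM data layout
// string rather than hard-coded, so it stays consistent with the pointer
// width the GPU code generator assumes.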
GpuTransferManager::GpuTransferManager()
    : GenericTransferManager(
          se::cuda::kCudaPlatformId,
          /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
              .getPointerSize(0 /* default address space */)) {}

Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
                                                   const Literal& literal) {
  const Shape& shape = literal.shape();
  VLOG(2) << "Transferring literal to infeed with shape: "
          << ShapeUtil::HumanString(shape);

  if (!ShapeUtil::IsTuple(shape)) {
    int64 size = GetByteSizeRequirement(shape);
    return TransferBufferToInfeed(executor, size, literal.InternalData());
  }

  if (ShapeUtil::IsNestedTuple(shape)) {
    return Unimplemented(
        "Infeed with a nested tuple shape is not supported: %s",
        ShapeUtil::HumanString(literal.shape()).c_str());
  }

  // For a tuple, we transfer each of its elements to the device and
  // enqueue the resulting destination device addresses with the
  // infeed manager.
  std::vector<gpu::InfeedBuffer*> buffers;
  buffers.reserve(literal.tuple_literals_size());
  // Capture the vector by reference so that buffers pushed below are
  // released if we exit early; the cleanup is disarmed on success.
  auto cleanup = tensorflow::gtl::MakeCleanup([&buffers]() {
    for (gpu::InfeedBuffer* b : buffers) {
      b->Done();
    }
  });

  for (const auto& tuple_element : literal.tuple_literals()) {
    const Shape& tuple_element_shape = tuple_element.shape();
    int64 tuple_element_size = GetByteSizeRequirement(tuple_element_shape);
    TF_ASSIGN_OR_RETURN(
        gpu::InfeedBuffer * buffer,
        TransferBufferToInfeedInternal(executor, tuple_element_size,
                                       tuple_element.InternalData()));
    buffers.push_back(buffer);
  }

  cleanup.release();
  return EnqueueBuffersToInfeed(executor, buffers);
}
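
// Example (illustrative sketch, not part of the original file): caller-side
// transfer of a flat tuple literal to infeed. The LiteralUtil helpers are
// assumed to match this revision's API, and `transfer_manager`/`executor`
// are assumed to be obtained elsewhere.
//
//   std::unique_ptr<Literal> element0 =
//       LiteralUtil::CreateR1<float>({1.0f, 2.0f, 3.0f});
//   std::unique_ptr<Literal> element1 = LiteralUtil::CreateR0<int32>(42);
//   std::unique_ptr<Literal> tuple =
//       LiteralUtil::MakeTuple({element0.get(), element1.get()});
//   TF_RETURN_IF_ERROR(
//       transfer_manager->TransferLiteralToInfeed(executor, *tuple));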

Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
                                                  int64 size,
                                                  const void* source) {
  TF_ASSIGN_OR_RETURN(gpu::InfeedBuffer * buffer,
                      TransferBufferToInfeedInternal(executor, size, source));
  return EnqueueBuffersToInfeed(executor, {buffer});
}

Status GpuTransferManager::EnqueueBuffersToInfeed(
    se::StreamExecutor* executor, std::vector<gpu::InfeedBuffer*> buffers) {
  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
  se::Stream* stream = infeed_manager->GetStream(executor);

  // TODO(b/30467474): Since this stream is shared across different
  // infeed requests, blocking on the stream might be
  // heavy-handed. Figure out if finer-grained acknowledgement is
  // possible.
  Status block_status = stream->BlockHostUntilDone();
  if (!block_status.ok()) {
    for (gpu::InfeedBuffer* b : buffers) {
      b->Done();
    }
    return InternalError("Failed to complete data transfer on stream %p: %s",
                         stream, block_status.error_message().c_str());
  }

  infeed_manager->EnqueueBuffers(buffers);

  VLOG(2) << "Infeed data transferred";

  return Status::OK();
}
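
// Note on buffer lifetime, as used in this file: on failure the buffers are
// released here via Done(); on success ownership passes to the infeed
// manager, and the consumer of the infeed queue is expected to call Done()
// when it is finished with each buffer.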

StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
    se::StreamExecutor* executor, int64 size, const void* source) {
  if (size > std::numeric_limits<int32>::max()) {
    return InvalidArgument("Infeed shape is too large: needs %lld bytes", size);
  }

  if (size == 0) {
    return InvalidArgument("Infeed shape must have a nonzero byte size");
  }

  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
  se::Stream* stream = infeed_manager->GetStream(executor);
  if (stream == nullptr) {
    return InternalError("Failed to obtain a stream");
  }

  gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size);
  stream->ThenMemcpy(buffer->device_memory(), source, size);

  VLOG(2) << "Queued infeed data on stream " << stream;

  return buffer;
}

}  // namespace xla

static std::unique_ptr<xla::TransferManager> CreateGpuTransferManager() {
  return xla::MakeUnique<xla::GpuTransferManager>();
}

// Register the GPU transfer manager for the CUDA platform at load time via a
// static initializer.
static bool InitModule() {
  xla::TransferManager::RegisterTransferManager(se::cuda::kCudaPlatformId,
                                                &CreateGpuTransferManager);
  return true;
}
static bool module_initialized = InitModule();
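
// Example (illustrative sketch, not part of the original file): how the
// factory registered above is typically looked up. The platform name "CUDA"
// is an assumption for this sketch.
//
//   TF_ASSIGN_OR_RETURN(se::Platform * platform,
//                       se::MultiPlatformManager::PlatformWithName("CUDA"));
//   TF_ASSIGN_OR_RETURN(xla::TransferManager * transfer_manager,
//                       xla::TransferManager::GetForPlatform(platform));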