1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
#include "tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h"

#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "llvm/IR/DataLayout.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
35
36namespace se = ::perftools::gputools;
37
38namespace xla {
39
40// TODO(b/30467474) Once GPU infeed implementation settles, consider
41// folding back the cpu and gpu infeed implementations into a generic
42// one if possible.
// Constructs a transfer manager for the CUDA platform. The pointer size is
// derived from the GPU backend's LLVM data layout, queried for the default
// address space (0).
GpuTransferManager::GpuTransferManager()
    : GenericTransferManager(
          se::cuda::kCudaPlatformId,
          /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
              .getPointerSize(0 /* default address space */)) {}
48
49Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
50                                                   const Literal& literal) {
51  const Shape& shape = literal.shape();
52  VLOG(2) << "Transferring literal to infeed with shape: "
53          << ShapeUtil::HumanString(shape);
54
55  if (!ShapeUtil::IsTuple(shape)) {
56    int64 size = GetByteSizeRequirement(shape);
57    return TransferBufferToInfeed(executor, size, literal.untyped_data());
58  }
59
60  if (ShapeUtil::IsNestedTuple(shape)) {
61    return Unimplemented(
62        "Infeed with a nested tuple shape is not supported: %s",
63        ShapeUtil::HumanString(literal.shape()).c_str());
64  }
65
66  // For a tuple, we transfer each of its elements to the device and
67  // enqueue the resulting destination device addresses with the
68  // infeed manager.
69  std::vector<gpu::InfeedBuffer*> buffers;
70  buffers.reserve(ShapeUtil::TupleElementCount(shape));
71  auto cleanup = tensorflow::gtl::MakeCleanup([buffers]() {
72    for (gpu::InfeedBuffer* b : buffers) {
73      b->Done();
74    }
75  });
76
77  for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) {
78    const Shape& tuple_element_shape =
79        ShapeUtil::GetTupleElementShape(shape, i);
80    int64 tuple_element_size = GetByteSizeRequirement(tuple_element_shape);
81    TF_ASSIGN_OR_RETURN(
82        gpu::InfeedBuffer * buffer,
83        TransferBufferToInfeedInternal(executor, tuple_element_size,
84                                       literal.untyped_data({i})));
85    buffers.push_back(buffer);
86  }
87
88  cleanup.release();
89  return EnqueueBuffersToInfeed(executor, buffers);
90}
91
92Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
93                                                  int64 size,
94                                                  const void* source) {
95  TF_ASSIGN_OR_RETURN(gpu::InfeedBuffer * buffer,
96                      TransferBufferToInfeedInternal(executor, size, source));
97  return EnqueueBuffersToInfeed(executor, {buffer});
98}
99
100Status GpuTransferManager::EnqueueBuffersToInfeed(
101    se::StreamExecutor* executor, std::vector<gpu::InfeedBuffer*> buffers) {
102  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
103  se::Stream* stream = infeed_manager->GetStream(executor);
104
105  // TODO(b/30467474): Since this stream is shared across different
106  // infeed requests, blocking on the stream might be
107  // heavy-handed. Figure out if finer-grained acknowledgement is
108  // possible.
109  Status block_status = stream->BlockHostUntilDone();
110  if (!block_status.ok()) {
111    for (gpu::InfeedBuffer* b : buffers) {
112      b->Done();
113    }
114    return InternalError("Failed to complete data transfer on stream %p: %s",
115                         stream, block_status.error_message().c_str());
116  }
117
118  infeed_manager->EnqueueBuffers(buffers);
119
120  VLOG(2) << "Infeed data transferred";
121
122  return Status::OK();
123}
124
125StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
126    se::StreamExecutor* executor, int64 size, const void* source) {
127  if (size > std::numeric_limits<int32>::max()) {
128    return InvalidArgument("Infeed shape is too large: needs %lld bytes", size);
129  }
130
131  if (size == 0) {
132    return InvalidArgument("Infeed shape needs 0 bytes");
133  }
134
135  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
136  se::Stream* stream = infeed_manager->GetStream(executor);
137  if (stream == nullptr) {
138    return InternalError("Failed to obtain a stream");
139  }
140
141  gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size);
142  stream->ThenMemcpy(buffer->device_memory(), source, size);
143
144  VLOG(2) << "Queued infeed data on stream " << stream;
145
146  return buffer;
147}
148
149}  // namespace xla
150
151static std::unique_ptr<xla::TransferManager> CreateGpuTransferManager() {
152  return xla::MakeUnique<xla::GpuTransferManager>();
153}
154
// Registers CreateGpuTransferManager as the factory for the CUDA platform.
static bool InitModule() {
  xla::TransferManager::RegisterTransferManager(se::cuda::kCudaPlatformId,
                                                &CreateGpuTransferManager);
  return true;
}
// Forces InitModule() to run during static initialization so the factory is
// registered before any client asks for a transfer manager.
static bool module_initialized = InitModule();
161