gpu_transfer_manager.cc revision 70062d11bf11d6579bfdbc87c3350a0074a12ae8
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.h"

#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "llvm/IR/DataLayout.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"

namespace se = ::perftools::gputools;

namespace xla {

// TODO(b/30467474): Once the GPU infeed implementation settles, consider
// folding the CPU and GPU infeed implementations back into a single
// generic one, if possible.
GpuTransferManager::GpuTransferManager()
    : GenericTransferManager(
          se::cuda::kCudaPlatformId,
          /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
              .getPointerSize(0 /* default address space */)) {}

Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
                                                   const Literal& literal) {
  const Shape& shape = literal.shape();
  VLOG(2) << "Transferring literal to infeed with shape: "
          << ShapeUtil::HumanString(shape);

  // A non-tuple literal is transferred as a single flat buffer.
  if (!ShapeUtil::IsTuple(shape)) {
    int64 size = GetByteSizeRequirement(shape);
    return TransferBufferToInfeed(executor, size, literal.InternalData());
  }

  if (ShapeUtil::IsNestedTuple(shape)) {
    return Unimplemented(
        "Infeed with a nested tuple shape is not supported: %s",
        ShapeUtil::HumanString(literal.shape()).c_str());
  }

  // For a tuple, we transfer each of its elements to the device and
  // enqueue the resulting destination device addresses with the
  // infeed manager.
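  // If transferring any element fails below, the scoped cleanup calls Done()
  // on each buffer transferred so far. Note that the cleanup lambda must
  // capture `buffers` by reference: it is created while the vector is still
  // empty, so a by-value capture would release nothing.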
  std::vector<gpu::InfeedBuffer*> buffers;
  buffers.reserve(literal.tuple_literals_size());
  auto cleanup = tensorflow::gtl::MakeCleanup([&buffers]() {
    for (gpu::InfeedBuffer* b : buffers) {
      b->Done();
    }
  });

  for (const auto& tuple_element : literal.tuple_literals()) {
    const Shape& tuple_element_shape = tuple_element.shape();
    int64 tuple_element_size = GetByteSizeRequirement(tuple_element_shape);
    TF_ASSIGN_OR_RETURN(
        gpu::InfeedBuffer * buffer,
        TransferBufferToInfeedInternal(executor, tuple_element_size,
                                       tuple_element.InternalData()));
    buffers.push_back(buffer);
  }

  // All transfers succeeded; hand ownership of the buffers to the infeed
  // manager instead of releasing them.
  cleanup.release();
  return EnqueueBuffersToInfeed(executor, buffers);
}

Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
                                                  int64 size,
                                                  const void* source) {
  TF_ASSIGN_OR_RETURN(gpu::InfeedBuffer * buffer,
                      TransferBufferToInfeedInternal(executor, size, source));
  return EnqueueBuffersToInfeed(executor, {buffer});
}

Status GpuTransferManager::EnqueueBuffersToInfeed(
    se::StreamExecutor* executor, std::vector<gpu::InfeedBuffer*> buffers) {
  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
  se::Stream* stream = infeed_manager->GetStream(executor);

  // TODO(b/30467474): Since this stream is shared across different infeed
  // requests, blocking on the stream might be heavy-handed. Figure out if
  // finer-grained acknowledgement is possible.
  Status block_status = stream->BlockHostUntilDone();
  if (!block_status.ok()) {
    for (gpu::InfeedBuffer* b : buffers) {
      b->Done();
    }
    return InternalError("Failed to complete data transfer on stream %p: %s",
                         stream, block_status.error_message().c_str());
  }

  infeed_manager->EnqueueBuffers(buffers);

  VLOG(2) << "Infeed data transferred";

  return Status::OK();
}

StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
    se::StreamExecutor* executor, int64 size, const void* source) {
  if (size > std::numeric_limits<int32>::max()) {
    return InvalidArgument("Infeed shape is too large: needs %lld bytes", size);
  }

  if (size == 0) {
    return InvalidArgument("Infeed shape must have a nonzero byte size");
  }

  gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
  se::Stream* stream = infeed_manager->GetStream(executor);
  if (stream == nullptr) {
    return InternalError("Failed to obtain a stream");
  }

  // The copy below is merely enqueued on the stream; EnqueueBuffersToInfeed
  // blocks until the stream is done before publishing the buffer.
  gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size);
  stream->ThenMemcpy(buffer->device_memory(), source, size);

  VLOG(2) << "Queued infeed data on stream " << stream;

  return buffer;
}

}  // namespace xla

static std::unique_ptr<xla::TransferManager> CreateGpuTransferManager() {
  return xla::MakeUnique<xla::GpuTransferManager>();
}

// Registers the GPU transfer manager for the CUDA platform when the module
// is loaded.
static bool InitModule() {
  xla::TransferManager::RegisterTransferManager(se::cuda::kCudaPlatformId,
                                                &CreateGpuTransferManager);
  return true;
}
static bool module_initialized = InitModule();
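
// ---------------------------------------------------------------------------
// Usage sketch: how a caller might push a host literal onto the GPU infeed
// through the transfer manager registered above. The platform lookup and the
// literal constructor below are assumptions about the StreamExecutor and
// literal_util APIs at this revision, so the sketch is kept in a comment and
// is illustrative only.
//
//   Status FeedVectorToInfeed() {
//     se::Platform* platform =
//         se::MultiPlatformManager::PlatformWithId(se::cuda::kCudaPlatformId)
//             .ValueOrDie();
//     se::StreamExecutor* executor =
//         platform->ExecutorForDevice(/*ordinal=*/0).ValueOrDie();
//     TF_ASSIGN_OR_RETURN(xla::TransferManager * transfer_manager,
//                         xla::TransferManager::GetForPlatform(platform));
//
//     // A non-tuple literal takes the single-buffer path; a one-level tuple
//     // is transferred element by element and enqueued as a group.
//     std::unique_ptr<xla::Literal> literal =
//         xla::Literal::CreateR1<float>({1.0f, 2.0f, 3.0f});
//     return transfer_manager->TransferLiteralToInfeed(executor, *literal);
//   }
// ---------------------------------------------------------------------------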