1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" 17 18#include <utility> 19 20#include "tensorflow/compiler/xla/map_util.h" 21#include "tensorflow/compiler/xla/ptr_util.h" 22#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h" 23#include "tensorflow/compiler/xla/status_macros.h" 24#include "tensorflow/compiler/xla/types.h" 25#include "tensorflow/compiler/xla/util.h" 26#include "tensorflow/core/lib/core/errors.h" 27#include "tensorflow/core/lib/strings/numbers.h" 28#include "tensorflow/core/platform/logging.h" 29#include "tensorflow/core/platform/types.h" 30 31namespace se = ::perftools::gputools; 32 33namespace xla { 34namespace gpu { 35 36void BufferAllocations::Builder::RegisterBuffer(BufferAllocation::Index index, 37 se::DeviceMemoryBase address) { 38 InsertOrDie(®istered_buffers_, index, address); 39} 40 41StatusOr<std::unique_ptr<BufferAllocations>> BufferAllocations::Builder::Build( 42 const BufferAssignment& buffer_assignment, int device_ordinal, 43 DeviceMemoryAllocator* memory_allocator) { 44 const int64 num_buffers = buffer_assignment.Allocations().size(); 45 auto buffer_allocations = WrapUnique( 46 new BufferAllocations(num_buffers, device_ordinal, memory_allocator)); 47 48 for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { 49 // If buffer #i's address is already registered (e.g. external arguments or 50 // result buffers), use that registered buffer. 51 if (registered_buffers_.count(i)) { 52 se::DeviceMemoryBase address = FindOrDie(registered_buffers_, i); 53 if (reinterpret_cast<uintptr_t>(address.opaque()) % 54 kCudaMallocAlignBytes != 55 0) { 56 return InternalError( 57 "Address of registered buffer %lld must be a multiple of %llx, but " 58 "was %p", 59 i, kCudaMallocAlignBytes, address.opaque()); 60 } 61 buffer_allocations->SetBuffer(i, FindOrDie(registered_buffers_, i)); 62 continue; 63 } 64 65 // Allocate each allocation that might escape, or is the temp buffer. 66 bool seen_temp_buffer = false; 67 const BufferAllocation& allocation = buffer_assignment.GetAllocation(i); 68 if (allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer()) { 69 const int64 buffer_size = allocation.size(); 70 se::DeviceMemoryBase buffer_address; 71 if (buffer_size > 0) { 72 TF_ASSIGN_OR_RETURN(buffer_address, memory_allocator->Allocate( 73 device_ordinal, buffer_size)); 74 if (buffer_address == nullptr) { 75 return ResourceExhausted( 76 "Out of memory when allocating %s for buffer %lld.", 77 tensorflow::strings::HumanReadableNumBytes(buffer_size).c_str(), 78 i); 79 } 80 if (reinterpret_cast<uintptr_t>(buffer_address.opaque()) % 81 kCudaMallocAlignBytes != 82 0) { 83 return InternalError( 84 "Address returned by memory_allocator->Allocate must be a " 85 "multiple of %llx, but was %p", 86 kCudaMallocAlignBytes, buffer_address.opaque()); 87 } 88 } 89 buffer_allocations->SetBuffer(i, buffer_address); 90 if (allocation.IsPreallocatedTempBuffer()) { 91 if (seen_temp_buffer) { 92 LOG(FATAL) << "Multiple temporary buffers detected. BufferAssigner " 93 << "must guarantee at most one temporary buffer."; 94 } 95 seen_temp_buffer = true; 96 buffer_allocations->temp_buffer_base_ = buffer_address; 97 } 98 } 99 } 100 101 if (VLOG_IS_ON(2)) { 102 for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { 103 const auto& buf = buffer_allocations->buffers_[i]; 104 VLOG(2) << "Buffer " << i << " -> " << buf.opaque() << " (" << buf.size() 105 << "B)"; 106 } 107 } 108 109 return std::move(buffer_allocations); 110} 111 112tensorflow::Status BufferAllocations::TearDown( 113 const std::set<se::DeviceMemoryBase>& live_addresses, 114 const BufferAssignment& buffer_assignment) { 115 // Deallocate temporary buffers. 116 const int64 num_buffers = buffer_assignment.Allocations().size(); 117 for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { 118 const BufferAllocation& allocation = buffer_assignment.GetAllocation(i); 119 se::DeviceMemoryBase buffer_address = GetDeviceAddress(allocation.index()); 120 // Deallocate buffers marked "maybe_live_out" but aren't actually live out, 121 // and temp buffers. 122 if ((allocation.maybe_live_out() && 123 !live_addresses.count(buffer_address)) || 124 allocation.IsPreallocatedTempBuffer()) { 125 TF_RETURN_IF_ERROR( 126 memory_allocator_->Deallocate(device_ordinal_, &buffer_address)); 127 } 128 } 129 return tensorflow::Status::OK(); 130} 131 132se::DeviceMemoryBase BufferAllocations::GetDeviceAddress( 133 BufferAllocation::Index buffer_index) const { 134 CHECK_GE(buffer_index, 0); 135 CHECK_LT(buffer_index, buffers_.size()); 136 return buffers_[buffer_index]; 137} 138 139se::DeviceMemoryBase BufferAllocations::GetDeviceAddress( 140 const BufferAllocation::Slice& buffer_slice) const { 141 se::DeviceMemoryBase base = GetDeviceAddress(buffer_slice.index()); 142 CHECK_LE(buffer_slice.offset(), base.size()); 143 CHECK_LE(buffer_slice.offset() + buffer_slice.size(), base.size()); 144 return se::DeviceMemoryBase( 145 static_cast<char*>(base.opaque()) + buffer_slice.offset(), 146 buffer_slice.size(), /*is_sub_buffer=*/true); 147} 148 149void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index, 150 se::DeviceMemoryBase buffer) { 151 CHECK_GE(buffer_index, 0); 152 CHECK_LT(buffer_index, buffers_.size()); 153 buffers_[buffer_index] = buffer; 154} 155 156} // namespace gpu 157} // namespace xla 158