1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#include "tensorflow/compiler/xla/client/local_client.h" 17 18#include <utility> 19 20#include "llvm/ADT/Triple.h" 21#include "tensorflow/compiler/xla/ptr_util.h" 22#include "tensorflow/compiler/xla/service/backend.h" 23#include "tensorflow/compiler/xla/service/service_executable_run_options.h" 24#include "tensorflow/compiler/xla/service/source_map_util.h" 25#include "tensorflow/compiler/xla/status_macros.h" 26 27namespace se = ::perftools::gputools; 28 29using xla::source_map_util::InvalidParameterArgument; 30 31namespace xla { 32 33namespace { 34StatusOr<Backend::StreamPtr> BorrowStreamForDevice(int device_ordinal, 35 Backend* backend) { 36 if (device_ordinal < 0) { 37 device_ordinal = backend->default_device_ordinal(); 38 } 39 return backend->BorrowStream(device_ordinal); 40} 41} // namespace 42 43LocalExecutable::LocalExecutable(std::unique_ptr<Executable> executable, 44 Backend* backend, 45 ExecutableBuildOptions build_options) 46 : executable_(std::move(executable)), 47 backend_(backend), 48 build_options_(std::move(build_options)) { 49 CHECK_GE(build_options_.device_ordinal(), 0) 50 << "Must have a valid device ordinal that the executable was built for."; 51} 52 53tensorflow::Status LocalExecutable::ValidateExecutionOptions( 54 const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, 55 const ExecutableRunOptions& run_options, const Backend& backend) { 56 const ComputationLayout& computation_layout = 57 executable_->module_config().entry_computation_layout(); 58 59 // Check argument number, shapes, and layouts. 60 if (arguments.size() != computation_layout.parameter_count()) { 61 return InvalidArgument( 62 "invalid number of arguments for computation: expected %d, got %zu", 63 computation_layout.parameter_count(), arguments.size()); 64 } 65 for (int i = 0; i < arguments.size(); ++i) { 66 if (!computation_layout.parameter_layout(i).MatchesLayoutInShape( 67 arguments[i]->on_host_shape())) { 68 return InvalidParameterArgument( 69 executable_.get(), i, 70 "Argument does not match shape or layout of computation parameter " 71 "%d: want %s, got %s", 72 i, 73 ShapeUtil::HumanString(computation_layout.parameter_layout(i).shape()) 74 .c_str(), 75 ShapeUtil::HumanString(arguments[i]->on_host_shape()).c_str()); 76 } 77 } 78 79 if (run_options.stream() != nullptr) { 80 if (!run_options.stream()->ok()) { 81 return InvalidArgument("stream is uninitialized or in an error state"); 82 } 83 84 // Check stream matches service platform. 85 const se::Platform* stream_platform = 86 run_options.stream()->parent()->platform(); 87 if (stream_platform != backend_->platform()) { 88 return InvalidArgument( 89 "stream is for platform %s, but service targets platform %s", 90 stream_platform->Name().c_str(), 91 backend_->platform()->Name().c_str()); 92 } 93 94 // Cannot specify device_ordinal with a stream. The stream determines these 95 // values. 96 if (run_options.device_ordinal() != -1) { 97 return InvalidArgument( 98 "cannot set both device ordinal and stream options in " 99 "ExecutableRunOptions; the stream determines the device ordinal"); 100 } 101 } 102 103 // Verify that the device the executable was built for is equivalent to the 104 // device it will run on. 105 int run_device_ordinal = run_options.device_ordinal() == -1 106 ? backend_->default_device_ordinal() 107 : run_options.device_ordinal(); 108 TF_ASSIGN_OR_RETURN(bool devices_equivalent, 109 backend_->devices_equivalent( 110 run_device_ordinal, build_options_.device_ordinal())); 111 if (!devices_equivalent) { 112 TF_ASSIGN_OR_RETURN(se::StreamExecutor * run_executor, 113 backend_->stream_executor(run_device_ordinal)); 114 TF_ASSIGN_OR_RETURN(se::StreamExecutor * build_executor, 115 backend_->stream_executor(build_device_ordinal())); 116 return InvalidArgument( 117 "executable is built for device %s of type \"%s\"; cannot run it on " 118 "device %s of type \"%s\"", 119 backend_->device_name(build_device_ordinal()).c_str(), 120 build_executor->GetDeviceDescription().name().c_str(), 121 backend_->device_name(run_device_ordinal).c_str(), 122 run_executor->GetDeviceDescription().name().c_str()); 123 } 124 125 if (!run_options.allocator()) { 126 return InvalidArgument("an allocator must be provided to ExecuteLocally"); 127 } 128 129 if (run_options.allocator()->platform() != backend.platform()) { 130 return InvalidArgument( 131 "allocator platform (%s) does not match service platform (%s)", 132 run_options.allocator()->platform()->Name().c_str(), 133 backend.platform()->Name().c_str()); 134 } 135 136 return Status::OK(); 137} 138 139StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::Run( 140 const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, 141 ExecutableRunOptions run_options) { 142 TF_RETURN_IF_ERROR( 143 ValidateExecutionOptions(arguments, run_options, *backend_)); 144 145 Backend::StreamPtr stream; 146 if (run_options.stream() == nullptr) { 147 // NB! The lifetime of `stream` needs to match the lifetime of 148 // `actual_options` (otherwise we will end up using a returned stream in 149 // ExecuteOnStreamWrapper), which is why it isn't declared in the inner "if" 150 // scope. 151 TF_ASSIGN_OR_RETURN( 152 stream, BorrowStreamForDevice(run_options.device_ordinal(), backend_)); 153 run_options.set_stream(stream.get()); 154 } 155 if (run_options.allocator() == nullptr) { 156 run_options.set_allocator(backend_->memory_allocator()); 157 } 158 159 // For local client execution on CPU backends: 160 // *) The thread pool used for eigen CPU ops is from 161 // ExecutableRunOptions.eigen_intra_op_thread_pool. 162 // *) The thread pool used for XLA CPU ops is from 163 // backend_->eigen_intra_op_thread_pool(). 164 ServiceExecutableRunOptions service_options( 165 run_options, backend_->StreamBorrower(), 166 backend_->eigen_intra_op_thread_pool()); 167 168 if (executable_->dumping()) { 169 return ExecuteAndDump(&service_options, arguments); 170 } 171 TF_ASSIGN_OR_RETURN( 172 std::unique_ptr<ShapedBuffer> result, 173 executable_->ExecuteOnStreamWrapper( 174 &service_options, run_options.execution_profile(), arguments)); 175 176 return MakeUnique<ScopedShapedBuffer>(std::move(*result), 177 run_options.allocator()); 178} 179 180StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::ExecuteAndDump( 181 const ServiceExecutableRunOptions* run_options, 182 const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { 183 executable_->session_module()->set_execution_platform( 184 backend_->platform()->Name()); 185 TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->session_module())); 186 TF_ASSIGN_OR_RETURN( 187 std::unique_ptr<ShapedBuffer> result, 188 executable_->ExecuteOnStream(run_options, arguments, 189 /*hlo_execution_profile=*/nullptr)); 190 TF_RETURN_IF_ERROR(RecordResult(result.get(), executable_->session_module())); 191 TF_RETURN_IF_ERROR(executable_->DumpSessionModule()); 192 return ScopedShapedBuffer::MakeScoped(result.get(), run_options->allocator()); 193} 194 195tensorflow::Status LocalExecutable::RecordArguments( 196 const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, 197 SessionModule* session_module) { 198 session_module->clear_arguments(); 199 for (const ShapedBuffer* argument : arguments) { 200 TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal, 201 LiteralFromShapedBuffer(*argument)); 202 *session_module->add_arguments() = literal->ToProto(); 203 } 204 return Status::OK(); 205} 206 207tensorflow::Status LocalExecutable::RecordResult( 208 const ShapedBuffer* result, SessionModule* session_module) { 209 session_module->clear_result(); 210 TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal, 211 LiteralFromShapedBuffer(*result)); 212 *session_module->mutable_result() = literal->ToProto(); 213 return Status::OK(); 214} 215 216StatusOr<std::unique_ptr<Literal>> LocalExecutable::LiteralFromShapedBuffer( 217 const ShapedBuffer& shaped_buffer) { 218 TF_ASSIGN_OR_RETURN( 219 se::StreamExecutor * executor, 220 backend_->stream_executor(shaped_buffer.device_ordinal())); 221 return backend_->transfer_manager()->TransferLiteralFromDevice(executor, 222 shaped_buffer); 223} 224 225se::Platform* LocalClient::platform() const { 226 return local_service_->backend().platform(); 227} 228 229int LocalClient::device_count() const { 230 return local_service_->backend().device_count(); 231} 232 233bool LocalClient::device_ordinal_supported(int device_ordinal) const { 234 return local_service_->backend().device_ordinal_supported(device_ordinal); 235} 236 237int LocalClient::default_device_ordinal() const { 238 return local_service_->backend().default_device_ordinal(); 239} 240 241const Backend& LocalClient::backend() const { 242 return local_service_->backend(); 243} 244 245Backend* LocalClient::mutable_backend() { 246 return local_service_->mutable_backend(); 247} 248 249StatusOr<std::unique_ptr<LocalExecutable>> LocalClient::Compile( 250 const Computation& computation, 251 const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts, 252 const ExecutableBuildOptions& options) { 253 ExecutableBuildOptions updated_options = options; 254 if (options.device_ordinal() == -1) { 255 updated_options.set_device_ordinal(default_device_ordinal()); 256 VLOG(3) << "Set device ordinal to default value of: " 257 << updated_options.device_ordinal(); 258 } 259 TF_ASSIGN_OR_RETURN( 260 std::unique_ptr<Executable> executable, 261 local_service_->CompileExecutable(computation.handle(), argument_layouts, 262 updated_options)); 263 return WrapUnique(new LocalExecutable(std::move(executable), 264 local_service_->mutable_backend(), 265 updated_options)); 266} 267 268StatusOr<std::unique_ptr<ScopedShapedBuffer>> 269LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, 270 DeviceMemoryAllocator* allocator) { 271 if (allocator == nullptr) { 272 allocator = backend().memory_allocator(); 273 } 274 TF_ASSIGN_OR_RETURN(auto scoped_buffer, 275 backend().transfer_manager()->AllocateScopedShapedBuffer( 276 literal.shape(), allocator, device_ordinal)); 277 TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, 278 backend().stream_executor(device_ordinal)); 279 TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( 280 executor, literal, *scoped_buffer)); 281 return std::move(scoped_buffer); 282} 283 284StatusOr<std::unique_ptr<Literal>> LocalClient::ShapedBufferToLiteral( 285 const ShapedBuffer& shaped_buffer) { 286 TF_ASSIGN_OR_RETURN( 287 se::StreamExecutor * executor, 288 backend().stream_executor(shaped_buffer.device_ordinal())); 289 return backend().transfer_manager()->TransferLiteralFromDevice(executor, 290 shaped_buffer); 291} 292 293Status LocalClient::TransferToInfeedLocal(const Literal& literal, 294 int device_ordinal) { 295 TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, 296 backend().stream_executor(device_ordinal)); 297 return backend().transfer_manager()->TransferLiteralToInfeed(executor, 298 literal); 299} 300 301StatusOr<std::unique_ptr<Literal>> LocalClient::TransferFromOutfeedLocal( 302 const Shape& shape, int device_ordinal) { 303 TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, 304 backend().stream_executor(device_ordinal)); 305 auto literal = MakeUnique<Literal>(); 306 TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromOutfeed( 307 executor, shape, literal.get())); 308 return std::move(literal); 309} 310 311StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) { 312 return local_service_->ReplicaNumberToDeviceOrdinal(replica_number); 313} 314 315} // namespace xla 316