1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/compiler/xla/client/local_client.h"
17
18#include <utility>
19
20#include "llvm/ADT/Triple.h"
21#include "tensorflow/compiler/xla/ptr_util.h"
22#include "tensorflow/compiler/xla/service/backend.h"
23#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
24#include "tensorflow/compiler/xla/service/source_map_util.h"
25#include "tensorflow/compiler/xla/status_macros.h"
26
27namespace se = ::perftools::gputools;
28
29using xla::source_map_util::InvalidParameterArgument;
30
31namespace xla {
32
33namespace {
34StatusOr<Backend::StreamPtr> BorrowStreamForDevice(int device_ordinal,
35                                                   Backend* backend) {
36  if (device_ordinal < 0) {
37    device_ordinal = backend->default_device_ordinal();
38  }
39  return backend->BorrowStream(device_ordinal);
40}
41}  // namespace
42
// Takes ownership of `executable`. `backend` is a non-owning pointer and must
// outlive this LocalExecutable. `build_options` must carry the (non-negative)
// device ordinal the executable was compiled for; this is CHECK-enforced
// because later run/build device-equivalence validation depends on it.
LocalExecutable::LocalExecutable(std::unique_ptr<Executable> executable,
                                 Backend* backend,
                                 ExecutableBuildOptions build_options)
    : executable_(std::move(executable)),
      backend_(backend),
      build_options_(std::move(build_options)) {
  CHECK_GE(build_options_.device_ordinal(), 0)
      << "Must have a valid device ordinal that the executable was built for.";
}
52
53tensorflow::Status LocalExecutable::ValidateExecutionOptions(
54    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
55    const ExecutableRunOptions& run_options, const Backend& backend) {
56  const ComputationLayout& computation_layout =
57      executable_->module_config().entry_computation_layout();
58
59  // Check argument number, shapes, and layouts.
60  if (arguments.size() != computation_layout.parameter_count()) {
61    return InvalidArgument(
62        "invalid number of arguments for computation: expected %d, got %zu",
63        computation_layout.parameter_count(), arguments.size());
64  }
65  for (int i = 0; i < arguments.size(); ++i) {
66    if (!computation_layout.parameter_layout(i).MatchesLayoutInShape(
67            arguments[i]->on_host_shape())) {
68      return InvalidParameterArgument(
69          executable_.get(), i,
70          "Argument does not match shape or layout of computation parameter "
71          "%d: want %s, got %s",
72          i,
73          ShapeUtil::HumanString(computation_layout.parameter_layout(i).shape())
74              .c_str(),
75          ShapeUtil::HumanString(arguments[i]->on_host_shape()).c_str());
76    }
77  }
78
79  if (run_options.stream() != nullptr) {
80    if (!run_options.stream()->ok()) {
81      return InvalidArgument("stream is uninitialized or in an error state");
82    }
83
84    // Check stream matches service platform.
85    const se::Platform* stream_platform =
86        run_options.stream()->parent()->platform();
87    if (stream_platform != backend_->platform()) {
88      return InvalidArgument(
89          "stream is for platform %s, but service targets platform %s",
90          stream_platform->Name().c_str(),
91          backend_->platform()->Name().c_str());
92    }
93
94    // Cannot specify device_ordinal with a stream. The stream determines these
95    // values.
96    if (run_options.device_ordinal() != -1) {
97      return InvalidArgument(
98          "cannot set both device ordinal and stream options in "
99          "ExecutableRunOptions; the stream determines the device ordinal");
100    }
101  }
102
103  // Verify that the device the executable was built for is equivalent to the
104  // device it will run on.
105  int run_device_ordinal = run_options.device_ordinal() == -1
106                               ? backend_->default_device_ordinal()
107                               : run_options.device_ordinal();
108  TF_ASSIGN_OR_RETURN(bool devices_equivalent,
109                      backend_->devices_equivalent(
110                          run_device_ordinal, build_options_.device_ordinal()));
111  if (!devices_equivalent) {
112    TF_ASSIGN_OR_RETURN(se::StreamExecutor * run_executor,
113                        backend_->stream_executor(run_device_ordinal));
114    TF_ASSIGN_OR_RETURN(se::StreamExecutor * build_executor,
115                        backend_->stream_executor(build_device_ordinal()));
116    return InvalidArgument(
117        "executable is built for device %s of type \"%s\"; cannot run it on "
118        "device %s of type \"%s\"",
119        backend_->device_name(build_device_ordinal()).c_str(),
120        build_executor->GetDeviceDescription().name().c_str(),
121        backend_->device_name(run_device_ordinal).c_str(),
122        run_executor->GetDeviceDescription().name().c_str());
123  }
124
125  if (!run_options.allocator()) {
126    return InvalidArgument("an allocator must be provided to ExecuteLocally");
127  }
128
129  if (run_options.allocator()->platform() != backend.platform()) {
130    return InvalidArgument(
131        "allocator platform (%s) does not match service platform (%s)",
132        run_options.allocator()->platform()->Name().c_str(),
133        backend.platform()->Name().c_str());
134  }
135
136  return Status::OK();
137}
138
139StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::Run(
140    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
141    ExecutableRunOptions run_options) {
142  TF_RETURN_IF_ERROR(
143      ValidateExecutionOptions(arguments, run_options, *backend_));
144
145  Backend::StreamPtr stream;
146  if (run_options.stream() == nullptr) {
147    // NB!  The lifetime of `stream` needs to match the lifetime of
148    // `actual_options` (otherwise we will end up using a returned stream in
149    // ExecuteOnStreamWrapper), which is why it isn't declared in the inner "if"
150    // scope.
151    TF_ASSIGN_OR_RETURN(
152        stream, BorrowStreamForDevice(run_options.device_ordinal(), backend_));
153    run_options.set_stream(stream.get());
154  }
155  if (run_options.allocator() == nullptr) {
156    run_options.set_allocator(backend_->memory_allocator());
157  }
158
159  // For local client execution on CPU backends:
160  // *) The thread pool used for eigen CPU ops is from
161  //    ExecutableRunOptions.eigen_intra_op_thread_pool.
162  // *) The thread pool used for XLA CPU ops is from
163  //    backend_->eigen_intra_op_thread_pool().
164  ServiceExecutableRunOptions service_options(
165      run_options, backend_->StreamBorrower(),
166      backend_->eigen_intra_op_thread_pool());
167
168  if (executable_->dumping()) {
169    return ExecuteAndDump(&service_options, arguments);
170  }
171  TF_ASSIGN_OR_RETURN(
172      std::unique_ptr<ShapedBuffer> result,
173      executable_->ExecuteOnStreamWrapper(
174          &service_options, run_options.execution_profile(), arguments));
175
176  return MakeUnique<ScopedShapedBuffer>(std::move(*result),
177                                        run_options.allocator());
178}
179
180StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::ExecuteAndDump(
181    const ServiceExecutableRunOptions* run_options,
182    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) {
183  executable_->session_module()->set_execution_platform(
184      backend_->platform()->Name());
185  TF_RETURN_IF_ERROR(RecordArguments(arguments, executable_->session_module()));
186  TF_ASSIGN_OR_RETURN(
187      std::unique_ptr<ShapedBuffer> result,
188      executable_->ExecuteOnStream(run_options, arguments,
189                                   /*hlo_execution_profile=*/nullptr));
190  TF_RETURN_IF_ERROR(RecordResult(result.get(), executable_->session_module()));
191  TF_RETURN_IF_ERROR(executable_->DumpSessionModule());
192  return ScopedShapedBuffer::MakeScoped(result.get(), run_options->allocator());
193}
194
195tensorflow::Status LocalExecutable::RecordArguments(
196    const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments,
197    SessionModule* session_module) {
198  session_module->clear_arguments();
199  for (const ShapedBuffer* argument : arguments) {
200    TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
201                        LiteralFromShapedBuffer(*argument));
202    *session_module->add_arguments() = literal->ToProto();
203  }
204  return Status::OK();
205}
206
207tensorflow::Status LocalExecutable::RecordResult(
208    const ShapedBuffer* result, SessionModule* session_module) {
209  session_module->clear_result();
210  TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
211                      LiteralFromShapedBuffer(*result));
212  *session_module->mutable_result() = literal->ToProto();
213  return Status::OK();
214}
215
216StatusOr<std::unique_ptr<Literal>> LocalExecutable::LiteralFromShapedBuffer(
217    const ShapedBuffer& shaped_buffer) {
218  TF_ASSIGN_OR_RETURN(
219      se::StreamExecutor * executor,
220      backend_->stream_executor(shaped_buffer.device_ordinal()));
221  return backend_->transfer_manager()->TransferLiteralFromDevice(executor,
222                                                                 shaped_buffer);
223}
224
225se::Platform* LocalClient::platform() const {
226  return local_service_->backend().platform();
227}
228
229int LocalClient::device_count() const {
230  return local_service_->backend().device_count();
231}
232
233bool LocalClient::device_ordinal_supported(int device_ordinal) const {
234  return local_service_->backend().device_ordinal_supported(device_ordinal);
235}
236
237int LocalClient::default_device_ordinal() const {
238  return local_service_->backend().default_device_ordinal();
239}
240
241const Backend& LocalClient::backend() const {
242  return local_service_->backend();
243}
244
245Backend* LocalClient::mutable_backend() {
246  return local_service_->mutable_backend();
247}
248
249StatusOr<std::unique_ptr<LocalExecutable>> LocalClient::Compile(
250    const Computation& computation,
251    const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts,
252    const ExecutableBuildOptions& options) {
253  ExecutableBuildOptions updated_options = options;
254  if (options.device_ordinal() == -1) {
255    updated_options.set_device_ordinal(default_device_ordinal());
256    VLOG(3) << "Set device ordinal to default value of: "
257            << updated_options.device_ordinal();
258  }
259  TF_ASSIGN_OR_RETURN(
260      std::unique_ptr<Executable> executable,
261      local_service_->CompileExecutable(computation.handle(), argument_layouts,
262                                        updated_options));
263  return WrapUnique(new LocalExecutable(std::move(executable),
264                                        local_service_->mutable_backend(),
265                                        updated_options));
266}
267
268StatusOr<std::unique_ptr<ScopedShapedBuffer>>
269LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal,
270                                   DeviceMemoryAllocator* allocator) {
271  if (allocator == nullptr) {
272    allocator = backend().memory_allocator();
273  }
274  TF_ASSIGN_OR_RETURN(auto scoped_buffer,
275                      backend().transfer_manager()->AllocateScopedShapedBuffer(
276                          literal.shape(), allocator, device_ordinal));
277  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
278                      backend().stream_executor(device_ordinal));
279  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
280      executor, literal, *scoped_buffer));
281  return std::move(scoped_buffer);
282}
283
284StatusOr<std::unique_ptr<Literal>> LocalClient::ShapedBufferToLiteral(
285    const ShapedBuffer& shaped_buffer) {
286  TF_ASSIGN_OR_RETURN(
287      se::StreamExecutor * executor,
288      backend().stream_executor(shaped_buffer.device_ordinal()));
289  return backend().transfer_manager()->TransferLiteralFromDevice(executor,
290                                                                 shaped_buffer);
291}
292
293Status LocalClient::TransferToInfeedLocal(const Literal& literal,
294                                          int device_ordinal) {
295  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
296                      backend().stream_executor(device_ordinal));
297  return backend().transfer_manager()->TransferLiteralToInfeed(executor,
298                                                               literal);
299}
300
301StatusOr<std::unique_ptr<Literal>> LocalClient::TransferFromOutfeedLocal(
302    const Shape& shape, int device_ordinal) {
303  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
304                      backend().stream_executor(device_ordinal));
305  auto literal = MakeUnique<Literal>();
306  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralFromOutfeed(
307      executor, shape, literal.get()));
308  return std::move(literal);
309}
310
311StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) {
312  return local_service_->ReplicaNumberToDeviceOrdinal(replica_number);
313}
314
315}  // namespace xla
316