ExecutionBuilder.cpp revision 75886e77f9ca074173a49283b5c0a8c182d98977
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "ExecutionBuilder"
18
19#include "ExecutionBuilder.h"
20
21#include "CompilationBuilder.h"
22#include "CpuExecutor.h"
23#include "HalInterfaces.h"
24#include "Manager.h"
25#include "ModelBuilder.h"
26
27#include <mutex>
28#include <thread>
29#include <vector>
30
31namespace android {
32namespace nn {
33
34int ModelArgumentInfo::setFromPointer(const Operand& operand,
35                                      const ANeuralNetworksOperandType* type, void* data,
36                                      uint32_t length) {
37    int n = updateDimensionInfo(operand, type);
38    if (n != ANEURALNETWORKS_NO_ERROR) {
39        return n;
40    }
41    if (data == nullptr) {
42        if (length) {
43            LOG(ERROR) << "Setting argument as having no value but non-zero length passed.";
44            return ANEURALNETWORKS_BAD_DATA;
45        }
46        state = ModelArgumentInfo::HAS_NO_VALUE;
47    } else {
48        state = ModelArgumentInfo::POINTER;
49    }
50    buffer = data;
51    locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
52    return ANEURALNETWORKS_NO_ERROR;
53}
54
55int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
56                                     uint32_t poolIndex, uint32_t offset, uint32_t length) {
57    int n = updateDimensionInfo(operand, type);
58    if (n != ANEURALNETWORKS_NO_ERROR) {
59        return n;
60    }
61    state = ModelArgumentInfo::MEMORY;
62    locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
63    buffer = nullptr;
64    return ANEURALNETWORKS_NO_ERROR;
65}
66
67int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand,
68                                              uint32_t poolIndex, uint32_t offset) {
69    dimensions = operand.dimensions;
70    state = ModelArgumentInfo::MEMORY;
71    locationAndLength =
72            {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
73    buffer = nullptr;
74    return ANEURALNETWORKS_NO_ERROR;
75}
76
77int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
78                                           const ANeuralNetworksOperandType* newType) {
79    if (newType == nullptr) {
80        dimensions = hidl_vec<uint32_t>();
81    } else {
82        uint32_t count = newType->dimensionCount;
83        if (static_cast<OperandType>(newType->type) != operand.type ||
84            count != operand.dimensions.size()) {
85            LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types";
86            return ANEURALNETWORKS_BAD_DATA;
87        }
88
89        dimensions = hidl_vec<uint32_t>(count);
90        for (uint32_t i = 0; i < count; i++) {
91            dimensions[i] = newType->dimensions[i];
92        }
93    }
94    return ANEURALNETWORKS_NO_ERROR;
95}
96
// Snapshots the model, the execution plan, and the partitioning setting from
// the finished compilation, and sizes the input/output argument arrays to
// match the model's declared interface.
ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
        mModel(compilation->mModel),
        mPlan(&compilation->mPlan),
        mPartitioning(compilation->mPartitioning),
        mInputs(mModel->inputCount()),
        mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
}
105
106int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
107                               const void* buffer, size_t length) {
108    uint32_t count = static_cast<uint32_t>(mInputs.size());
109    if (index >= count) {
110        LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
111        return ANEURALNETWORKS_BAD_DATA;
112    }
113    if (type != nullptr) {
114        int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false);
115        if (n != ANEURALNETWORKS_NO_ERROR) {
116            return n;
117        }
118    }
119    if (length > 0xFFFFFFFF) {
120        LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
121        return ANEURALNETWORKS_BAD_DATA;
122    }
123    uint32_t l = static_cast<uint32_t>(length);
124    return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
125                                         const_cast<void*>(buffer), l);
126}
127
128int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
129                                         const Memory* memory, size_t offset, size_t length) {
130    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
131
132    uint32_t count = static_cast<uint32_t>(mInputs.size());
133    if (index >= count) {
134        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
135                   << count;
136        return ANEURALNETWORKS_BAD_DATA;
137    }
138    if (!memory->validateSize(offset, length)) {
139        return ANEURALNETWORKS_BAD_DATA;
140    }
141    // TODO validate the rest
142    uint32_t poolIndex = mMemories.add(memory);
143    return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
144                                        length);
145}
146
147int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
148                                size_t length) {
149    uint32_t count = static_cast<uint32_t>(mOutputs.size());
150    if (index >= count) {
151        LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
152        return ANEURALNETWORKS_BAD_DATA;
153    }
154    if (type != nullptr) {
155        int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false);
156        if (n != ANEURALNETWORKS_NO_ERROR) {
157            return n;
158        }
159    }
160    if (length > 0xFFFFFFFF) {
161        LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length;
162        return ANEURALNETWORKS_BAD_DATA;
163    }
164    uint32_t l = static_cast<uint32_t>(length);
165    return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
166}
167
168int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
169                                          const Memory* memory, size_t offset, size_t length) {
170    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
171
172    uint32_t count = static_cast<uint32_t>(mOutputs.size());
173    if (index >= count) {
174        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
175                   << count;
176        return ANEURALNETWORKS_BAD_DATA;
177    }
178    if (!memory->validateSize(offset, length)) {
179        return ANEURALNETWORKS_BAD_DATA;
180    }
181    // TODO validate the rest
182    uint32_t poolIndex = mMemories.add(memory);
183    return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
184                                         length);
185}
186
187// Attempt synchronous execution of full model on CPU.
188// Ensure that executionCallback->notify() is called.
189static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
190                            const sp<ExecutionCallback>& executionCallback) {
191    VLOG(EXECUTION) << "cpuFallbackFull";
192    StepExecutor executor(executionBuilder, executionBuilder->getModel(),
193                          nullptr /* no VersionedIDevice, so CPU */,
194                          nullptr /* no IPreparedModel */);
195    executor.mapInputsAndOutputsTrivially();
196    sp<ExecutionCallback> fallbackCallback;
197    if (executor.startCompute(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
198        executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
199        return;
200    }
201    fallbackCallback->wait();
202    executionCallback->notify(fallbackCallback->getStatus());
203}
204
205// Attempt synchronous execution on CPU.
206// (1) First, attempt to execute this step on CPU.  If successful,
207//     return true.  (Do not call executionCallback->notify().)
208// (2) If unsuccessful, attempt to execute the full model on CPU,
209//     ensure that executionCallback->notify() is called, and return
210//     false.
211static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
212                               const ExecutionPlan* plan,
213                               std::shared_ptr<ExecutionPlan::Controller> controller,
214                               const sp<ExecutionCallback>& executionCallback) {
215    VLOG(EXECUTION) << "cpuFallbackPartial";
216    std::shared_ptr<StepExecutor> executor;
217    int n = plan->fallback(controller, &executor);
218    if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
219        cpuFallbackFull(executionBuilder, executionCallback);
220        return false;
221    }
222    sp<ExecutionCallback> fallbackCallback;
223    if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
224        cpuFallbackFull(executionBuilder, executionCallback);
225        return false;
226    }
227    fallbackCallback->wait();
228    if (fallbackCallback->getStatus() != ErrorStatus::NONE) {
229        cpuFallbackFull(executionBuilder, executionCallback);
230        return false;
231    }
232    return true;
233}
234
235static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder,
236                                         const ExecutionPlan* plan,
237                                         std::shared_ptr<ExecutionPlan::Controller> controller,
238                                         bool allowFallback,
239                                         const sp<ExecutionCallback>& executionCallback) {
240    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)";
241    while (true) {
242        std::shared_ptr<StepExecutor> executor;
243        VLOG(EXECUTION) << "looking for next StepExecutor";
244        int n = plan->next(controller, &executor);
245        if (n != ANEURALNETWORKS_NO_ERROR) {
246            if (allowFallback) {
247                cpuFallbackFull(executionBuilder, executionCallback);
248            } else {
249                executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
250            }
251            return;
252        }
253        if (executor == nullptr) {
254            executionCallback->notify(ErrorStatus::NONE);
255            return;
256        }
257
258        sp<ExecutionCallback> stepCallback;
259        n = executor->startCompute(&stepCallback);
260        if (n != ANEURALNETWORKS_NO_ERROR) {
261            if (allowFallback) {
262                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
263                    // Successfully executed one step on CPU.
264                    continue;
265                } else {
266                    // Either successfully executed entire plan on
267                    // CPU, or tried and failed to do so.
268                    return;
269                }
270            } else {
271                executionCallback->notify(ErrorStatus::GENERAL_FAILURE);
272                return;
273            }
274        }
275        stepCallback->wait();
276        ErrorStatus status = stepCallback->getStatus();
277        if (status != ErrorStatus::NONE) {
278            if (allowFallback) {
279                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
280                    // Successfully executed one step on CPU.
281                    continue;
282                } else {
283                    // Either successfully executed entire plan on
284                    // CPU, or tried and failed to do so.
285                    return;
286                }
287            } else {
288                executionCallback->notify(status);
289                return;
290            }
291        }
292    }
293}
294
// Launches this execution asynchronously.  On success, stores the callback
// ("event") the client waits on into *synchronizationCallback and returns
// ANEURALNETWORKS_NO_ERROR.  Prefers the partitioned, plan-based path when
// partitioning is enabled; otherwise (or when no plan controller can be
// built and fallback is allowed) falls through to a single StepExecutor.
int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    *synchronizationCallback = nullptr;

    // TODO validate that we have full types for all inputs and outputs,
    // that the graph is not cyclic,

    // Every input and output must have been bound (or explicitly marked as
    // having no value) before execution may begin.
    for (auto& p : mInputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    for (auto& p : mOutputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }

#ifndef DISABLE_PARTITIONED_EXECUTION
    {
        // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
        // with the compilation and execution phases of the NN API?  Or retain that path
        // as a fallback in the case of partitioning failure?
        //
        // TODO: Entire plan-based-path should run in an asynchronous thread --
        // take the asynchronous thread logic out of startComputeOnCpu() and use
        // it to wrap the plan-based-path.
        if (mPartitioning > 0) {
            const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
            std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
            if (controller == nullptr) {
                // Couldn't set up the plan-based path.  With fallback allowed,
                // drop through to the CPU path at the bottom of this function.
                if (!allowFallback) {
                    return ANEURALNETWORKS_OP_FAILED;
                }
            } else {
                // TODO: use a thread pool

                // Prepare the callback for asynchronous execution.
                // sp<ExecutionCallback> object is returned when the
                // execution has been successfully launched, otherwise a
                // nullptr is returned.  The executionCallback is
                // abstracted in the NN API as an "event".
                sp<ExecutionCallback> executionCallback = new ExecutionCallback();
                std::thread thread(asyncStartComputePartitioned, this, mPlan, controller,
                                   allowFallback,
                                   executionCallback);
                executionCallback->bind_thread(std::move(thread));
                *synchronizationCallback = executionCallback;
                return ANEURALNETWORKS_NO_ERROR;
            }
        }
    }
#else
    {
        // Find a driver that can handle all the operations.
        // TODO: Does not handle CPU fallback (which is tricky because
        //       StepExecutor::startCompute() is designed as
        //       asynchronous).
        // TODO: Does not actually behave asynchronously (because
        //       StepExecutor::startCompute() isn't actually asynchronous
        //       on a device as opposed to a CPU).
        Model hidlModel;
        mModel->setHidlModel(&hidlModel);
        const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
        for (const auto& device : devices) {
            hidl_vec<bool> supports;
            VLOG(EXECUTION) << "Checking " << device->getName();
            device->getSupportedOperations(hidlModel, &supports);
            // Use this device only if it supports every operation in the model.
            if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
                VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on " << device->getName();
                StepExecutor executor(this, mModel, device->getInterface(),
                                      nullptr /* no IPreparedModel, so compile */);
                executor.mapInputsAndOutputsTrivially();
                return executor.startCompute(synchronizationCallback);
            }
        }
    }
#endif  // DISABLE_PARTITIONED_EXECUTION

    // Run on the CPU.
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU";
    StepExecutor executor(this, mModel,
                          nullptr /* no VersionedIDevice, so CPU */,
                          nullptr /* no IPreparedModel */);
    executor.mapInputsAndOutputsTrivially();
    return executor.startCompute(synchronizationCallback);
}
383
384// Figures out how to place each of the input or outputs in a buffer. This just does the layout,
385// it does not copy data.  Aligns each input a bit.
386int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
387                                                 Memory* memory) {
388    uint32_t nextPoolIndex = mMemories.size();
389    int64_t total = 0;
390    for (auto& info : *args) {
391        if (info.state == ModelArgumentInfo::POINTER) {
392            DataLocation& loc = info.locationAndLength;
393            // TODO Good enough alignment?
394            total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
395            loc.poolIndex = nextPoolIndex;
396            loc.offset = static_cast<uint32_t>(total);
397            total += loc.length;
398        }
399    };
400    if (total > 0xFFFFFFFF) {
401        LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
402                      "2^32.";
403        return ANEURALNETWORKS_BAD_DATA;
404    }
405    hidl_memory hidlMemory;
406    if (total > 0) {
407        memory->create(total);  // TODO check error
408        mMemories.add(memory);
409    }
410    return ANEURALNETWORKS_NO_ERROR;
411}
412
413static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
414                                     hidl_vec<RequestArgument>* ioInfos) {
415    size_t count = argumentInfos.size();
416    ioInfos->resize(count);
417    for (size_t i = 0; i < count; i++) {
418        const auto& info = argumentInfos[i];
419        (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
420                          .location = info.locationAndLength,
421                          .dimensions = info.dimensions,
422                        };
423    }
424}
425
// Constructs an executor for one step of an execution plan (or, on the
// non-partitioned path, for the whole model).  A null |driver| selects CPU
// execution; a null |preparedModel| means the model still needs to be
// compiled before device execution.
StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder,
                           const ModelBuilder* model,
                           VersionedIDevice* driver, sp<IPreparedModel> preparedModel) :
    mExecutionBuilder(executionBuilder), mModel(model),
    mDriver(driver), mPreparedModel(preparedModel),
    mInputs(model->inputCount()), mOutputs(model->outputCount()) {}
432
433void StepExecutor::mapInputsAndOutputsTrivially() {
434    mInputs = mExecutionBuilder->mInputs;
435    mOutputs = mExecutionBuilder->mOutputs;
436    mMemories = mExecutionBuilder->mMemories;
437}
438
439void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
440                                    ModelArgumentInfo* executorInputOrOutput) {
441    *executorInputOrOutput = builderInputOrOutput;
442    switch (executorInputOrOutput->state) {
443        default:
444            nnAssert(!"unexpected ModelArgumentInfo::state");
445        case ModelArgumentInfo::POINTER:
446        case ModelArgumentInfo::UNSPECIFIED:
447            break;
448        case ModelArgumentInfo::MEMORY: {
449            const uint32_t builderPoolIndex =
450                    builderInputOrOutput.locationAndLength.poolIndex;
451            const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
452            const uint32_t executorPoolIndex = mMemories.add(memory);
453            executorInputOrOutput->locationAndLength.poolIndex =
454                    executorPoolIndex;
455            break;
456        }
457    }
458}
459
460int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
461                                                      const Memory* memory, uint32_t offset,
462                                                      ModelArgumentInfo* inputOrOutputInfo) {
463    // Should be similar to
464    //     ExecutionBuilder::setInputFromMemory()
465    //     ExecutionBuilder::setOutputFromMemory()
466
467    uint32_t poolIndex = mMemories.add(memory);
468    return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset);
469}
470
471static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) {
472    for (unsigned i = 0; i < args.size(); i++) {
473        const auto& arg = args[i];
474        std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
475        switch (arg.state) {
476            case ModelArgumentInfo::POINTER:
477                VLOG(EXECUTION) << prefix << "POINTER(" << arg.buffer << ")";
478                break;
479            case ModelArgumentInfo::MEMORY:
480                VLOG(EXECUTION) << prefix << "MEMORY("
481                                << "pool=" << arg.locationAndLength.poolIndex
482                                << ", "
483                                << "off=" << arg.locationAndLength.offset
484                                << ")";
485                break;
486            case ModelArgumentInfo::HAS_NO_VALUE:
487                VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
488                break;
489            case ModelArgumentInfo::UNSPECIFIED:
490                VLOG(EXECUTION) << prefix << "UNSPECIFIED";
491                break;
492            default:
493                VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
494                break;
495        }
496    }
497}
498
499int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
500    if (VLOG_IS_ON(EXECUTION)) {
501        logArguments("input", mInputs);
502        logArguments("output", mOutputs);
503    }
504    if (mDriver == nullptr) {
505        return startComputeOnCpu(synchronizationCallback);
506    } else {
507        return startComputeOnDevice(synchronizationCallback);
508    }
509}
510
// Executes this step on the driver's device.  Compiles the model first if no
// prepared model was supplied, lays POINTER arguments out into two shared
// memory pools (inputs/outputs), issues the request, and -- for now --
// synchronously waits for completion and copies POINTER outputs back out.
// On success, *synchronizationCallback receives the (already-notified)
// execution callback.
int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) {
    nnAssert(mDriver != nullptr);

    *synchronizationCallback = nullptr;

    // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
    // ExecutionPlan with the compilation and execution phases of the NN API
    if (mPreparedModel == nullptr) {
        Model model;
        mModel->setHidlModel(&model);

        // TODO Dangerous!  In async, the model will outlive it here. Safe for now
        sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback();
        ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preparedModelCallback);
        if (prepareLaunchStatus != ErrorStatus::NONE) {
            return ANEURALNETWORKS_OP_FAILED;
        }

        // Immediately synchronize with callback object for now
        // TODO: change to asynchronous later
        preparedModelCallback->wait();
        ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
        mPreparedModel = preparedModelCallback->getPreparedModel();
        if (prepareReturnStatus != ErrorStatus::NONE || mPreparedModel == nullptr) {
            return ANEURALNETWORKS_OP_FAILED;
        }
    }

    // We separate the input & output pools so that we reduce the copying done if we
    // do an eventual remoting (hidl_memory->update()).  We could also use it to set
    // protection on read only memory but that's not currently done.
    Memory inputPointerArguments;
    Memory outputPointerArguments;

    // Layout the input and output data
    int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }

    // Copy the input data that was specified via a pointer.
    // inputPointerArguments.update();
    for (auto& info : mInputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            // NOTE(review): this `n` shadows the outer `n` above.
            int n = inputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(data + loc.offset, info.buffer, loc.length);
        }
    }
    // TODO: Add inputPointerArguments.commit() and .update() at all the right places

    // Build the request: argument descriptors plus every memory pool this
    // executor has registered, in pool-index order.
    Request request;
    setRequestArgumentArray(mInputs, &request.inputs);
    setRequestArgumentArray(mOutputs, &request.outputs);
    uint32_t count = mMemories.size();
    request.pools.resize(count);
    for (uint32_t i = 0; i < count; i++) {
        request.pools[i] = mMemories[i]->getHidlMemory();
    }

    // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
    // object is returned when the execution has been successfully launched,
    // otherwise a nullptr is returned. The executionCallback is abstracted in
    // the NN API as an "event".
    //
    // The sp is used for ref-counting purposes. Without it, the HIDL service
    // could attempt to communicate with a dead callback object.
    //
    // TODO: Explain the "dead callback" problem further, either here or
    // in the design document.
    sp<ExecutionCallback> executionCallback = new ExecutionCallback();

    VLOG(EXECUTION) << "Before mPreparedModel->execute() " << toString(request);
    // Execute.
    // TODO: What happens to the Callback if the service dies abnormally
    // -- won't that keep the Callback live forever, because the service
    // never has the opportunity to bump the reference count down? Or
    // maybe the HIDL infrastructure handles this magically? At worst,
    // it seems like this is a small memory leak, if the Callback stays
    // alive forever.
    Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback);
    if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute failed**";
        return ANEURALNETWORKS_OP_FAILED;
    }

    // TODO: Remove this synchronization point when the block of code below is
    // removed.
    executionCallback->wait();
    Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
    if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute async failed**";
        return ANEURALNETWORKS_OP_FAILED;
    }

    // Copy the output data from shared memory to the output buffers.
    // TODO: Move this block of code somewhere else. It should not be in the
    // startCompute function.
    // TODO: outputMemory->update(); outputMemory->commit()
    for (auto& info : mOutputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            int n = outputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(info.buffer, data + loc.offset, loc.length);
        }
    }
    VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";

    *synchronizationCallback = executionCallback;
    return ANEURALNETWORKS_NO_ERROR;
}
634
635static void asyncStartComputeOnCpu(const Model& model, const Request& request,
636                                   const std::vector<RunTimePoolInfo>& modelPoolInfos,
637                                   const std::vector<RunTimePoolInfo>& requestPoolInfos,
638                                   const sp<IExecutionCallback>& executionCallback) {
639    CpuExecutor executor;
640    int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
641    ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ?
642            ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
643    executionCallback->notify(status);
644}
645
// Executes this step on the CPU.  Converts the model to HIDL form, maps the
// model's and the request's memory pools, rewrites each POINTER argument into
// its own single-buffer pool, and hands everything to a worker thread running
// asyncStartComputeOnCpu().  On success, *synchronizationCallback receives
// the callback bound to that thread.
int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
    // TODO: use a thread pool

    Model model;
    mModel->setHidlModel(&model);

    // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
    // object is returned when the execution has been successfully launched,
    // otherwise a nullptr is returned. The executionCallback is abstracted in
    // the NN API as an "event".
    sp<ExecutionCallback> executionCallback = new ExecutionCallback();
    *synchronizationCallback = nullptr;

    // Map the memory pools referenced by the model itself.
    std::vector<RunTimePoolInfo> modelPoolInfos;
    if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
        return ANEURALNETWORKS_UNMAPPABLE;
    }

    // Map every memory pool registered with this executor; any mapping
    // failure is reported through |fail|.
    std::vector<RunTimePoolInfo> requestPoolInfos;
    requestPoolInfos.reserve(mMemories.size());
    bool fail = false;
    for (const Memory* mem : mMemories) {
        requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail);
    }
    if (fail) {
        return ANEURALNETWORKS_UNMAPPABLE;
    }
    // Create as many pools as there are input / output.
    // Each POINTER argument becomes its own pool, appended after the mapped
    // memories above; this must run before setRequestArgumentArray() so the
    // request sees the rewritten pool indices.
    auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
        for (ModelArgumentInfo& argumentInfo : argumentInfos) {
            if (argumentInfo.state == ModelArgumentInfo::POINTER) {
                argumentInfo.locationAndLength.poolIndex =
                            static_cast<uint32_t>(requestPoolInfos.size());
                argumentInfo.locationAndLength.offset = 0;
                requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer));
            }
        }
    };
    fixPointerArguments(mInputs);
    fixPointerArguments(mOutputs);

    Request request;
    setRequestArgumentArray(mInputs, &request.inputs);
    setRequestArgumentArray(mOutputs, &request.outputs);

    // TODO: should model be moved with a std::cref?
    std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
                       std::move(modelPoolInfos), std::move(requestPoolInfos),
                       executionCallback);
    executionCallback->bind_thread(std::move(thread));

    *synchronizationCallback = executionCallback;
    return ANEURALNETWORKS_NO_ERROR;
}
700
701}  // namespace nn
702}  // namespace android
703