ExecutionBuilder.cpp revision 3ced3cfd5b8f22b632c35f24e585c4847383b195
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "ExecutionBuilder"
18
19#include "ExecutionBuilder.h"
20
21#include "CompilationBuilder.h"
22#include "CpuExecutor.h"
23#include "HalInterfaces.h"
24#include "Manager.h"
25#include "ModelBuilder.h"
26
27#include <mutex>
28#include <thread>
29#include <vector>
30
31namespace android {
32namespace nn {
33
34int ModelArgumentInfo::setFromPointer(const Operand& operand,
35                                      const ANeuralNetworksOperandType* type, void* data,
36                                      uint32_t length) {
37    int n = updateDimensionInfo(operand, type);
38    if (n != ANEURALNETWORKS_NO_ERROR) {
39        return n;
40    }
41    state = ModelArgumentInfo::POINTER;
42    locationAndDimension.location = {.poolIndex = 0, .offset = 0, .length = length};
43    buffer = data;
44    return ANEURALNETWORKS_NO_ERROR;
45}
46
47int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
48                                     uint32_t poolIndex, uint32_t offset, uint32_t length) {
49    int n = updateDimensionInfo(operand, type);
50    if (n != ANEURALNETWORKS_NO_ERROR) {
51        return n;
52    }
53    state = ModelArgumentInfo::MEMORY;
54    locationAndDimension.location = {.poolIndex = poolIndex, .offset = offset, .length = length};
55    buffer = nullptr;
56    return ANEURALNETWORKS_NO_ERROR;
57}
58
59int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
60                                           const ANeuralNetworksOperandType* newType) {
61    if (newType == nullptr) {
62        locationAndDimension.dimensions = hidl_vec<uint32_t>();
63    } else {
64        uint32_t count = newType->dimensions.count;
65        if (static_cast<OperandType>(newType->type) != operand.type ||
66            count != operand.dimensions.size()) {
67            LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types";
68            return ANEURALNETWORKS_BAD_DATA;
69        }
70        for (uint32_t i = 0; i < count; i++) {
71            locationAndDimension.dimensions[i] = newType->dimensions.data[i];
72        }
73    }
74    return ANEURALNETWORKS_NO_ERROR;
75}
76
77ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
78        mModel(compilation->mModel),
79        mInputs(mModel->inputCount()),
80        mOutputs(mModel->outputCount()),
81        mMemories(mModel->getMemories()) {
82    LOG(DEBUG) << "ExecutionBuilder::ExecutionBuilder";
83    for (auto& p : mInputs) {
84        p.state = ModelArgumentInfo::UNSPECIFIED;
85    }
86    for (auto& p : mOutputs) {
87        p.state = ModelArgumentInfo::UNSPECIFIED;
88    }
89}
90
91int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
92                               const void* buffer, uint32_t length) {
93    uint32_t count = static_cast<uint32_t>(mInputs.size());
94    if (index >= count) {
95        LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
96        return ANEURALNETWORKS_BAD_DATA;
97    }
98    return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
99                                         const_cast<void*>(buffer), length);
100}
101
102int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
103                                         const Memory* memory, uint32_t offset, uint32_t length) {
104    uint32_t count = static_cast<uint32_t>(mInputs.size());
105    if (index >= count) {
106        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
107                   << count;
108        return ANEURALNETWORKS_BAD_DATA;
109    }
110    if (!memory->validateSize(offset, length)) {
111        return ANEURALNETWORKS_BAD_DATA;
112    }
113    uint32_t poolIndex = mMemories.add(memory);
114    return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
115                                        length);
116}
117
118int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
119                                uint32_t length) {
120    uint32_t count = static_cast<uint32_t>(mOutputs.size());
121    if (index >= count) {
122        LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
123        return ANEURALNETWORKS_BAD_DATA;
124    }
125    return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, length);
126}
127
128int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
129                                          const Memory* memory, uint32_t offset, uint32_t length) {
130    uint32_t count = static_cast<uint32_t>(mOutputs.size());
131    if (index >= count) {
132        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
133                   << count;
134        return ANEURALNETWORKS_BAD_DATA;
135    }
136    if (!memory->validateSize(offset, length)) {
137        return ANEURALNETWORKS_BAD_DATA;
138    }
139    uint32_t poolIndex = mMemories.add(memory);
140    return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
141                                         length);
142}
143
144int ExecutionBuilder::startCompute() {
145    // TODO validate that we have full types for all inputs and outputs,
146    // that the graph is not cyclic,
147    /*
148       TODO: For non-optional inputs, also verify that buffers are not null.
149
150    for (auto& p : mInputs) {
151        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
152            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
153            return ANEURALNETWORKS_BAD_DATA;
154        }
155    }
156    */
157    for (auto& p : mOutputs) {
158        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
159            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
160            return ANEURALNETWORKS_BAD_DATA;
161        }
162    }
163    LOG(DEBUG) << "ExecutionBuilder::startCompute";
164
165    std::shared_ptr<Device> device = DeviceManager::get()->getAvailableDriver();
166    Model model;
167    mModel->setHidlModel(&model);
168
169    return device == nullptr ? startComputeOnCpu(model)
170                             : startComputeOnDevice(device->getInterface(), model);
171}
172
173int ExecutionBuilder::wait() {
174    if (mEvent == nullptr) {
175        LOG(ERROR) << "ANeuralNetworksExecution_wait without execution in flight";
176        return ANEURALNETWORKS_BAD_STATE;
177    }
178    mEvent->wait();
179    return ANEURALNETWORKS_NO_ERROR;  // TODO shouldn't we look at wait()'s return value?
180}
181
182// Figures out how to place each of the input or outputs in a buffer. This just does the layout,
183// it does not copy data.  Aligns each input a bit.
184int ExecutionBuilder::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
185                                                     Memory* memory) {
186    uint32_t nextPoolIndex = mMemories.size();
187    int64_t total = 0;
188    for (auto& info : *args) {
189        if (info.state == ModelArgumentInfo::POINTER) {
190            DataLocation& loc = info.locationAndDimension.location;
191            // TODO Good enough alignment?
192            total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
193            loc.poolIndex = nextPoolIndex;
194            loc.offset = static_cast<uint32_t>(total);
195            total += loc.length;
196        }
197    };
198    if (total > 0xFFFFFFFF) {
199        LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
200                      "2^32.";
201        return ANEURALNETWORKS_BAD_DATA;
202    }
203    hidl_memory hidlMemory;
204    if (total > 0) {
205        memory->create(total);  // TODO check error
206        mMemories.add(memory);
207    }
208    return ANEURALNETWORKS_NO_ERROR;
209}
210
211static void copyLocationAndDimension(const std::vector<ModelArgumentInfo>& argumentInfos,
212                                     hidl_vec<RequestArgument>* ioInfos) {
213    size_t count = argumentInfos.size();
214    ioInfos->resize(count);
215    for (size_t i = 0; i < count; i++) {
216        (*ioInfos)[i] = argumentInfos[i].locationAndDimension;
217    }
218}
219
// Runs the execution on a HAL driver: prepares the model, lays out and copies
// pointer-based inputs into shared memory, issues the execute() call, and
// copies pointer-based outputs back.  Despite the "start" name, this version
// synchronizes at each step (see the TODOs); mEvent is set on success so that
// a later wait() returns immediately.
// Returns ANEURALNETWORKS_NO_ERROR, or OP_FAILED / an allocation error code.
int ExecutionBuilder::startComputeOnDevice(sp<IDevice> driver, const Model& model) {
    LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice";
    // TODO Dangerous!  In async, the model will outlive it here. Safe for now
    sp<Event> preparationEvent = new Event();
    ErrorStatus prepareStatus = ErrorStatus::GENERAL_FAILURE;
    sp<IPreparedModel> preparedModel;

    // The callback captures by reference; that is safe only because we wait
    // on preparationEvent below before these locals go out of scope.
    driver->prepareModel(model, preparationEvent,
                         [&](ErrorStatus status, const sp<IPreparedModel>& prepared) {
                             prepareStatus = status;
                             preparedModel = prepared;
                         });

    // Immediately synchronize with event for now
    // TODO: change to asynchronous later
    Event::Status eventStatus = preparationEvent->wait();

    if (prepareStatus != ErrorStatus::NONE || preparedModel == nullptr ||
            eventStatus != Event::Status::SUCCESS) {
        return ANEURALNETWORKS_OP_FAILED;
    }

    // Layout the input and output data
    int n = allocatePointerArgumentsToPool(&mInputs, &mInputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    n = allocatePointerArgumentsToPool(&mOutputs, &mOutputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }

    // Copy the input data that was specified via a pointer.
    // mInputPointerArguments.update();
    for (auto& info : mInputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndDimension.location;
            uint8_t* data = nullptr;
            int n = mInputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            // loc.offset was assigned by allocatePointerArgumentsToPool above.
            memcpy(data + loc.offset, info.buffer, loc.length);
        }
    }
    // TODO: Add mInputPointerArguments.commit() and .update() at all the right places

    // Build the HIDL request: argument locations plus every registered pool
    // (caller memories first, then the two pointer-argument pools, matching
    // the pool indices recorded in the arguments).
    Request request;
    copyLocationAndDimension(mInputs, &request.inputs);
    copyLocationAndDimension(mOutputs, &request.outputs);
    uint32_t count = mMemories.size();
    request.pools.resize(count);
    for (uint32_t i = 0; i < count; i++) {
        request.pools[i] = mMemories[i]->getHidlMemory();
    }

    // Prepare the event for asynchronous execution. The sp<Event>
    // object is recorded if the execution has been successfully
    // launched.  The sp is used for ref-counting purposes. Without
    // it, the HIDL service could attempt to communicate with a dead
    // event object.
    //
    // TODO: Explain the "dead event" problem further, either here or
    // in the design document.
    sp<Event> eventSp = new Event();

    LOG(DEBUG) << "Before preparedModel->execute() " << toString(request);
    // Execute.
    // TODO: What happens to the Event if the service dies abnormally
    // -- won't that keep the Event live forever, because the service
    // never has the opportunity to bump the reference count down? Or
    // maybe the HIDL infrastructure handles this magically? At worst,
    // it seems like this is a small memory leak, if the Event stays
    // alive forever.
    if (preparedModel->execute(request, eventSp) != ErrorStatus::NONE) {
        LOG(DEBUG) << "**Execute failed**";
        return ANEURALNETWORKS_OP_FAILED;
    }

    // TODO: Remove this synchronization point when the block of code below is
    // removed.
    Event::Status status = eventSp->wait();
    if (status != Event::Status::SUCCESS) {
        LOG(DEBUG) << "**Execute async failed**";
        return ANEURALNETWORKS_OP_FAILED;
    }

    // Copy the output data from shared memory to the output buffers.
    // TODO: Move this block of code somewhere else. It should not be in the
    // startCompute function.
    // TODO: outputMemory->update(); outputMemory->commit()
    for (auto& info : mOutputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndDimension.location;
            uint8_t* data = nullptr;
            int n = mOutputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(info.buffer, data + loc.offset, loc.length);
        }
    }
    LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice completed";

    // Record the event so a subsequent wait() succeeds (it has already fired).
    mEvent = eventSp;
    return ANEURALNETWORKS_NO_ERROR;
}
327
328static void asyncStartComputeOnCpu(const Model& model, const Request& request,
329                                   const std::vector<RunTimePoolInfo>& runTimePoolInfos,
330                                   const sp<IEvent>& event) {
331    CpuExecutor executor;
332    int err = executor.run(model, request, runTimePoolInfos);
333    ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ?
334            ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
335    event->notify(status);
336}
337
// Launches the execution on the CPU fallback path: builds the run-time pool
// list (one pool per registered Memory, plus one synthetic pool per
// pointer-state input/output), builds the HIDL request, and spawns a thread
// running asyncStartComputeOnCpu.  mEvent is set so wait() can join.
// Returns ANEURALNETWORKS_NO_ERROR once the thread is launched.
int ExecutionBuilder::startComputeOnCpu(const Model& model) {
    // TODO: use a thread pool

    // Prepare the event for asynchronous execution. The sp<Event> object is
    // recorded if the execution has been successfully launched.
    sp<Event> eventSp = new Event();

    // One RunTimePoolInfo per shared memory registered with this execution.
    std::vector<RunTimePoolInfo> runTimePoolInfos;
    uint32_t count = mMemories.size();
    runTimePoolInfos.resize(count);
    for (uint32_t i = 0; i < count; i++) {
        const Memory* mem = mMemories[i];
        runTimePoolInfos[i].set(mem->getHidlMemory());
    }
    // Create as many pools as there are input / output.
    // Each POINTER argument gets its own synthetic pool wrapping the caller's
    // buffer directly, so the executor can address it like shared memory.
    // NOTE(review): poolIndex is taken from runTimePoolInfos.size() *before*
    // the push_back, so it indexes the entry being appended.
    auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
        for (ModelArgumentInfo& argumentInfo : argumentInfos) {
            if (argumentInfo.state == ModelArgumentInfo::POINTER) {
                RunTimePoolInfo runTimeInfo = {
                            .buffer = static_cast<uint8_t*>(argumentInfo.buffer)};
                argumentInfo.locationAndDimension.location.poolIndex =
                            static_cast<uint32_t>(runTimePoolInfos.size());
                argumentInfo.locationAndDimension.location.offset = 0;
                runTimePoolInfos.push_back(runTimeInfo);
            }
        }
    };
    fixPointerArguments(mInputs);
    fixPointerArguments(mOutputs);

    // Snapshot argument locations after the pool indices above were assigned.
    Request request;
    copyLocationAndDimension(mInputs, &request.inputs);
    copyLocationAndDimension(mOutputs, &request.outputs);

    // TODO: should model be moved with a std::cref?
    std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
                       std::move(runTimePoolInfos), eventSp);
    // The event takes ownership of the thread; wait() joins via the event.
    eventSp->bind_thread(std::move(thread));

    mEvent = eventSp;
    return ANEURALNETWORKS_NO_ERROR;
}
380
381}  // namespace nn
382}  // namespace android
383