ExecutionBuilder.cpp revision f1817c663af4f22bc089ef82cd50df4186422c42
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "RequestBuilder"
18
19#include "RequestBuilder.h"
20
21#include "CpuExecutor.h"
22#include "HalInterfaces.h"
23#include "Manager.h"
24#include "ModelBuilder.h"
25
26namespace android {
27namespace nn {
28
29RequestBuilder::RequestBuilder(const ModelBuilder* model)
30      : mModel(model),
31        mInputs(model->inputCount()),
32        mOutputs(model->outputCount()),
33        mMemories(model->getMemories()) {
34    LOG(DEBUG) << "RequestBuilder::RequestBuilder";
35    for (auto& p : mInputs) {
36        p.state = ModelArgumentInfo::MISSING;
37    }
38    for (auto& p : mOutputs) {
39        p.state = ModelArgumentInfo::MISSING;
40    }
41}
42
43int RequestBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
44                             const void* buffer, uint32_t length) {
45    uint32_t count = static_cast<uint32_t>(mInputs.size());
46    if (index >= count) {
47        LOG(ERROR) << "ANeuralNetworksRequest_setInput bad index " << index << " " << count;
48        return ANEURALNETWORKS_BAD_DATA;
49    }
50    ModelArgumentInfo& info = mInputs[index];
51    info.state = ModelArgumentInfo::POINTER;
52    info.locationAndDimension.location = {.poolIndex = RUN_TIME, .offset = 0, .length = length};
53    updateDimensionInfo(&info, type, mModel->getInputOperandIndex(index));
54    info.buffer = const_cast<void*>(buffer);
55    return ANEURALNETWORKS_NO_ERROR;
56}
57
58int RequestBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
59                                       const Memory* memory, uint32_t offset, uint32_t length) {
60    uint32_t count = static_cast<uint32_t>(mInputs.size());
61    if (index >= count) {
62        LOG(ERROR) << "ANeuralNetworksRequest_setInputFromMemory bad index " << index << " "
63                   << count;
64        return ANEURALNETWORKS_BAD_DATA;
65    }
66    ModelArgumentInfo& info = mInputs[index];
67    info.state = ModelArgumentInfo::MEMORY;
68    info.locationAndDimension.location = {.poolIndex = mMemories.add(memory),
69                                          .offset = offset,
70                                          .length = length};
71    updateDimensionInfo(&info, type, mModel->getInputOperandIndex(index));
72    info.buffer = nullptr;
73    return ANEURALNETWORKS_NO_ERROR;
74}
75
76int RequestBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
77                              uint32_t length) {
78    uint32_t count = static_cast<uint32_t>(mOutputs.size());
79    if (index >= count) {
80        LOG(ERROR) << "ANeuralNetworksRequest_setOutput bad index " << index << " " << count;
81        return ANEURALNETWORKS_BAD_DATA;
82    }
83    ModelArgumentInfo& info = mOutputs[index];
84    info.state = ModelArgumentInfo::POINTER;
85    info.locationAndDimension.location = {.poolIndex = RUN_TIME, .offset = 0, .length = length};
86    updateDimensionInfo(&info, type, mModel->getOutputOperandIndex(index));
87    info.buffer = const_cast<void*>(buffer);
88    return ANEURALNETWORKS_NO_ERROR;
89}
90
91int RequestBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
92                                        const Memory* memory, uint32_t offset, uint32_t length) {
93    uint32_t count = static_cast<uint32_t>(mOutputs.size());
94    if (index >= count) {
95        LOG(ERROR) << "ANeuralNetworksRequest_setOutputFromMemory bad index " << index << " "
96                   << count;
97        return ANEURALNETWORKS_BAD_DATA;
98    }
99    ModelArgumentInfo& info = mOutputs[index];
100    info.state = ModelArgumentInfo::MEMORY;
101    info.locationAndDimension.location = {.poolIndex = mMemories.add(memory),
102                                          .offset = offset,
103                                          .length = length};
104    updateDimensionInfo(&info, type, mModel->getOutputOperandIndex(index));
105    info.buffer = nullptr;
106    return ANEURALNETWORKS_NO_ERROR;
107}
108
109int RequestBuilder::updateDimensionInfo(ModelArgumentInfo* info,
110                                        const ANeuralNetworksOperandType* newType,
111                                        uint32_t operandIndex) {
112    if (newType == nullptr) {
113        info->locationAndDimension.dimensions = hidl_vec<uint32_t>();
114    } else {
115        const Operand& operand = mModel->getOperand(operandIndex);
116        uint32_t count = newType->dimensions.count;
117        if (static_cast<OperandType>(newType->type) != operand.type ||
118            count != operand.dimensions.size()) {
119            LOG(ERROR) << "ANeuralNetworksRequest_setInput/Output incompatible types";
120            return ANEURALNETWORKS_BAD_DATA;
121        }
122        for (uint32_t i = 0; i < count; i++) {
123            info->locationAndDimension.dimensions[i] = newType->dimensions.data[i];
124        }
125    }
126    return ANEURALNETWORKS_NO_ERROR;
127}
128
129int RequestBuilder::startCompute(Event** event) {
130    // TODO validate that we have full types for all inputs and outputs,
131    // that the graph is not cyclic,
132    /*
133       TODO: For non-optional inputs, also verify that buffers are not null.
134
135    for (auto& p : mInputs) {
136        if (p.state == ModelArgumentInfo::MISSING) {
137            LOG(ERROR) << "ANeuralNetworksRequest_startCompute not all inputs specified";
138            return ANEURALNETWORKS_BAD_DATA;
139        }
140    }
141    */
142    for (auto& p : mOutputs) {
143        if (p.state == ModelArgumentInfo::MISSING) {
144            LOG(ERROR) << "ANeuralNetworksRequest_startCompute not all outputs specified";
145            return ANEURALNETWORKS_BAD_DATA;
146        }
147    }
148    LOG(DEBUG) << "RequestBuilder::startCompute";
149
150    std::shared_ptr<Device> device = DeviceManager::get()->getAvailableDriver();
151    Model model;
152    mModel->setHidlModel(&model);
153
154    return device == nullptr ? startComputeOnCpu(event, model)
155                             : startComputeOnDevice(device->getInterface(), model, event);
156}
157
158// Figures out how to place each of the input or outputs in a buffer. This just does the layout,
159// it does not copy data.  Aligns each input a bit.
160int RequestBuilder::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
161                                                   Memory* memory) {
162    uint32_t nextPoolIndex = mMemories.size();
163    int64_t total = 0;
164    for (auto& info : *args) {
165        if (info.state == ModelArgumentInfo::POINTER) {
166            DataLocation& loc = info.locationAndDimension.location;
167            // TODO Good enough alignment?
168            total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
169            loc.poolIndex = nextPoolIndex;
170            loc.offset = static_cast<uint32_t>(total);
171            total += loc.length;
172        }
173    };
174    if (total > 0xFFFFFFFF) {
175        LOG(ERROR) << "ANeuralNetworksRequest_startCompute Size of all inputs or outputs exceeds "
176                      "2^32.";
177        return ANEURALNETWORKS_BAD_DATA;
178    }
179    hidl_memory hidlMemory;
180    if (total > 0) {
181        memory->create(total); // TODO check error
182        mMemories.add(memory);
183    }
184    return ANEURALNETWORKS_NO_ERROR;
185}
186
187static void copyLocationAndDimension(const std::vector<ModelArgumentInfo>& argumentInfos,
188                                     hidl_vec<InputOutputInfo>* ioInfos) {
189    size_t count = argumentInfos.size();
190    ioInfos->resize(count);
191    for (size_t i = 0; i < count; i++) {
192        (*ioInfos)[i] = argumentInfos[i].locationAndDimension;
193    }
194}
195
196int RequestBuilder::startComputeOnDevice(sp<IDevice> driver, const Model& model, Event** event) {
197    LOG(DEBUG) << "RequestBuilder::startComputeOnDevice1";
198    // TODO Dangerous!  In async, the model will outlive it here. Safe for now
199    sp<IPreparedModel> preparedModel = driver->prepareModel(model);
200    if (preparedModel == nullptr) {
201        return ANEURALNETWORKS_OP_FAILED;
202    }
203
204    // Layout the input and output data
205    int n = allocatePointerArgumentsToPool(&mInputs, &mInputPointerArguments);
206    if (n != ANEURALNETWORKS_NO_ERROR) {
207        return n;
208    }
209    n = allocatePointerArgumentsToPool(&mOutputs, &mOutputPointerArguments);
210    if (n != ANEURALNETWORKS_NO_ERROR) {
211        return n;
212    }
213
214    // Copy the input data that was specified via a pointer.
215    // mInputPointerArguments.update();
216    for (auto& info : mInputs) {
217        if (info.state == ModelArgumentInfo::POINTER) {
218            DataLocation& loc = info.locationAndDimension.location;
219            uint8_t* data = mInputPointerArguments.getPointer();
220            memcpy(data + loc.offset, info.buffer, loc.length);
221        }
222    }
223    // TODO: Add mInputPointerArguments.commit() and .update() at all the right places
224
225    Request request;
226    copyLocationAndDimension(mInputs, &request.inputs);
227    copyLocationAndDimension(mOutputs, &request.outputs);
228    uint32_t count = mMemories.size();
229    request.pools.resize(count);
230    for (uint32_t i = 0; i < count; i++) {
231        request.pools[i] = mMemories[i]->getHidlMemory();
232    }
233
234    LOG(DEBUG) << "Before preparedModel->execute() " << toString(request);
235    // Execute the request.
236    if (!preparedModel->execute(request)) {
237        LOG(DEBUG) << "**Execute failed**";
238        return ANEURALNETWORKS_OP_FAILED;
239    }
240
241    // Copy the output data from shared memory to the output buffers.
242    // TODO: outputMemory->update();
243    for (auto& info : mOutputs) {
244        if (info.state == ModelArgumentInfo::POINTER) {
245            DataLocation& loc = info.locationAndDimension.location;
246            uint8_t* data = mOutputPointerArguments.getPointer();
247            memcpy(info.buffer, data + loc.offset, loc.length);
248        }
249    }
250    LOG(DEBUG) << "RequestBuilder::startComputeOnDevice completed";
251
252    *event = new Event(); // TODO pass ievent
253    return ANEURALNETWORKS_NO_ERROR;
254}
255
256int RequestBuilder::startComputeOnCpu(Event** event, [[maybe_unused]] const Model& model) {
257    // TODO: use a thread pool
258    Event* e = new Event();
259    *event = e;
260
261    std::vector<RunTimePoolInfo> runTimePoolInfos;
262    uint32_t count = mMemories.size();
263    runTimePoolInfos.resize(count);
264    for (uint32_t i = 0; i < count; i++) {
265        const Memory* mem = mMemories[i];
266        runTimePoolInfos[i].set(mem->getHidlMemory());
267    }
268    // Create as many pools as there are input / output.
269    auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
270        for (ModelArgumentInfo& argumentInfo : argumentInfos) {
271            if (argumentInfo.state == ModelArgumentInfo::POINTER) {
272                RunTimePoolInfo runTimeInfo = {.buffer = static_cast<uint8_t*>(argumentInfo.buffer)};
273                argumentInfo.locationAndDimension.location.poolIndex =
274                        static_cast<uint32_t>(runTimePoolInfos.size());
275                argumentInfo.locationAndDimension.location.offset = 0;
276                runTimePoolInfos.push_back(runTimeInfo);
277            }
278        }
279    };
280    fixPointerArguments(mInputs);
281    fixPointerArguments(mOutputs);
282
283    Request request;
284    copyLocationAndDimension(mInputs, &request.inputs);
285    copyLocationAndDimension(mOutputs, &request.outputs);
286
287    CpuExecutor executor;
288    return executor.run(model, request, runTimePoolInfos);
289}
290
291} // namespace nn
292} // namespace android
293