/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;
class VersionedIDevice;
// TODO move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory or as a pointer,
    // has no value, or has not been specified yet.
    // If POINTER then:
    //   locationAndLength.length is valid.
    //   dimensions is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndLength.{poolIndex, offset, length} is valid.
    //   dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                       uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};
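
// A minimal usage sketch (hypothetical values; `operand`, `buf`, `len`,
// `poolIndex`, and `offset` are assumed to come from the caller), showing
// which fields each setter leaves valid per the state comments above:
//
//     ModelArgumentInfo info;
//     info.setFromPointer(operand, /*type=*/nullptr, buf, len);
//     // state == POINTER: buffer == buf, locationAndLength.length == len
//
//     info.setFromMemory(operand, /*type=*/nullptr, poolIndex, offset, len);
//     // state == MEMORY: locationAndLength == {poolIndex, offset, len}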

class ExecutionBuilder {
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument; store the
    //    pointer in the ModelArgumentInfo instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the pointer-backed entries and copy the input values into it.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
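
// A minimal sketch of the public C API flow (declared in NeuralNetworks.h)
// that drives an ExecutionBuilder; `compilation` plus the `input`/`output`
// arrays are assumed to be set up by the caller:
//
//     ANeuralNetworksExecution* execution = nullptr;
//     ANeuralNetworksExecution_create(compilation, &execution);
//     ANeuralNetworksExecution_setInput(execution, 0, nullptr, input, sizeof(input));
//     ANeuralNetworksExecution_setOutput(execution, 0, nullptr, output, sizeof(output));
//     ANeuralNetworksEvent* event = nullptr;
//     ANeuralNetworksExecution_startCompute(execution, &event);
//     ANeuralNetworksEvent_wait(event);
//     ANeuralNetworksEvent_free(event);
//     ANeuralNetworksExecution_free(execution);
//
// Each call lands on the corresponding method above (setInput, setOutput,
// startCompute); error handling is omitted for brevity.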

// A StepExecutor executes a single "step" in a potentially
// multi-"step" execution.  The graph associated with that step is
// executed in its entirety on a single device (or on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multi-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 VersionedIDevice* driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mInputs[executorIndex]);
    }
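
    // For example (hypothetical partitioned plan): if the builder's input 2
    // feeds the step submodel's input 0, the plan would call
    //
    //     executor->mapInput(/*builderIndex=*/2, /*executorIndex=*/0);
    //
    // mapOutput() wires up a full-model output that this step produces;
    // mapOutputToInput() covers the case where a full-model output is
    // consumed by this step as one of its inputs.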

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }
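
    // For example (hypothetical two-step plan): an intermediate operand
    // written by one step and read by the next can share one Memory region:
    //
    //     producer->setOutputFromTemporaryMemory(0, scratch, /*offset=*/0);
    //     consumer->setInputFromTemporaryMemory(0, scratch, /*offset=*/0);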

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // Describes the full (possibly multi-"step") execution.
    const ExecutionBuilder* mExecutionBuilder;

    // Model to be executed on the executor, in both original and
    // compiled forms, and the device on which to execute it.
    const ModelBuilder* mModel;
    VersionedIDevice* mDriver;          // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument; store the
    //    pointer in the ModelArgumentInfo instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the pointer-backed entries and copy the input values into it.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
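
// A minimal sketch (assuming a CPU fallback path) of executing the entire
// model with a StepExecutor when no driver is selected:
//
//     StepExecutor executor(executionBuilder, executionBuilder->getModel(),
//                           /*driver=*/nullptr, /*preparedModel=*/nullptr);
//     executor.mapInputsAndOutputsTrivially();
//     sp<ExecutionCallback> callback;
//     int status = executor.startComputeOnCpu(&callback);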

} // namespace nn
} // namespace android

#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
