ExecutionBuilder.h revision 033b8a6ce8ebd2a01ecccc6bae96d0fff8d4964e
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;

// TODO: Move length out of DataLocation.
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // or has not been specified.
    // If POINTER then:
    //   locationAndDimension.location.length is valid.
    //   locationAndDimension.dimension is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndDimension.location.{poolIndex, offset, length} is valid.
    //   locationAndDimension.dimension is valid.
    enum { POINTER, MEMORY, UNSPECIFIED } state = UNSPECIFIED;
    RequestArgument locationAndDimension;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                       uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};
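
// Illustrative sketch (an assumption, not code from this runtime): how a
// caller might fill in a ModelArgumentInfo from a raw pointer versus from a
// Memory pool.  `operand`, `data`, and the pool index are hypothetical.
//
//   ModelArgumentInfo info;
//   float data[4] = {};
//   info.setFromPointer(operand, /*type=*/nullptr, data, sizeof(data));
//   // info.state == ModelArgumentInfo::POINTER, info.buffer == data
//
//   info.setFromMemory(operand, /*type=*/nullptr, /*poolIndex=*/0,
//                      /*offset=*/0, /*length=*/sizeof(data));
//   // info.state == ModelArgumentInfo::MEMORY; the location now identifies
//   // a region of the indexed memory pool instead of a raw pointer.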

class ExecutionBuilder {
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

private:
    const ModelBuilder* mModel;
    [[maybe_unused]] const ExecutionPlan* mPlan;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs
    //    element.  If set from a pointer, don't set the location in the
    //    RequestArgument; store the pointer in the ModelArgumentInfo's buffer
    //    field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared
    //    memory for the pointer-based entries and copy the input values into it.
    // We do this to avoid creating a large number of shared memory objects when
    // many parameters are specified via pointers.  We also avoid copying in the
    // case where some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
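
// Illustrative usage sketch (an assumption about how the NNAPI C entry points
// drive this class, not a verbatim excerpt).  Passing nullptr for the operand
// type means "use the type from the model"; `compilation`, `inputBuffer`, and
// `outputBuffer` are hypothetical.
//
//   ExecutionBuilder execution(compilation);
//   execution.setInput(0, /*type=*/nullptr, inputBuffer, inputLength);
//   execution.setOutput(0, /*type=*/nullptr, outputBuffer, outputLength);
//   sp<ExecutionCallback> callback;
//   if (execution.startCompute(&callback) == ANEURALNETWORKS_NO_ERROR) {
//       callback->wait();  // block until the asynchronous execution finishes
//   }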

// class StepExecutor is used to execute a single "step" in a
// potentially multiple-"step" execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 sp<IDevice> driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }

    // TODO: inter-partition temporaries

    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    // Describes the full (possibly multiple-"step") execution.
    const ExecutionBuilder* mExecutionBuilder;

    // Model to be executed on the executor, in both original and
    // compiled forms; and the device on which to execute it.
    const ModelBuilder* mModel;
    sp<IDevice> mDriver;                // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs
    //    element.  If set from a pointer, don't set the location in the
    //    RequestArgument; store the pointer in the ModelArgumentInfo's buffer
    //    field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared
    //    memory for the pointer-based entries and copy the input values into it.
    // We do this to avoid creating a large number of shared memory objects when
    // many parameters are specified via pointers.  We also avoid copying in the
    // case where some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
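
// Illustrative sketch (an assumption about how the runtime might drive a
// trivial single-"step" execution on the CPU path; `executionBuilder`,
// `model`, and the calling context are hypothetical):
//
//   StepExecutor executor(&executionBuilder, model,
//                         /*driver=*/nullptr, /*preparedModel=*/nullptr);
//   executor.mapInputsAndOutputsTrivially();
//   sp<ExecutionCallback> callback;
//   executor.startCompute(&callback);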

} // namespace nn
} // namespace android

#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H