// ExecutionBuilder.h revision 033b8a6ce8ebd2a01ecccc6bae96d0fff8d4964e
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 18#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 19 20#include "Callbacks.h" 21#include "HalInterfaces.h" 22#include "Memory.h" 23#include "NeuralNetworks.h" 24 25#include <unordered_map> 26#include <vector> 27 28using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback; 29using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback; 30 31namespace android { 32namespace nn { 33 34class CompilationBuilder; 35class ExecutionPlan; 36class Memory; 37class ModelBuilder; 38class StepExecutor; 39 40// TODO move length out of DataLocation 41struct ModelArgumentInfo { 42 // Whether the arguement was specified as being in a Memory, as a pointer, 43 // or has not been specified. 44 // If POINTER then: 45 // locationAndDimension.location.length is valid. 46 // locationAndDimension.dimension is valid. 47 // buffer is valid 48 // If MEMORY then: 49 // locationAndDimension.location.{poolIndex, offset, length} is valid. 50 // locationAndDimension.dimension is valid. 
51 enum { POINTER, MEMORY, UNSPECIFIED } state = UNSPECIFIED; 52 RequestArgument locationAndDimension; 53 void* buffer; 54 55 int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer, 56 uint32_t length); 57 int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 58 uint32_t poolIndex, uint32_t offset, uint32_t length); 59 int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType); 60}; 61 62class ExecutionBuilder { 63 friend class StepExecutor; 64public: 65 ExecutionBuilder(const CompilationBuilder* compilation); 66 67 int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer, 68 size_t length); 69 int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 70 const Memory* memory, size_t offset, size_t length); 71 int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 72 size_t length); 73 int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 74 const Memory* memory, size_t offset, size_t length); 75 int startCompute(sp<ExecutionCallback>* synchronizationCallback); 76 77private: 78 const ModelBuilder* mModel; 79 [[maybe_unused]] const ExecutionPlan* mPlan; 80 81 // The information we'll send to the driver about the inputs and outputs. 82 // Note that we build this in two steps: 83 // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element. 84 // If set from a pointer, don't set the location in the RequestArgument but store it 85 // instead in mInputBuffers or mOutputBuffers. 86 // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for 87 // the m*Buffers entries. Copy the input values into the shared memory. 88 // We do this to avoid creating a lot of shared memory objects if we have a lot of 89 // parameters specified via pointers. 
We also avoid copying in the case where 90 // some of the nodes will interpreted on the CPU anyway. 91 std::vector<ModelArgumentInfo> mInputs; 92 std::vector<ModelArgumentInfo> mOutputs; 93 MemoryTracker mMemories; 94}; 95 96// class StepExecutor is used to execute a single "step" in a 97// potentially multiple step execution process. The graph associated 98// with that step is executed in its entirety on a single device (or 99// on the CPU). 100class StepExecutor { 101public: 102 // executionBuilder 103 // Describes the full (possibly multiple-"step") execution. 104 // model 105 // The model to be executed by the executor. Possibly a 106 // submodel of the model from executionBuilder. 107 // driver, preparedModel 108 // The device on which to execute the "step", and the prepared 109 // model to execute on that device. (Both are nullptr in the 110 // case of CPU.) 111 StepExecutor(const ExecutionBuilder* executionBuilder, 112 const ModelBuilder* model, 113 sp<IDevice> driver, sp<IPreparedModel> preparedModel); 114 115 // Map inputs and outputs from ExecutionBuilder to StepExecutor, 116 // in the case where we have a single-"step" execution (i.e., the executor 117 // is executing the entire model from the ExecutionBuilder). 118 void mapInputsAndOutputsTrivially(); 119 120 // Map inputs and outputs from ExecutionBuilder to StepExecutor, 121 // one at a time. Note that these are input/output indexes, not 122 // operand indexes. 
123 void mapInput(uint32_t builderIndex, uint32_t executorIndex) { 124 mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], 125 &mInputs[executorIndex]); 126 } 127 void mapOutput(uint32_t builderIndex, uint32_t executorIndex) { 128 mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], 129 &mOutputs[executorIndex]); 130 } 131 132 // TODO: inter-partition temporaries 133 134 int startCompute(sp<ExecutionCallback>* synchronizationCallback); 135 136private: 137 int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory); 138 int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback); 139 int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback); 140 141 void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, 142 ModelArgumentInfo* executorInputOrOutput); 143 144 // describes the full (possibly multiple-"step") execution 145 const ExecutionBuilder* mExecutionBuilder; 146 147 // model to be executed on the executor, in both original and 148 // compiled forms; and device on which to execute it 149 const ModelBuilder* mModel; 150 sp<IDevice> mDriver; // nullptr if CPU execution 151 sp<IPreparedModel> mPreparedModel; // nullptr if CPU execution or if bypassing ExecutionPlan 152 153 // The information we'll send to the driver about the inputs and outputs. 154 // Note that we build this in two steps: 155 // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element. 156 // If set from a pointer, don't set the location in the RequestArgument but store it 157 // instead in mInputBuffers or mOutputBuffers. 158 // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for 159 // the m*Buffers entries. Copy the input values into the shared memory. 160 // We do this to avoid creating a lot of shared memory objects if we have a lot of 161 // parameters specified via pointers. 
We also avoid copying in the case where 162 // some of the nodes will interpreted on the CPU anyway. 163 std::vector<ModelArgumentInfo> mInputs; 164 std::vector<ModelArgumentInfo> mOutputs; 165 MemoryTracker mMemories; 166}; 167 168} // namespace nn 169} // namespace android 170 171#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 172