1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// Classes used to plan how to execute a model across multiple devices. 18 19#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 20#define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 21 22#include "HalInterfaces.h" 23#include "Memory.h" 24#include "NeuralNetworks.h" 25#include "Utils.h" 26 27#include <set> 28 29namespace android { 30namespace nn { 31 32class CompilationBuilder; 33class Device; 34class ExecutionBuilder; 35class ExecutionPlan; 36class Memory; 37class ModelBuilder; 38class StepExecutor; 39 40class ExecutionStep { 41private: 42 typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType; 43 typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType; 44 45public: 46 enum OperandKind { INPUT, OUTPUT }; 47 48 ExecutionStep(ExecutionPlan* plan, 49 uint32_t stepIndex, 50 std::shared_ptr<ModelBuilder> model, 51 std::shared_ptr<Device> device); 52 int addOperation(int operationIndex, const ModelBuilder& fromModel); 53 int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex, 54 const ModelBuilder& fromModel, OperandKind kind); 55 56 // Each container entry is of the form (fromModel index, subModel index) 57 const RemapVectorType& getModelInputs() const { 58 return mModelInputs; 59 } 60 const RemapVectorType& getModelOutputs() const { 61 return mModelOutputs; 62 } 63 const RemapVectorType& getSubModelInputs() const { 64 return mSubModelInputs; 65 } 66 const SubModelOutputSetType& getSubModelOutputs() const { 67 return mSubModelOutputs; 68 } 69 70 void recordSubModelOutput(uint32_t fromModelIndex) { 71 const auto it = mOperandMap.find(fromModelIndex); 72 nnAssert(it != mOperandMap.end()); 73 mSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second)); 74 } 75 76 // If this step has a submodel output of unknown size, sets 77 // *hasOutputOfUnknownSize to true; otherwise, leaves it 78 // unchanged. 79 int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize); 80 81 std::shared_ptr<ModelBuilder> getSubModel() const { return mSubModel; } 82 std::shared_ptr<Device> getDevice() const { return mDevice; } 83 84 // only available after calling finishSubModel() 85 sp<IPreparedModel> getPreparedSubModel() const { return mPreparedSubModel; } 86 87 // Map inputs and outputs from ExecutionBuilder to StepExecutor. 88 void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const; 89 90 void dump() const; 91private: 92 // TODO: Some of the data is working state information that 93 // shouldn't be needed after we've constructed but not executed 94 // the step. 95 96 ExecutionPlan* mPlan; 97 uint32_t mIndex; // index of step within plan 98 std::shared_ptr<ModelBuilder> mSubModel; 99 std::shared_ptr<Device> mDevice; // nullptr signifies CPU 100 sp<IPreparedModel> mPreparedSubModel; // not used for CPU 101 102 // Inputs of original model that are also inputs of this submodel: 103 // (fromModel index, subModel index) 104 RemapVectorType mModelInputs; 105 // Outputs of original model that are also outputs of this submodel: 106 // (fromModel index, subModel index) 107 RemapVectorType mModelOutputs; 108 // Temporaries of original model that are inputs of this submodel: 109 // (fromModel index, subModel index) 110 RemapVectorType mSubModelInputs; 111 // Temporaries of original model that are outputs of this submodel: 112 // (fromModel index, subModel index) 113 SubModelOutputSetType mSubModelOutputs; 114 // Converts operand indexes from the main model to the submodel. 115 std::unordered_map<uint32_t, uint32_t> mOperandMap; 116 // Converts input indexes from the submodel to the main model 117 // (these are input indexes, not operand indexes). This vector 118 // only describes inputs of the submodel that are also inputs of 119 // the main model -- that is, mModelInputs but not mSubModelInputs. 120 std::vector<uint32_t> mInputIndexSubModelToFromModel; 121 // Converts output indexes from the submodel to the main model 122 // (these are output indexes, not operand indexes). This vector 123 // only describes outputs of the submodel that are also outputs of 124 // the main model -- that is, mModelOutputs but not mSubModelOutputs. 125 std::vector<uint32_t> mOutputIndexSubModelToFromModel; 126}; 127 128class ExecutionPlan { 129public: 130 ExecutionPlan(const ExecutionPlan&) = delete; 131 ExecutionPlan& operator=(const ExecutionPlan&) = delete; 132 133 ExecutionPlan() { } 134 ~ExecutionPlan() { delete mBody; } 135 136 // Controller is part of the interface to a mechanism for 137 // performing an execution in N steps. 138 // 139 // Usage pattern: 140 // - Instantiate Controller with ExecutionPlan::makeController(). 141 // - Call ExecutionPlan::next() on Controller N+1 times. The first N times, 142 // *executor is set to point to a new StepExecutor corresponding 143 // to that step. The N+1st time, *executor is set to nullptr, 144 // signifying there are no more steps. 145 // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR, 146 // a problem has occurred. 147 class Controller { 148 friend class ExecutionPlan; 149 private: 150 Controller(const Controller&) = delete; 151 Controller& operator=(const Controller&) = delete; 152 153 // Map from the operand index of a TEMPORARY in the original 154 // model to an offset into mTemporaries used to represent that 155 // TEMPORARY as an inter-partition input or output. 156 typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType; 157 158 static const size_t kBadStepIndex = ~size_t(0); 159 160 Controller(const ExecutionPlan* plan, const ExecutionBuilder* executionBuilder, 161 std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs, 162 uint32_t totalSizeOfTemporaries); 163 164 const ExecutionPlan* mPlan; 165 const ExecutionBuilder* mExecutionBuilder; 166 std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs; // may be nullptr 167 Memory mTemporaries; 168 size_t mNextStepIndex; 169 }; 170 171 std::shared_ptr<Controller> makeController(const ExecutionBuilder* executionBuilder) const; 172 173 int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const; 174 175 // Create the same executor as the last one created by next(). 176 int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const; 177 178 std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device); 179 180 void becomeSingleStep(const std::shared_ptr<Device> device, 181 const ModelBuilder* model); 182 183 int finish(const ModelBuilder* fromModel); 184 185 void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) { 186 auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep; 187 nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0); 188 temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex)); 189 } 190 191 void dump() const; 192 193 // These functions are solely intended for use by unit tests of 194 // the partitioning algorithm. 195 enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND }; 196 Kind forTest_getKind() const; 197 std::shared_ptr<const Device> forTest_simpleGetDevice() const; 198 const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const; 199 200private: 201 void findSubModelOutputs(); 202 203 struct Body { 204 virtual ~Body() {} 205 virtual void dump() const = 0; 206 virtual int finish(const ModelBuilder* fromModel) = 0; 207 bool mSuccessfulFinish = false; 208 }; 209 210 struct SimpleBody : Body { 211 SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model) : 212 mDevice(device), mModel(model) {} 213 214 void dump() const override; 215 int finish(const ModelBuilder* fromModel) override; 216 217 std::shared_ptr<Device> mDevice; // nullptr signifies CPU 218 const ModelBuilder* mModel; 219 sp<IPreparedModel> mPreparedModel; // not used for CPU 220 }; 221 222 struct CompoundBody : Body { 223 void dump() const override; 224 int finish(const ModelBuilder* fromModel) override; 225 226 // TODO: Some of the data is working state information that 227 // shouldn't be needed after we've constructed but not 228 // executed the plan. 229 230 std::vector<std::shared_ptr<ExecutionStep>> mSteps; 231 232 // Map from original operand index to defining step index. 233 // Used for all (and only) TEMPORARY_VARIABLEs. 234 std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep; 235 236 bool mHasSubModelOutputOfUnknownSize = false; 237 private: 238 void findSubModelOutputs(); 239 }; 240 241 enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY; 242 Body* mBody = nullptr; 243 CompoundBody* compound() { 244 nnAssert(mState == COMPOUND); 245 return static_cast<CompoundBody*>(mBody); 246 } 247 const CompoundBody* compound() const { 248 nnAssert(mState == COMPOUND); 249 return static_cast<const CompoundBody*>(mBody); 250 } 251}; 252 253} // namespace nn 254} // namespace android 255 256#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 257