// ExecutionBuilder.cpp revision 3ced3cfd5b8f22b632c35f24e585c4847383b195
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "ExecutionBuilder" 18 19#include "ExecutionBuilder.h" 20 21#include "CompilationBuilder.h" 22#include "CpuExecutor.h" 23#include "HalInterfaces.h" 24#include "Manager.h" 25#include "ModelBuilder.h" 26 27#include <mutex> 28#include <thread> 29#include <vector> 30 31namespace android { 32namespace nn { 33 34int ModelArgumentInfo::setFromPointer(const Operand& operand, 35 const ANeuralNetworksOperandType* type, void* data, 36 uint32_t length) { 37 int n = updateDimensionInfo(operand, type); 38 if (n != ANEURALNETWORKS_NO_ERROR) { 39 return n; 40 } 41 state = ModelArgumentInfo::POINTER; 42 locationAndDimension.location = {.poolIndex = 0, .offset = 0, .length = length}; 43 buffer = data; 44 return ANEURALNETWORKS_NO_ERROR; 45} 46 47int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 48 uint32_t poolIndex, uint32_t offset, uint32_t length) { 49 int n = updateDimensionInfo(operand, type); 50 if (n != ANEURALNETWORKS_NO_ERROR) { 51 return n; 52 } 53 state = ModelArgumentInfo::MEMORY; 54 locationAndDimension.location = {.poolIndex = poolIndex, .offset = offset, .length = length}; 55 buffer = nullptr; 56 return ANEURALNETWORKS_NO_ERROR; 57} 58 59int ModelArgumentInfo::updateDimensionInfo(const Operand& operand, 60 const ANeuralNetworksOperandType* newType) { 61 if (newType == 
nullptr) { 62 locationAndDimension.dimensions = hidl_vec<uint32_t>(); 63 } else { 64 uint32_t count = newType->dimensions.count; 65 if (static_cast<OperandType>(newType->type) != operand.type || 66 count != operand.dimensions.size()) { 67 LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types"; 68 return ANEURALNETWORKS_BAD_DATA; 69 } 70 for (uint32_t i = 0; i < count; i++) { 71 locationAndDimension.dimensions[i] = newType->dimensions.data[i]; 72 } 73 } 74 return ANEURALNETWORKS_NO_ERROR; 75} 76 77ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) : 78 mModel(compilation->mModel), 79 mInputs(mModel->inputCount()), 80 mOutputs(mModel->outputCount()), 81 mMemories(mModel->getMemories()) { 82 LOG(DEBUG) << "ExecutionBuilder::ExecutionBuilder"; 83 for (auto& p : mInputs) { 84 p.state = ModelArgumentInfo::UNSPECIFIED; 85 } 86 for (auto& p : mOutputs) { 87 p.state = ModelArgumentInfo::UNSPECIFIED; 88 } 89} 90 91int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type, 92 const void* buffer, uint32_t length) { 93 uint32_t count = static_cast<uint32_t>(mInputs.size()); 94 if (index >= count) { 95 LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count; 96 return ANEURALNETWORKS_BAD_DATA; 97 } 98 return mInputs[index].setFromPointer(mModel->getInputOperand(index), type, 99 const_cast<void*>(buffer), length); 100} 101 102int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 103 const Memory* memory, uint32_t offset, uint32_t length) { 104 uint32_t count = static_cast<uint32_t>(mInputs.size()); 105 if (index >= count) { 106 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " " 107 << count; 108 return ANEURALNETWORKS_BAD_DATA; 109 } 110 if (!memory->validateSize(offset, length)) { 111 return ANEURALNETWORKS_BAD_DATA; 112 } 113 uint32_t poolIndex = mMemories.add(memory); 114 return 
mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset, 115 length); 116} 117 118int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 119 uint32_t length) { 120 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 121 if (index >= count) { 122 LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count; 123 return ANEURALNETWORKS_BAD_DATA; 124 } 125 return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, length); 126} 127 128int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 129 const Memory* memory, uint32_t offset, uint32_t length) { 130 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 131 if (index >= count) { 132 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " " 133 << count; 134 return ANEURALNETWORKS_BAD_DATA; 135 } 136 if (!memory->validateSize(offset, length)) { 137 return ANEURALNETWORKS_BAD_DATA; 138 } 139 uint32_t poolIndex = mMemories.add(memory); 140 return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset, 141 length); 142} 143 144int ExecutionBuilder::startCompute() { 145 // TODO validate that we have full types for all inputs and outputs, 146 // that the graph is not cyclic, 147 /* 148 TODO: For non-optional inputs, also verify that buffers are not null. 
149 150 for (auto& p : mInputs) { 151 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 152 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified"; 153 return ANEURALNETWORKS_BAD_DATA; 154 } 155 } 156 */ 157 for (auto& p : mOutputs) { 158 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 159 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified"; 160 return ANEURALNETWORKS_BAD_DATA; 161 } 162 } 163 LOG(DEBUG) << "ExecutionBuilder::startCompute"; 164 165 std::shared_ptr<Device> device = DeviceManager::get()->getAvailableDriver(); 166 Model model; 167 mModel->setHidlModel(&model); 168 169 return device == nullptr ? startComputeOnCpu(model) 170 : startComputeOnDevice(device->getInterface(), model); 171} 172 173int ExecutionBuilder::wait() { 174 if (mEvent == nullptr) { 175 LOG(ERROR) << "ANeuralNetworksExecution_wait without execution in flight"; 176 return ANEURALNETWORKS_BAD_STATE; 177 } 178 mEvent->wait(); 179 return ANEURALNETWORKS_NO_ERROR; // TODO shouldn't we look at wait()'s return value? 180} 181 182// Figures out how to place each of the input or outputs in a buffer. This just does the layout, 183// it does not copy data. Aligns each input a bit. 184int ExecutionBuilder::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, 185 Memory* memory) { 186 uint32_t nextPoolIndex = mMemories.size(); 187 int64_t total = 0; 188 for (auto& info : *args) { 189 if (info.state == ModelArgumentInfo::POINTER) { 190 DataLocation& loc = info.locationAndDimension.location; 191 // TODO Good enough alignment? 
192 total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length); 193 loc.poolIndex = nextPoolIndex; 194 loc.offset = static_cast<uint32_t>(total); 195 total += loc.length; 196 } 197 }; 198 if (total > 0xFFFFFFFF) { 199 LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds " 200 "2^32."; 201 return ANEURALNETWORKS_BAD_DATA; 202 } 203 hidl_memory hidlMemory; 204 if (total > 0) { 205 memory->create(total); // TODO check error 206 mMemories.add(memory); 207 } 208 return ANEURALNETWORKS_NO_ERROR; 209} 210 211static void copyLocationAndDimension(const std::vector<ModelArgumentInfo>& argumentInfos, 212 hidl_vec<RequestArgument>* ioInfos) { 213 size_t count = argumentInfos.size(); 214 ioInfos->resize(count); 215 for (size_t i = 0; i < count; i++) { 216 (*ioInfos)[i] = argumentInfos[i].locationAndDimension; 217 } 218} 219 220int ExecutionBuilder::startComputeOnDevice(sp<IDevice> driver, const Model& model) { 221 LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice"; 222 // TODO Dangerous! In async, the model will outlive it here. 
Safe for now 223 sp<Event> preparationEvent = new Event(); 224 ErrorStatus prepareStatus = ErrorStatus::GENERAL_FAILURE; 225 sp<IPreparedModel> preparedModel; 226 227 driver->prepareModel(model, preparationEvent, 228 [&](ErrorStatus status, const sp<IPreparedModel>& prepared) { 229 prepareStatus = status; 230 preparedModel = prepared; 231 }); 232 233 // Immediately synchronize with event for now 234 // TODO: change to asynchronous later 235 Event::Status eventStatus = preparationEvent->wait(); 236 237 if (prepareStatus != ErrorStatus::NONE || preparedModel == nullptr || 238 eventStatus != Event::Status::SUCCESS) { 239 return ANEURALNETWORKS_OP_FAILED; 240 } 241 242 // Layout the input and output data 243 int n = allocatePointerArgumentsToPool(&mInputs, &mInputPointerArguments); 244 if (n != ANEURALNETWORKS_NO_ERROR) { 245 return n; 246 } 247 n = allocatePointerArgumentsToPool(&mOutputs, &mOutputPointerArguments); 248 if (n != ANEURALNETWORKS_NO_ERROR) { 249 return n; 250 } 251 252 // Copy the input data that was specified via a pointer. 253 // mInputPointerArguments.update(); 254 for (auto& info : mInputs) { 255 if (info.state == ModelArgumentInfo::POINTER) { 256 DataLocation& loc = info.locationAndDimension.location; 257 uint8_t* data = nullptr; 258 int n = mInputPointerArguments.getPointer(&data); 259 if (n != ANEURALNETWORKS_NO_ERROR) { 260 return n; 261 } 262 memcpy(data + loc.offset, info.buffer, loc.length); 263 } 264 } 265 // TODO: Add mInputPointerArguments.commit() and .update() at all the right places 266 267 Request request; 268 copyLocationAndDimension(mInputs, &request.inputs); 269 copyLocationAndDimension(mOutputs, &request.outputs); 270 uint32_t count = mMemories.size(); 271 request.pools.resize(count); 272 for (uint32_t i = 0; i < count; i++) { 273 request.pools[i] = mMemories[i]->getHidlMemory(); 274 } 275 276 // Prepare the event for asynchronous execution. 
The sp<Event> 277 // object is recorded if the execution has been successfully 278 // launched. The sp is used for ref-counting purposes. Without 279 // it, the HIDL service could attempt to communicate with a dead 280 // event object. 281 // 282 // TODO: Explain the "dead event" problem further, either here or 283 // in the design document. 284 sp<Event> eventSp = new Event(); 285 286 LOG(DEBUG) << "Before preparedModel->execute() " << toString(request); 287 // Execute. 288 // TODO: What happens to the Event if the service dies abnormally 289 // -- won't that keep the Event live forever, because the service 290 // never has the opportunity to bump the reference count down? Or 291 // maybe the HIDL infrastructure handles this magically? At worst, 292 // it seems like this is a small memory leak, if the Event stays 293 // alive forever. 294 if (preparedModel->execute(request, eventSp) != ErrorStatus::NONE) { 295 LOG(DEBUG) << "**Execute failed**"; 296 return ANEURALNETWORKS_OP_FAILED; 297 } 298 299 // TODO: Remove this synchronization point when the block of code below is 300 // removed. 301 Event::Status status = eventSp->wait(); 302 if (status != Event::Status::SUCCESS) { 303 LOG(DEBUG) << "**Execute async failed**"; 304 return ANEURALNETWORKS_OP_FAILED; 305 } 306 307 // Copy the output data from shared memory to the output buffers. 308 // TODO: Move this block of code somewhere else. It should not be in the 309 // startCompute function. 
310 // TODO: outputMemory->update(); outputMemory->commit() 311 for (auto& info : mOutputs) { 312 if (info.state == ModelArgumentInfo::POINTER) { 313 DataLocation& loc = info.locationAndDimension.location; 314 uint8_t* data = nullptr; 315 int n = mOutputPointerArguments.getPointer(&data); 316 if (n != ANEURALNETWORKS_NO_ERROR) { 317 return n; 318 } 319 memcpy(info.buffer, data + loc.offset, loc.length); 320 } 321 } 322 LOG(DEBUG) << "ExecutionBuilder::startComputeOnDevice completed"; 323 324 mEvent = eventSp; 325 return ANEURALNETWORKS_NO_ERROR; 326} 327 328static void asyncStartComputeOnCpu(const Model& model, const Request& request, 329 const std::vector<RunTimePoolInfo>& runTimePoolInfos, 330 const sp<IEvent>& event) { 331 CpuExecutor executor; 332 int err = executor.run(model, request, runTimePoolInfos); 333 ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ? 334 ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; 335 event->notify(status); 336} 337 338int ExecutionBuilder::startComputeOnCpu(const Model& model) { 339 // TODO: use a thread pool 340 341 // Prepare the event for asynchronous execution. The sp<Event> object is 342 // recorded if the execution has been successfully launched. 343 sp<Event> eventSp = new Event(); 344 345 std::vector<RunTimePoolInfo> runTimePoolInfos; 346 uint32_t count = mMemories.size(); 347 runTimePoolInfos.resize(count); 348 for (uint32_t i = 0; i < count; i++) { 349 const Memory* mem = mMemories[i]; 350 runTimePoolInfos[i].set(mem->getHidlMemory()); 351 } 352 // Create as many pools as there are input / output. 
353 auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { 354 for (ModelArgumentInfo& argumentInfo : argumentInfos) { 355 if (argumentInfo.state == ModelArgumentInfo::POINTER) { 356 RunTimePoolInfo runTimeInfo = { 357 .buffer = static_cast<uint8_t*>(argumentInfo.buffer)}; 358 argumentInfo.locationAndDimension.location.poolIndex = 359 static_cast<uint32_t>(runTimePoolInfos.size()); 360 argumentInfo.locationAndDimension.location.offset = 0; 361 runTimePoolInfos.push_back(runTimeInfo); 362 } 363 } 364 }; 365 fixPointerArguments(mInputs); 366 fixPointerArguments(mOutputs); 367 368 Request request; 369 copyLocationAndDimension(mInputs, &request.inputs); 370 copyLocationAndDimension(mOutputs, &request.outputs); 371 372 // TODO: should model be moved with a std::cref? 373 std::thread thread(asyncStartComputeOnCpu, model, std::move(request), 374 std::move(runTimePoolInfos), eventSp); 375 eventSp->bind_thread(std::move(thread)); 376 377 mEvent = eventSp; 378 return ANEURALNETWORKS_NO_ERROR; 379} 380 381} // namespace nn 382} // namespace android 383