// ExecutionBuilder.cpp revision 4d83c52f52613585f7b86368be762b2857f7460f
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "ExecutionBuilder" 18 19#include "ExecutionBuilder.h" 20 21#include "CompilationBuilder.h" 22#include "CpuExecutor.h" 23#include "HalInterfaces.h" 24#include "Manager.h" 25#include "ModelBuilder.h" 26 27#include <mutex> 28#include <thread> 29#include <vector> 30 31namespace android { 32namespace nn { 33 34int ModelArgumentInfo::setFromPointer(const Operand& operand, 35 const ANeuralNetworksOperandType* type, void* data, 36 uint32_t length) { 37 int n = updateDimensionInfo(operand, type); 38 if (n != ANEURALNETWORKS_NO_ERROR) { 39 return n; 40 } 41 if (data == nullptr) { 42 if (length) { 43 LOG(ERROR) << "Setting argument as having no value but non-zero length passed."; 44 return ANEURALNETWORKS_BAD_DATA; 45 } 46 state = ModelArgumentInfo::HAS_NO_VALUE; 47 } else { 48 state = ModelArgumentInfo::POINTER; 49 } 50 buffer = data; 51 locationAndLength = {.poolIndex = 0, .offset = 0, .length = length}; 52 return ANEURALNETWORKS_NO_ERROR; 53} 54 55int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 56 uint32_t poolIndex, uint32_t offset, uint32_t length) { 57 int n = updateDimensionInfo(operand, type); 58 if (n != ANEURALNETWORKS_NO_ERROR) { 59 return n; 60 } 61 state = ModelArgumentInfo::MEMORY; 62 locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length}; 
63 buffer = nullptr; 64 return ANEURALNETWORKS_NO_ERROR; 65} 66 67int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex) { 68 dimensions = operand.dimensions; 69 state = ModelArgumentInfo::MEMORY; 70 locationAndLength = 71 {.poolIndex = poolIndex, .offset = 0, .length = sizeOfData(operand)}; 72 buffer = nullptr; 73 return ANEURALNETWORKS_NO_ERROR; 74} 75 76int ModelArgumentInfo::updateDimensionInfo(const Operand& operand, 77 const ANeuralNetworksOperandType* newType) { 78 if (newType == nullptr) { 79 dimensions = hidl_vec<uint32_t>(); 80 } else { 81 uint32_t count = newType->dimensionCount; 82 if (static_cast<OperandType>(newType->type) != operand.type || 83 count != operand.dimensions.size()) { 84 LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types"; 85 return ANEURALNETWORKS_BAD_DATA; 86 } 87 for (uint32_t i = 0; i < count; i++) { 88 dimensions[i] = newType->dimensions[i]; 89 } 90 } 91 return ANEURALNETWORKS_NO_ERROR; 92} 93 94ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) : 95 mModel(compilation->mModel), 96 mPlan(&compilation->mPlan), 97 mInputs(mModel->inputCount()), 98 mOutputs(mModel->outputCount()), 99 mMemories(mModel->getMemories()) { 100 LOG(DEBUG) << "ExecutionBuilder::ExecutionBuilder"; 101} 102 103int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type, 104 const void* buffer, size_t length) { 105 uint32_t count = static_cast<uint32_t>(mInputs.size()); 106 if (index >= count) { 107 LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count; 108 return ANEURALNETWORKS_BAD_DATA; 109 } 110 if (type != nullptr) { 111 int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false); 112 if (n != ANEURALNETWORKS_NO_ERROR) { 113 return n; 114 } 115 } 116 if (length > 0xFFFFFFFF) { 117 LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length; 118 return 
ANEURALNETWORKS_BAD_DATA; 119 } 120 uint32_t l = static_cast<uint32_t>(length); 121 return mInputs[index].setFromPointer(mModel->getInputOperand(index), type, 122 const_cast<void*>(buffer), l); 123} 124 125int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 126 const Memory* memory, size_t offset, size_t length) { 127 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory() 128 129 uint32_t count = static_cast<uint32_t>(mInputs.size()); 130 if (index >= count) { 131 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " " 132 << count; 133 return ANEURALNETWORKS_BAD_DATA; 134 } 135 if (!memory->validateSize(offset, length)) { 136 return ANEURALNETWORKS_BAD_DATA; 137 } 138 // TODO validate the rest 139 uint32_t poolIndex = mMemories.add(memory); 140 return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset, 141 length); 142} 143 144int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 145 size_t length) { 146 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 147 if (index >= count) { 148 LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count; 149 return ANEURALNETWORKS_BAD_DATA; 150 } 151 if (type != nullptr) { 152 int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false); 153 if (n != ANEURALNETWORKS_NO_ERROR) { 154 return n; 155 } 156 } 157 if (length > 0xFFFFFFFF) { 158 LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length; 159 return ANEURALNETWORKS_BAD_DATA; 160 } 161 uint32_t l = static_cast<uint32_t>(length); 162 return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l); 163} 164 165int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 166 const Memory* memory, size_t offset, size_t length) { 167 // Should be similar to 
StepExecutor::setInputOrOutputFromTemporaryMemory() 168 169 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 170 if (index >= count) { 171 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " " 172 << count; 173 return ANEURALNETWORKS_BAD_DATA; 174 } 175 if (!memory->validateSize(offset, length)) { 176 return ANEURALNETWORKS_BAD_DATA; 177 } 178 // TODO validate the rest 179 uint32_t poolIndex = mMemories.add(memory); 180 return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset, 181 length); 182} 183 184int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 185 *synchronizationCallback = nullptr; 186 187 // TODO validate that we have full types for all inputs and outputs, 188 // that the graph is not cyclic, 189 190 for (auto& p : mInputs) { 191 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 192 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified"; 193 return ANEURALNETWORKS_BAD_DATA; 194 } 195 } 196 for (auto& p : mOutputs) { 197 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 198 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified"; 199 return ANEURALNETWORKS_BAD_DATA; 200 } 201 } 202 203 // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan 204 // with the compilation and execution phases of the NN API? Or retain that path 205 // as a fallback in the case of partitioning failure? 206 // 207 // TODO: Entire plan-based-path should run in an asynchronous thread -- 208 // take the asynchronous thread logic out of startComputeOnCpu() and use 209 // it to wrap the plan-based-path. 
210 const int partitioning = DeviceManager::get()->getPartitioning(); 211 if (partitioning > 0) { 212 const bool simulation = (partitioning == 1); 213 std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this); 214 if (controller == nullptr) { 215 const bool fallback = (partitioning == 2); 216 if (!simulation && !fallback) { 217 return ANEURALNETWORKS_OP_FAILED; 218 } 219 } else { 220 LOG(DEBUG) << "ExecutionBuilder::startCompute" 221 << (simulation ? " SIMULATION" : "") 222 << " (from plan, iteratively)"; 223 while (true) { 224 std::shared_ptr<StepExecutor> executor; 225 LOG(DEBUG) << "looking for next StepExecutor"; 226 int n = mPlan->next(controller, &executor); 227 if (n != ANEURALNETWORKS_NO_ERROR || executor == nullptr) { 228 if (!simulation) { 229 return n; 230 } 231 232 // simulation 233 if (n != ANEURALNETWORKS_NO_ERROR) { 234 LOG(DEBUG) << "ExecutionBuilder::startCompute SIMULATION failed " 235 << "with error " << n; 236 } 237 break; 238 } 239 if (simulation) { 240 continue; 241 } 242 243 n = executor->startCompute(synchronizationCallback); 244 if (n != ANEURALNETWORKS_NO_ERROR) { 245 return n; 246 } 247 (*synchronizationCallback)->wait(); 248 if ((*synchronizationCallback)->getStatus() != ErrorStatus::NONE) { 249 return ANEURALNETWORKS_OP_FAILED; 250 } 251 } 252 } 253 } 254 255 // Find a driver that can handle all the operations. 
256 Model hidlModel; 257 mModel->setHidlModel(&hidlModel); 258 const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers(); 259 for (const auto& device : devices) { 260 hidl_vec<bool> supports; 261 LOG(DEBUG) << "Checking " << device->getName(); 262 device->getSupportedOperations(hidlModel, &supports); 263 if (std::find(supports.begin(), supports.end(), false) == supports.end()) { 264 LOG(DEBUG) << "ExecutionBuilder::startCompute (without plan) on " << device->getName(); 265 StepExecutor executor(this, mModel, device->getInterface(), 266 nullptr /* no IPreparedModel, so compile */); 267 executor.mapInputsAndOutputsTrivially(); 268 return executor.startCompute(synchronizationCallback); 269 } 270 } 271 // If none can, run on the CPU. 272 LOG(DEBUG) << "ExecutionBuilder::startCompute (without plan) on CPU"; 273 StepExecutor executor(this, mModel, 274 nullptr /* no IDevice, so CPU */, 275 nullptr /* no IPreparedModel */); 276 executor.mapInputsAndOutputsTrivially(); 277 return executor.startCompute(synchronizationCallback); 278} 279 280// Figures out how to place each of the input or outputs in a buffer. This just does the layout, 281// it does not copy data. Aligns each input a bit. 282int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, 283 Memory* memory) { 284 uint32_t nextPoolIndex = mMemories.size(); 285 int64_t total = 0; 286 for (auto& info : *args) { 287 if (info.state == ModelArgumentInfo::POINTER) { 288 DataLocation& loc = info.locationAndLength; 289 // TODO Good enough alignment? 
290 total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length); 291 loc.poolIndex = nextPoolIndex; 292 loc.offset = static_cast<uint32_t>(total); 293 total += loc.length; 294 } 295 }; 296 if (total > 0xFFFFFFFF) { 297 LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds " 298 "2^32."; 299 return ANEURALNETWORKS_BAD_DATA; 300 } 301 hidl_memory hidlMemory; 302 if (total > 0) { 303 memory->create(total); // TODO check error 304 mMemories.add(memory); 305 } 306 return ANEURALNETWORKS_NO_ERROR; 307} 308 309static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos, 310 hidl_vec<RequestArgument>* ioInfos) { 311 size_t count = argumentInfos.size(); 312 ioInfos->resize(count); 313 for (size_t i = 0; i < count; i++) { 314 const auto& info = argumentInfos[i]; 315 (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE, 316 .location = info.locationAndLength, 317 .dimensions = info.dimensions, 318 }; 319 } 320} 321 322StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder, 323 const ModelBuilder* model, 324 sp<IDevice> driver, sp<IPreparedModel> preparedModel) : 325 mExecutionBuilder(executionBuilder), mModel(model), 326 mDriver(driver), mPreparedModel(preparedModel), 327 mInputs(model->inputCount()), mOutputs(model->outputCount()) {} 328 329void StepExecutor::mapInputsAndOutputsTrivially() { 330 mInputs = mExecutionBuilder->mInputs; 331 mOutputs = mExecutionBuilder->mOutputs; 332 mMemories = mExecutionBuilder->mMemories; 333} 334 335void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, 336 ModelArgumentInfo* executorInputOrOutput) { 337 *executorInputOrOutput = builderInputOrOutput; 338 switch (executorInputOrOutput->state) { 339 default: 340 nnAssert(!"unexpected ModelArgumentInfo::state"); 341 case ModelArgumentInfo::POINTER: 342 case ModelArgumentInfo::UNSPECIFIED: 343 break; 344 case ModelArgumentInfo::MEMORY: { 345 const uint32_t 
builderPoolIndex = 346 builderInputOrOutput.locationAndLength.poolIndex; 347 const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex]; 348 const uint32_t executorPoolIndex = mMemories.add(memory); 349 executorInputOrOutput->locationAndLength.poolIndex = 350 executorPoolIndex; 351 break; 352 } 353 } 354} 355 356int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand, 357 const Memory* memory, 358 ModelArgumentInfo* inputOrOutputInfo) { 359 // Should be similar to 360 // ExecutionBuilder::setInputFromMemory() 361 // ExecutionBuilder::setOutputFromMemory() 362 363 uint32_t poolIndex = mMemories.add(memory); 364 return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex); 365} 366 367int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 368 if (mDriver == nullptr) { 369 return startComputeOnCpu(synchronizationCallback); 370 } else { 371 return startComputeOnDevice(synchronizationCallback); 372 } 373} 374 375int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) { 376 nnAssert(mDriver != nullptr); 377 378 *synchronizationCallback = nullptr; 379 380 // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated 381 // ExecutionPlan with the compilation and execution phases of the NN API 382 if (mPreparedModel == nullptr) { 383 Model model; 384 mModel->setHidlModel(&model); 385 386 // TODO Dangerous! In async, the model will outlive it here. 
Safe for now 387 sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback(); 388 Return<ErrorStatus> prepareLaunchStatus = 389 mDriver->prepareModel(model, preparedModelCallback); 390 if (!prepareLaunchStatus.isOk() || prepareLaunchStatus != ErrorStatus::NONE) { 391 return ANEURALNETWORKS_OP_FAILED; 392 } 393 394 // Immediately synchronize with callback object for now 395 // TODO: change to asynchronous later 396 preparedModelCallback->wait(); 397 ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus(); 398 mPreparedModel = preparedModelCallback->getPreparedModel(); 399 if (prepareReturnStatus != ErrorStatus::NONE || mPreparedModel == nullptr) { 400 return ANEURALNETWORKS_OP_FAILED; 401 } 402 } 403 404 // We separate the input & output pools so that we reduce the copying done if we 405 // do an eventual remoting (hidl_memory->update()). We could also use it to set 406 // protection on read only memory but that's not currently done. 407 Memory inputPointerArguments; 408 Memory outputPointerArguments; 409 410 // Layout the input and output data 411 int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments); 412 if (n != ANEURALNETWORKS_NO_ERROR) { 413 return n; 414 } 415 n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments); 416 if (n != ANEURALNETWORKS_NO_ERROR) { 417 return n; 418 } 419 420 // Copy the input data that was specified via a pointer. 
421 // inputPointerArguments.update(); 422 for (auto& info : mInputs) { 423 if (info.state == ModelArgumentInfo::POINTER) { 424 DataLocation& loc = info.locationAndLength; 425 uint8_t* data = nullptr; 426 int n = inputPointerArguments.getPointer(&data); 427 if (n != ANEURALNETWORKS_NO_ERROR) { 428 return n; 429 } 430 memcpy(data + loc.offset, info.buffer, loc.length); 431 } 432 } 433 // TODO: Add inputPointerArguments.commit() and .update() at all the right places 434 435 Request request; 436 setRequestArgumentArray(mInputs, &request.inputs); 437 setRequestArgumentArray(mOutputs, &request.outputs); 438 uint32_t count = mMemories.size(); 439 request.pools.resize(count); 440 for (uint32_t i = 0; i < count; i++) { 441 request.pools[i] = mMemories[i]->getHidlMemory(); 442 } 443 444 // Prepare the callback for asynchronous execution. sp<ExecutionCallback> 445 // object is returned when the execution has been successfully launched, 446 // otherwise a nullptr is returned. The executionCallback is abstracted in 447 // the NN API as an "event". 448 // 449 // The sp is used for ref-counting purposes. Without it, the HIDL service 450 // could attempt to communicate with a dead callback object. 451 // 452 // TODO: Explain the "dead callback" problem further, either here or 453 // in the design document. 454 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 455 456 LOG(DEBUG) << "Before mPreparedModel->execute() " << toString(request); 457 // Execute. 458 // TODO: What happens to the Callback if the service dies abnormally 459 // -- won't that keep the Callback live forever, because the service 460 // never has the opportunity to bump the reference count down? Or 461 // maybe the HIDL infrastructure handles this magically? At worst, 462 // it seems like this is a small memory leak, if the Callback stays 463 // alive forever. 
464 if (mPreparedModel->execute(request, executionCallback) != ErrorStatus::NONE) { 465 LOG(DEBUG) << "**Execute failed**"; 466 return ANEURALNETWORKS_OP_FAILED; 467 } 468 469 // TODO: Remove this synchronization point when the block of code below is 470 // removed. 471 executionCallback->wait(); 472 Return<ErrorStatus> executionStatus = executionCallback->getStatus(); 473 if (!executionStatus.isOk() || executionStatus != ErrorStatus::NONE) { 474 LOG(DEBUG) << "**Execute async failed**"; 475 return ANEURALNETWORKS_OP_FAILED; 476 } 477 478 // Copy the output data from shared memory to the output buffers. 479 // TODO: Move this block of code somewhere else. It should not be in the 480 // startCompute function. 481 // TODO: outputMemory->update(); outputMemory->commit() 482 for (auto& info : mOutputs) { 483 if (info.state == ModelArgumentInfo::POINTER) { 484 DataLocation& loc = info.locationAndLength; 485 uint8_t* data = nullptr; 486 int n = outputPointerArguments.getPointer(&data); 487 if (n != ANEURALNETWORKS_NO_ERROR) { 488 return n; 489 } 490 memcpy(info.buffer, data + loc.offset, loc.length); 491 } 492 } 493 LOG(DEBUG) << "StepExecutor::startComputeOnDevice completed"; 494 495 *synchronizationCallback = executionCallback; 496 return ANEURALNETWORKS_NO_ERROR; 497} 498 499static void asyncStartComputeOnCpu(const Model& model, const Request& request, 500 const std::vector<RunTimePoolInfo>& runTimePoolInfos, 501 const sp<IExecutionCallback>& executionCallback) { 502 CpuExecutor executor; 503 int err = executor.run(model, request, runTimePoolInfos); 504 ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ? 505 ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; 506 executionCallback->notify(status); 507} 508 509int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) { 510 // TODO: use a thread pool 511 512 Model model; 513 mModel->setHidlModel(&model); 514 515 // Prepare the callback for asynchronous execution. 
sp<ExecutionCallback> 516 // object is returned when the execution has been successfully launched, 517 // otherwise a nullptr is returned. The executionCallback is abstracted in 518 // the NN API as an "event". 519 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 520 *synchronizationCallback = nullptr; 521 522 std::vector<RunTimePoolInfo> runTimePoolInfos; 523 uint32_t count = mMemories.size(); 524 runTimePoolInfos.resize(count); 525 for (uint32_t i = 0; i < count; i++) { 526 const Memory* mem = mMemories[i]; 527 runTimePoolInfos[i].set(mem->getHidlMemory()); 528 } 529 // Create as many pools as there are input / output. 530 auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { 531 for (ModelArgumentInfo& argumentInfo : argumentInfos) { 532 if (argumentInfo.state == ModelArgumentInfo::POINTER) { 533 RunTimePoolInfo runTimeInfo = { 534 .buffer = static_cast<uint8_t*>(argumentInfo.buffer)}; 535 argumentInfo.locationAndLength.poolIndex = 536 static_cast<uint32_t>(runTimePoolInfos.size()); 537 argumentInfo.locationAndLength.offset = 0; 538 runTimePoolInfos.push_back(runTimeInfo); 539 } 540 } 541 }; 542 fixPointerArguments(mInputs); 543 fixPointerArguments(mOutputs); 544 545 Request request; 546 setRequestArgumentArray(mInputs, &request.inputs); 547 setRequestArgumentArray(mOutputs, &request.outputs); 548 549 // TODO: should model be moved with a std::cref? 550 std::thread thread(asyncStartComputeOnCpu, model, std::move(request), 551 std::move(runTimePoolInfos), executionCallback); 552 executionCallback->bind_thread(std::move(thread)); 553 554 *synchronizationCallback = executionCallback; 555 return ANEURALNETWORKS_NO_ERROR; 556} 557 558} // namespace nn 559} // namespace android 560