// ExecutionBuilder.cpp revision 75886e77f9ca074173a49283b5c0a8c182d98977
1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "ExecutionBuilder" 18 19#include "ExecutionBuilder.h" 20 21#include "CompilationBuilder.h" 22#include "CpuExecutor.h" 23#include "HalInterfaces.h" 24#include "Manager.h" 25#include "ModelBuilder.h" 26 27#include <mutex> 28#include <thread> 29#include <vector> 30 31namespace android { 32namespace nn { 33 34int ModelArgumentInfo::setFromPointer(const Operand& operand, 35 const ANeuralNetworksOperandType* type, void* data, 36 uint32_t length) { 37 int n = updateDimensionInfo(operand, type); 38 if (n != ANEURALNETWORKS_NO_ERROR) { 39 return n; 40 } 41 if (data == nullptr) { 42 if (length) { 43 LOG(ERROR) << "Setting argument as having no value but non-zero length passed."; 44 return ANEURALNETWORKS_BAD_DATA; 45 } 46 state = ModelArgumentInfo::HAS_NO_VALUE; 47 } else { 48 state = ModelArgumentInfo::POINTER; 49 } 50 buffer = data; 51 locationAndLength = {.poolIndex = 0, .offset = 0, .length = length}; 52 return ANEURALNETWORKS_NO_ERROR; 53} 54 55int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 56 uint32_t poolIndex, uint32_t offset, uint32_t length) { 57 int n = updateDimensionInfo(operand, type); 58 if (n != ANEURALNETWORKS_NO_ERROR) { 59 return n; 60 } 61 state = ModelArgumentInfo::MEMORY; 62 locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length}; 
63 buffer = nullptr; 64 return ANEURALNETWORKS_NO_ERROR; 65} 66 67int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, 68 uint32_t poolIndex, uint32_t offset) { 69 dimensions = operand.dimensions; 70 state = ModelArgumentInfo::MEMORY; 71 locationAndLength = 72 {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)}; 73 buffer = nullptr; 74 return ANEURALNETWORKS_NO_ERROR; 75} 76 77int ModelArgumentInfo::updateDimensionInfo(const Operand& operand, 78 const ANeuralNetworksOperandType* newType) { 79 if (newType == nullptr) { 80 dimensions = hidl_vec<uint32_t>(); 81 } else { 82 uint32_t count = newType->dimensionCount; 83 if (static_cast<OperandType>(newType->type) != operand.type || 84 count != operand.dimensions.size()) { 85 LOG(ERROR) << "ANeuralNetworksExecution_setInput/Output incompatible types"; 86 return ANEURALNETWORKS_BAD_DATA; 87 } 88 89 dimensions = hidl_vec<uint32_t>(count); 90 for (uint32_t i = 0; i < count; i++) { 91 dimensions[i] = newType->dimensions[i]; 92 } 93 } 94 return ANEURALNETWORKS_NO_ERROR; 95} 96 97ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) : 98 mModel(compilation->mModel), 99 mPlan(&compilation->mPlan), 100 mPartitioning(compilation->mPartitioning), 101 mInputs(mModel->inputCount()), 102 mOutputs(mModel->outputCount()) { 103 VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder"; 104} 105 106int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type, 107 const void* buffer, size_t length) { 108 uint32_t count = static_cast<uint32_t>(mInputs.size()); 109 if (index >= count) { 110 LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count; 111 return ANEURALNETWORKS_BAD_DATA; 112 } 113 if (type != nullptr) { 114 int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false); 115 if (n != ANEURALNETWORKS_NO_ERROR) { 116 return n; 117 } 118 } 119 if (length > 0xFFFFFFFF) { 120 LOG(ERROR) << 
"ANeuralNetworksExecution_setInput input exceeds max length " << length; 121 return ANEURALNETWORKS_BAD_DATA; 122 } 123 uint32_t l = static_cast<uint32_t>(length); 124 return mInputs[index].setFromPointer(mModel->getInputOperand(index), type, 125 const_cast<void*>(buffer), l); 126} 127 128int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 129 const Memory* memory, size_t offset, size_t length) { 130 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory() 131 132 uint32_t count = static_cast<uint32_t>(mInputs.size()); 133 if (index >= count) { 134 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " " 135 << count; 136 return ANEURALNETWORKS_BAD_DATA; 137 } 138 if (!memory->validateSize(offset, length)) { 139 return ANEURALNETWORKS_BAD_DATA; 140 } 141 // TODO validate the rest 142 uint32_t poolIndex = mMemories.add(memory); 143 return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset, 144 length); 145} 146 147int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 148 size_t length) { 149 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 150 if (index >= count) { 151 LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count; 152 return ANEURALNETWORKS_BAD_DATA; 153 } 154 if (type != nullptr) { 155 int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false); 156 if (n != ANEURALNETWORKS_NO_ERROR) { 157 return n; 158 } 159 } 160 if (length > 0xFFFFFFFF) { 161 LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length; 162 return ANEURALNETWORKS_BAD_DATA; 163 } 164 uint32_t l = static_cast<uint32_t>(length); 165 return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l); 166} 167 168int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 169 
const Memory* memory, size_t offset, size_t length) { 170 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory() 171 172 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 173 if (index >= count) { 174 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " " 175 << count; 176 return ANEURALNETWORKS_BAD_DATA; 177 } 178 if (!memory->validateSize(offset, length)) { 179 return ANEURALNETWORKS_BAD_DATA; 180 } 181 // TODO validate the rest 182 uint32_t poolIndex = mMemories.add(memory); 183 return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset, 184 length); 185} 186 187// Attempt synchronous execution of full model on CPU. 188// Ensure that executionCallback->notify() is called. 189static void cpuFallbackFull(const ExecutionBuilder* executionBuilder, 190 const sp<ExecutionCallback>& executionCallback) { 191 VLOG(EXECUTION) << "cpuFallbackFull"; 192 StepExecutor executor(executionBuilder, executionBuilder->getModel(), 193 nullptr /* no VersionedIDevice, so CPU */, 194 nullptr /* no IPreparedModel */); 195 executor.mapInputsAndOutputsTrivially(); 196 sp<ExecutionCallback> fallbackCallback; 197 if (executor.startCompute(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) { 198 executionCallback->notify(ErrorStatus::GENERAL_FAILURE); 199 return; 200 } 201 fallbackCallback->wait(); 202 executionCallback->notify(fallbackCallback->getStatus()); 203} 204 205// Attempt synchronous execution on CPU. 206// (1) First, attempt to execute this step on CPU. If successful, 207// return true. (Do not call executionCallback->notify().) 208// (2) If unsuccessful, attempt to execute the full model on CPU, 209// ensure that executionCallback->notify() is called, and return 210// false. 
211static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder, 212 const ExecutionPlan* plan, 213 std::shared_ptr<ExecutionPlan::Controller> controller, 214 const sp<ExecutionCallback>& executionCallback) { 215 VLOG(EXECUTION) << "cpuFallbackPartial"; 216 std::shared_ptr<StepExecutor> executor; 217 int n = plan->fallback(controller, &executor); 218 if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) { 219 cpuFallbackFull(executionBuilder, executionCallback); 220 return false; 221 } 222 sp<ExecutionCallback> fallbackCallback; 223 if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) { 224 cpuFallbackFull(executionBuilder, executionCallback); 225 return false; 226 } 227 fallbackCallback->wait(); 228 if (fallbackCallback->getStatus() != ErrorStatus::NONE) { 229 cpuFallbackFull(executionBuilder, executionCallback); 230 return false; 231 } 232 return true; 233} 234 235static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder, 236 const ExecutionPlan* plan, 237 std::shared_ptr<ExecutionPlan::Controller> controller, 238 bool allowFallback, 239 const sp<ExecutionCallback>& executionCallback) { 240 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)"; 241 while (true) { 242 std::shared_ptr<StepExecutor> executor; 243 VLOG(EXECUTION) << "looking for next StepExecutor"; 244 int n = plan->next(controller, &executor); 245 if (n != ANEURALNETWORKS_NO_ERROR) { 246 if (allowFallback) { 247 cpuFallbackFull(executionBuilder, executionCallback); 248 } else { 249 executionCallback->notify(ErrorStatus::GENERAL_FAILURE); 250 } 251 return; 252 } 253 if (executor == nullptr) { 254 executionCallback->notify(ErrorStatus::NONE); 255 return; 256 } 257 258 sp<ExecutionCallback> stepCallback; 259 n = executor->startCompute(&stepCallback); 260 if (n != ANEURALNETWORKS_NO_ERROR) { 261 if (allowFallback) { 262 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) { 263 // 
Successfully executed one step on CPU. 264 continue; 265 } else { 266 // Either successfully executed entire plan on 267 // CPU, or tried and failed to do so. 268 return; 269 } 270 } else { 271 executionCallback->notify(ErrorStatus::GENERAL_FAILURE); 272 return; 273 } 274 } 275 stepCallback->wait(); 276 ErrorStatus status = stepCallback->getStatus(); 277 if (status != ErrorStatus::NONE) { 278 if (allowFallback) { 279 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) { 280 // Successfully executed one step on CPU. 281 continue; 282 } else { 283 // Either successfully executed entire plan on 284 // CPU, or tried and failed to do so. 285 return; 286 } 287 } else { 288 executionCallback->notify(status); 289 return; 290 } 291 } 292 } 293} 294 295int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 296 *synchronizationCallback = nullptr; 297 298 // TODO validate that we have full types for all inputs and outputs, 299 // that the graph is not cyclic, 300 301 for (auto& p : mInputs) { 302 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 303 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified"; 304 return ANEURALNETWORKS_BAD_DATA; 305 } 306 } 307 for (auto& p : mOutputs) { 308 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 309 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified"; 310 return ANEURALNETWORKS_BAD_DATA; 311 } 312 } 313 314#ifndef DISABLE_PARTITIONED_EXECUTION 315 { 316 // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan 317 // with the compilation and execution phases of the NN API? Or retain that path 318 // as a fallback in the case of partitioning failure? 319 // 320 // TODO: Entire plan-based-path should run in an asynchronous thread -- 321 // take the asynchronous thread logic out of startComputeOnCpu() and use 322 // it to wrap the plan-based-path. 
323 if (mPartitioning > 0) { 324 const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning); 325 std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this); 326 if (controller == nullptr) { 327 if (!allowFallback) { 328 return ANEURALNETWORKS_OP_FAILED; 329 } 330 } else { 331 // TODO: use a thread pool 332 333 // Prepare the callback for asynchronous execution. 334 // sp<ExecutionCallback> object is returned when the 335 // execution has been successfully launched, otherwise a 336 // nullptr is returned. The executionCallback is 337 // abstracted in the NN API as an "event". 338 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 339 std::thread thread(asyncStartComputePartitioned, this, mPlan, controller, 340 allowFallback, 341 executionCallback); 342 executionCallback->bind_thread(std::move(thread)); 343 *synchronizationCallback = executionCallback; 344 return ANEURALNETWORKS_NO_ERROR; 345 } 346 } 347 } 348#else 349 { 350 // Find a driver that can handle all the operations. 351 // TODO: Does not handle CPU fallback (which is tricky because 352 // StepExecutor::startCompute() is designed as 353 // asynchronous). 354 // TODO: Does not actually behave asynchronously (because 355 // StepExecutor::startCompute() isn't actually asynchronous 356 // on a device as opposed to a CPU). 
357 Model hidlModel; 358 mModel->setHidlModel(&hidlModel); 359 const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers(); 360 for (const auto& device : devices) { 361 hidl_vec<bool> supports; 362 VLOG(EXECUTION) << "Checking " << device->getName(); 363 device->getSupportedOperations(hidlModel, &supports); 364 if (std::find(supports.begin(), supports.end(), false) == supports.end()) { 365 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on " << device->getName(); 366 StepExecutor executor(this, mModel, device->getInterface(), 367 nullptr /* no IPreparedModel, so compile */); 368 executor.mapInputsAndOutputsTrivially(); 369 return executor.startCompute(synchronizationCallback); 370 } 371 } 372 } 373#endif // DISABLE_PARTITIONED_EXECUTION 374 375 // Run on the CPU. 376 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU"; 377 StepExecutor executor(this, mModel, 378 nullptr /* no VersionedIDevice, so CPU */, 379 nullptr /* no IPreparedModel */); 380 executor.mapInputsAndOutputsTrivially(); 381 return executor.startCompute(synchronizationCallback); 382} 383 384// Figures out how to place each of the input or outputs in a buffer. This just does the layout, 385// it does not copy data. Aligns each input a bit. 386int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, 387 Memory* memory) { 388 uint32_t nextPoolIndex = mMemories.size(); 389 int64_t total = 0; 390 for (auto& info : *args) { 391 if (info.state == ModelArgumentInfo::POINTER) { 392 DataLocation& loc = info.locationAndLength; 393 // TODO Good enough alignment? 
394 total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length); 395 loc.poolIndex = nextPoolIndex; 396 loc.offset = static_cast<uint32_t>(total); 397 total += loc.length; 398 } 399 }; 400 if (total > 0xFFFFFFFF) { 401 LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds " 402 "2^32."; 403 return ANEURALNETWORKS_BAD_DATA; 404 } 405 hidl_memory hidlMemory; 406 if (total > 0) { 407 memory->create(total); // TODO check error 408 mMemories.add(memory); 409 } 410 return ANEURALNETWORKS_NO_ERROR; 411} 412 413static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos, 414 hidl_vec<RequestArgument>* ioInfos) { 415 size_t count = argumentInfos.size(); 416 ioInfos->resize(count); 417 for (size_t i = 0; i < count; i++) { 418 const auto& info = argumentInfos[i]; 419 (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE, 420 .location = info.locationAndLength, 421 .dimensions = info.dimensions, 422 }; 423 } 424} 425 426StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder, 427 const ModelBuilder* model, 428 VersionedIDevice* driver, sp<IPreparedModel> preparedModel) : 429 mExecutionBuilder(executionBuilder), mModel(model), 430 mDriver(driver), mPreparedModel(preparedModel), 431 mInputs(model->inputCount()), mOutputs(model->outputCount()) {} 432 433void StepExecutor::mapInputsAndOutputsTrivially() { 434 mInputs = mExecutionBuilder->mInputs; 435 mOutputs = mExecutionBuilder->mOutputs; 436 mMemories = mExecutionBuilder->mMemories; 437} 438 439void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, 440 ModelArgumentInfo* executorInputOrOutput) { 441 *executorInputOrOutput = builderInputOrOutput; 442 switch (executorInputOrOutput->state) { 443 default: 444 nnAssert(!"unexpected ModelArgumentInfo::state"); 445 case ModelArgumentInfo::POINTER: 446 case ModelArgumentInfo::UNSPECIFIED: 447 break; 448 case ModelArgumentInfo::MEMORY: { 449 const 
uint32_t builderPoolIndex = 450 builderInputOrOutput.locationAndLength.poolIndex; 451 const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex]; 452 const uint32_t executorPoolIndex = mMemories.add(memory); 453 executorInputOrOutput->locationAndLength.poolIndex = 454 executorPoolIndex; 455 break; 456 } 457 } 458} 459 460int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand, 461 const Memory* memory, uint32_t offset, 462 ModelArgumentInfo* inputOrOutputInfo) { 463 // Should be similar to 464 // ExecutionBuilder::setInputFromMemory() 465 // ExecutionBuilder::setOutputFromMemory() 466 467 uint32_t poolIndex = mMemories.add(memory); 468 return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset); 469} 470 471static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) { 472 for (unsigned i = 0; i < args.size(); i++) { 473 const auto& arg = args[i]; 474 std::string prefix = kind + std::string("[") + std::to_string(i) + "] = "; 475 switch (arg.state) { 476 case ModelArgumentInfo::POINTER: 477 VLOG(EXECUTION) << prefix << "POINTER(" << arg.buffer << ")"; 478 break; 479 case ModelArgumentInfo::MEMORY: 480 VLOG(EXECUTION) << prefix << "MEMORY(" 481 << "pool=" << arg.locationAndLength.poolIndex 482 << ", " 483 << "off=" << arg.locationAndLength.offset 484 << ")"; 485 break; 486 case ModelArgumentInfo::HAS_NO_VALUE: 487 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE"; 488 break; 489 case ModelArgumentInfo::UNSPECIFIED: 490 VLOG(EXECUTION) << prefix << "UNSPECIFIED"; 491 break; 492 default: 493 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")"; 494 break; 495 } 496 } 497} 498 499int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 500 if (VLOG_IS_ON(EXECUTION)) { 501 logArguments("input", mInputs); 502 logArguments("output", mOutputs); 503 } 504 if (mDriver == nullptr) { 505 return startComputeOnCpu(synchronizationCallback); 506 } else { 507 
return startComputeOnDevice(synchronizationCallback); 508 } 509} 510 511int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) { 512 nnAssert(mDriver != nullptr); 513 514 *synchronizationCallback = nullptr; 515 516 // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated 517 // ExecutionPlan with the compilation and execution phases of the NN API 518 if (mPreparedModel == nullptr) { 519 Model model; 520 mModel->setHidlModel(&model); 521 522 // TODO Dangerous! In async, the model will outlive it here. Safe for now 523 sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback(); 524 ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preparedModelCallback); 525 if (prepareLaunchStatus != ErrorStatus::NONE) { 526 return ANEURALNETWORKS_OP_FAILED; 527 } 528 529 // Immediately synchronize with callback object for now 530 // TODO: change to asynchronous later 531 preparedModelCallback->wait(); 532 ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus(); 533 mPreparedModel = preparedModelCallback->getPreparedModel(); 534 if (prepareReturnStatus != ErrorStatus::NONE || mPreparedModel == nullptr) { 535 return ANEURALNETWORKS_OP_FAILED; 536 } 537 } 538 539 // We separate the input & output pools so that we reduce the copying done if we 540 // do an eventual remoting (hidl_memory->update()). We could also use it to set 541 // protection on read only memory but that's not currently done. 542 Memory inputPointerArguments; 543 Memory outputPointerArguments; 544 545 // Layout the input and output data 546 int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments); 547 if (n != ANEURALNETWORKS_NO_ERROR) { 548 return n; 549 } 550 n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments); 551 if (n != ANEURALNETWORKS_NO_ERROR) { 552 return n; 553 } 554 555 // Copy the input data that was specified via a pointer. 
556 // inputPointerArguments.update(); 557 for (auto& info : mInputs) { 558 if (info.state == ModelArgumentInfo::POINTER) { 559 DataLocation& loc = info.locationAndLength; 560 uint8_t* data = nullptr; 561 int n = inputPointerArguments.getPointer(&data); 562 if (n != ANEURALNETWORKS_NO_ERROR) { 563 return n; 564 } 565 memcpy(data + loc.offset, info.buffer, loc.length); 566 } 567 } 568 // TODO: Add inputPointerArguments.commit() and .update() at all the right places 569 570 Request request; 571 setRequestArgumentArray(mInputs, &request.inputs); 572 setRequestArgumentArray(mOutputs, &request.outputs); 573 uint32_t count = mMemories.size(); 574 request.pools.resize(count); 575 for (uint32_t i = 0; i < count; i++) { 576 request.pools[i] = mMemories[i]->getHidlMemory(); 577 } 578 579 // Prepare the callback for asynchronous execution. sp<ExecutionCallback> 580 // object is returned when the execution has been successfully launched, 581 // otherwise a nullptr is returned. The executionCallback is abstracted in 582 // the NN API as an "event". 583 // 584 // The sp is used for ref-counting purposes. Without it, the HIDL service 585 // could attempt to communicate with a dead callback object. 586 // 587 // TODO: Explain the "dead callback" problem further, either here or 588 // in the design document. 589 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 590 591 VLOG(EXECUTION) << "Before mPreparedModel->execute() " << toString(request); 592 // Execute. 593 // TODO: What happens to the Callback if the service dies abnormally 594 // -- won't that keep the Callback live forever, because the service 595 // never has the opportunity to bump the reference count down? Or 596 // maybe the HIDL infrastructure handles this magically? At worst, 597 // it seems like this is a small memory leak, if the Callback stays 598 // alive forever. 
599 Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback); 600 if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) { 601 VLOG(EXECUTION) << "**Execute failed**"; 602 return ANEURALNETWORKS_OP_FAILED; 603 } 604 605 // TODO: Remove this synchronization point when the block of code below is 606 // removed. 607 executionCallback->wait(); 608 Return<ErrorStatus> callbackStatus = executionCallback->getStatus(); 609 if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) { 610 VLOG(EXECUTION) << "**Execute async failed**"; 611 return ANEURALNETWORKS_OP_FAILED; 612 } 613 614 // Copy the output data from shared memory to the output buffers. 615 // TODO: Move this block of code somewhere else. It should not be in the 616 // startCompute function. 617 // TODO: outputMemory->update(); outputMemory->commit() 618 for (auto& info : mOutputs) { 619 if (info.state == ModelArgumentInfo::POINTER) { 620 DataLocation& loc = info.locationAndLength; 621 uint8_t* data = nullptr; 622 int n = outputPointerArguments.getPointer(&data); 623 if (n != ANEURALNETWORKS_NO_ERROR) { 624 return n; 625 } 626 memcpy(info.buffer, data + loc.offset, loc.length); 627 } 628 } 629 VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed"; 630 631 *synchronizationCallback = executionCallback; 632 return ANEURALNETWORKS_NO_ERROR; 633} 634 635static void asyncStartComputeOnCpu(const Model& model, const Request& request, 636 const std::vector<RunTimePoolInfo>& modelPoolInfos, 637 const std::vector<RunTimePoolInfo>& requestPoolInfos, 638 const sp<IExecutionCallback>& executionCallback) { 639 CpuExecutor executor; 640 int err = executor.run(model, request, modelPoolInfos, requestPoolInfos); 641 ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ? 
642 ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE; 643 executionCallback->notify(status); 644} 645 646int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) { 647 // TODO: use a thread pool 648 649 Model model; 650 mModel->setHidlModel(&model); 651 652 // Prepare the callback for asynchronous execution. sp<ExecutionCallback> 653 // object is returned when the execution has been successfully launched, 654 // otherwise a nullptr is returned. The executionCallback is abstracted in 655 // the NN API as an "event". 656 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 657 *synchronizationCallback = nullptr; 658 659 std::vector<RunTimePoolInfo> modelPoolInfos; 660 if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) { 661 return ANEURALNETWORKS_UNMAPPABLE; 662 } 663 664 std::vector<RunTimePoolInfo> requestPoolInfos; 665 requestPoolInfos.reserve(mMemories.size()); 666 bool fail = false; 667 for (const Memory* mem : mMemories) { 668 requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail); 669 } 670 if (fail) { 671 return ANEURALNETWORKS_UNMAPPABLE; 672 } 673 // Create as many pools as there are input / output. 674 auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { 675 for (ModelArgumentInfo& argumentInfo : argumentInfos) { 676 if (argumentInfo.state == ModelArgumentInfo::POINTER) { 677 argumentInfo.locationAndLength.poolIndex = 678 static_cast<uint32_t>(requestPoolInfos.size()); 679 argumentInfo.locationAndLength.offset = 0; 680 requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer)); 681 } 682 } 683 }; 684 fixPointerArguments(mInputs); 685 fixPointerArguments(mOutputs); 686 687 Request request; 688 setRequestArgumentArray(mInputs, &request.inputs); 689 setRequestArgumentArray(mOutputs, &request.outputs); 690 691 // TODO: should model be moved with a std::cref? 
692 std::thread thread(asyncStartComputeOnCpu, model, std::move(request), 693 std::move(modelPoolInfos), std::move(requestPoolInfos), 694 executionCallback); 695 executionCallback->bind_thread(std::move(thread)); 696 697 *synchronizationCallback = executionCallback; 698 return ANEURALNETWORKS_NO_ERROR; 699} 700 701} // namespace nn 702} // namespace android 703