/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include <new>
#include <sys/mman.h>

namespace android {
namespace nn {

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
bool RunTimePoolInfo::set(const hidl_memory& hidlMemory) {
    this->hidlMemory = hidlMemory;
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            return false;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            return false;
        }
        return true;
    } else if (memType == "mmap_fd") {
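        // For "mmap_fd" memory the native handle carries the file descriptor in data[0], the
        // mmap protection flags in data[1], and the low/high words of the file offset in
        // data[2]/data[3] (recombined by getSizeFromInts below).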
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "Can't mmap the file descriptor.";
            return false;
        }
        return true;
    } else {
        LOG(ERROR) << "unsupported hidl_memory type";
        return false;
    }
}

// Makes sure the output data is correctly updated after execution.
bool RunTimePoolInfo::update() {
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory->commit();
        return true;
    } else if (memType == "mmap_fd") {
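        // Only mappings that were created writable need to be flushed back to the file.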
        int prot = hidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = hidlMemory.size();
            return msync(buffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

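// Maps all the memory pools of a model or request and collects the resulting RunTimePoolInfos.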
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->resize(pools.size());
    for (size_t i = 0; i < pools.size(); i++) {
        auto& poolInfo = (*poolInfos)[i];
        if (!poolInfo.set(pools[i])) {
            LOG(ERROR) << "Could not map pool";
            return false;
        }
    }
    return true;
}

// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
    return true;
}

// Ignore the .pools entry in model and request.  This will have been taken care of
// by the caller.
int CpuExecutor::run(const Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run()";
    // VLOG(CPUEXE) << "model: " << toString(model);
    VLOG(CPUEXE) << "request: " << toString(request);

    mModel = &model;
    mRequest = &request; // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    for (auto runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
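                // Temporaries get their buffers lazily in setInfoAndAllocateIfNeeded() and
                // are freed by freeNoLongerUsedOperands() once numberOfUsesLeft drops to zero.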
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                  const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model.  That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // A use count of 0 means the operand is a constant or a model input/output; don't free it.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}

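// Executes a single operation of the model, dispatching on the operation type and the
// operand types.  Returns an ANEURALNETWORKS_* status code.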
int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected.  Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                          const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << " of " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(
                                  reinterpret_cast<const uint8_t*>(input.buffer),
                                  reinterpret_cast<float*>(output.buffer),
                                  input.shape());
            }
        } break;
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

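            // With 11 inputs the padding is given explicitly; with 8 inputs an implicit
            // padding scheme is given and the explicit values are computed below.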
            if (inCount == 11) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
                activation       = getScalarData<int32_t>(mOperands[ins[7]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const float*>(filter.buffer),
                                               filter.shape(),
                                               reinterpret_cast<const float*>(bias.buffer),
                                               bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               depth_multiplier, activation,
                                               reinterpret_cast<float*>(output.buffer),
                                               outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                              input.shape(),
                                              reinterpret_cast<const uint8_t*>(filter.buffer),
                                              filter.shape(),
                                              reinterpret_cast<const int32_t*>(bias.buffer),
                                              bias.shape(),
                                              padding_left, padding_right,
                                              padding_top, padding_bottom,
                                              stride_width, stride_height,
                                              depth_multiplier, activation,
                                              reinterpret_cast<uint8_t*>(output.buffer),
                                              outShape);
            }

        } break;
        case OperationType::CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t activation;

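            // 10 inputs carry explicit padding; 7 inputs carry an implicit padding scheme that
            // is converted to explicit padding below.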
            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height, activation,
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<const uint8_t*>(filter.buffer),
                                     filter.shape(),
                                     reinterpret_cast<const int32_t*>(bias.buffer),
                                     bias.shape(),
                                     padding_left, padding_right,
                                     padding_top, padding_bottom,
                                     stride_width, stride_height, activation,
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::AVERAGE_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

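            // As with the convolutions above, 10 inputs mean explicit padding and 7 inputs mean
            // an implicit padding scheme.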
            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                             input.shape(),
                                             padding_left, padding_right,
                                             padding_top, padding_bottom,
                                             stride_width, stride_height,
                                             filter_width, filter_height, activation,
                                             reinterpret_cast<float*>(output.buffer),
                                             outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                            input.shape(),
                                            padding_left, padding_right,
                                            padding_top, padding_bottom,
                                            stride_width, stride_height,
                                            filter_width, filter_height, activation,
                                            reinterpret_cast<uint8_t*>(output.buffer),
                                            outShape);
            }
        } break;
        case OperationType::L2_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            }
        } break;
        case OperationType::MAX_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         padding_left, padding_right,
                                         padding_top, padding_bottom,
                                         stride_width, stride_height,
                                         filter_width, filter_height, activation,
                                         reinterpret_cast<float*>(output.buffer),
                                         outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        outShape);
            }

        } break;
        case OperationType::RELU: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::RELU1: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::RELU6: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::TANH: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          tanhFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::LOGISTIC: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticFloat32(reinterpret_cast<const float*>(input.buffer),
                                          input.shape(),
                                          reinterpret_cast<float*>(output.buffer),
                                          outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                         input.shape(),
                                         reinterpret_cast<uint8_t*>(output.buffer),
                                         outShape);
            }
        } break;
        case OperationType::SOFTMAX: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input = mOperands[ins[0]];
            float beta = getScalarData<float>(mOperands[ins[1]]);
            if (beta <= 0.0f) {
                LOG(ERROR) << "beta must be positive for softmax";
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         beta,
                                         reinterpret_cast<float*>(output.buffer),
                                         output.shape());
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        beta,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        output.shape());
            }
        } break;
        case OperationType::FULLY_CONNECTED: {
            if (!allParametersPresent(4, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input   = mOperands[ins[0]];
            RunTimeOperandInfo& weights = mOperands[ins[1]];
            RunTimeOperandInfo& bias    = mOperands[ins[2]];

            int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<const float*>(weights.buffer),
                                                weights.shape(),
                                                reinterpret_cast<const float*>(bias.buffer),
                                                bias.shape(),
                                                activation,
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const uint8_t*>(weights.buffer),
                                               weights.shape(),
                                               reinterpret_cast<const int32_t*>(bias.buffer),
                                               bias.shape(),
                                               activation,
                                               reinterpret_cast<uint8_t*>(output.buffer),
                                               outShape);
            }
        } break;
        case OperationType::CONCATENATION: {
            if (outs.size() != 1 || ins.size() < 2) {
                return ANEURALNETWORKS_BAD_DATA;
            }
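            // The last input is the concatenation axis; all preceding inputs are the tensors to
            // be joined along that axis.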
975            int numInputTensors = ins.size() - 1;
976            int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
977
978            RunTimeOperandInfo& output = mOperands[outs[0]];
979            Shape outShape = output.shape();
980
981            const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
982            if (firstInput.type == OperandType::TENSOR_FLOAT32) {
983                std::vector<Shape> inputShapes(numInputTensors);
984                std::vector<const float*> inputDataPtrs(numInputTensors);
985
986                for (int i=0; i<numInputTensors; i++) {
987                    RunTimeOperandInfo& input = mOperands[ins[i]];
988                    inputShapes[i] = input.shape();
989                    inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
990                }
991                success = concatenationPrepare(inputShapes, axis, &outShape) &&
992                          setInfoAndAllocateIfNeeded(&output, outShape) &&
993                          concatenationFloat32(inputDataPtrs, inputShapes, axis,
994                                               reinterpret_cast<float*>(output.buffer), outShape);
995            } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
996                std::vector<Shape> inputShapes(numInputTensors);
997                std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
998
999                for (int i=0; i<numInputTensors; i++) {
1000                    RunTimeOperandInfo& input = mOperands[ins[i]];
1001                    inputShapes[i] = input.shape();
1002                    inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
1003                }
1004                success = concatenationPrepare(inputShapes, axis, &outShape) &&
1005                          setInfoAndAllocateIfNeeded(&output, outShape) &&
1006                          concatenationQuant8(inputDataPtrs, inputShapes, axis,
1007                                              reinterpret_cast<uint8_t*>(output.buffer),
1008                                              outShape);
1009            }
1010        } break;
        case OperationType::L2_NORMALIZATION: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<uint8_t*>(output.buffer),
                                       outShape);
            }
        } break;
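        // LOCAL_RESPONSE_NORMALIZATION: inputs are the tensor followed by the radius,
        // bias, alpha, and beta scalars; only float32 is handled here.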
        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
            if (!allParametersPresent(5, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
            float bias = getScalarData<float>(mOperands[ins[2]]);
            float alpha = getScalarData<float>(mOperands[ins[3]]);
            float beta = getScalarData<float>(mOperands[ins[4]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
                                                   input.shape(),
                                                   radius, bias, alpha, beta,
                                                   reinterpret_cast<float*>(output.buffer),
                                                   outShape);
            }
        } break;
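        // RESHAPE: the second input holds the target shape; the data is copied unchanged
        // into the reshaped output.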
        case OperationType::RESHAPE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& targetShape = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = reshapePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(targetShape.buffer),
                                     getNumberOfElements(targetShape.shape()),
                                     &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<void*>(output.buffer),
                                     outShape);
        } break;
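        // RESIZE_BILINEAR: inputs are the tensor and the target output width and height;
        // only float32 is handled here.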
        case OperationType::RESIZE_BILINEAR: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
            int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = resizeBilinearPrepare(input.shape(),
                                                width, height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            }
        } break;
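        // DEPTH_TO_SPACE: rearranges data from the depth dimension into spatial blocks of
        // the given block size.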
        case OperationType::DEPTH_TO_SPACE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = depthToSpacePrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      depthToSpaceGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
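        // SPACE_TO_DEPTH: the inverse rearrangement, moving spatial blocks of the given
        // block size into the depth dimension.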
        case OperationType::SPACE_TO_DEPTH: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToDepthPrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToDepthGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
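        // EMBEDDING_LOOKUP: gathers the rows of the values tensor selected by the lookups
        // tensor.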
        case OperationType::EMBEDDING_LOOKUP: {
            const RunTimeOperandInfo &values =
                mOperands[ins[EmbeddingLookup::kValueTensor]];
            const RunTimeOperandInfo &lookups =
                mOperands[ins[EmbeddingLookup::kLookupTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[EmbeddingLookup::kOutputTensor]];

            Shape outputShape;
            EmbeddingLookup lookup(operation, mOperands);

            success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lookup.Eval();
        } break;
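        // HASHTABLE_LOOKUP: matches each lookup against the keys tensor and copies the
        // corresponding row of values; the hits tensor records which lookups were found.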
        case OperationType::HASHTABLE_LOOKUP: {
            const RunTimeOperandInfo &lookups =
                mOperands[ins[HashtableLookup::kLookupTensor]];
            const RunTimeOperandInfo &keys =
                mOperands[ins[HashtableLookup::kKeyTensor]];
            const RunTimeOperandInfo &values =
                mOperands[ins[HashtableLookup::kValueTensor]];

            RunTimeOperandInfo &output =
                mOperands[outs[HashtableLookup::kOutputTensor]];
            RunTimeOperandInfo &hits =
                mOperands[outs[HashtableLookup::kHitsTensor]];

            Shape outputShape, hitShape;
            HashtableLookup lookup(operation, mOperands);

            success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
                                             &outputShape, &hitShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                setInfoAndAllocateIfNeeded(&hits, hitShape) &&
                lookup.Eval();
        } break;
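        // LSH_PROJECTION: the output shape depends on the projection parameters, so it is
        // computed by LSHProjection::Prepare before evaluation.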
        case OperationType::LSH_PROJECTION: {
            RunTimeOperandInfo &output =
                mOperands[outs[LSHProjection::kOutputTensor]];

            Shape outputShape;
            LSHProjection lsh(operation, mOperands);

            success = LSHProjection::Prepare(operation, mOperands,
                                             &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lsh.Eval();
        } break;
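        // LSTM: a single cell step producing four outputs: a scratch buffer, the output
        // state, the cell state, and the output tensor.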
        case OperationType::LSTM: {
            RunTimeOperandInfo &scratch =
                mOperands[outs[LSTMCell::kScratchBufferTensor]];
            RunTimeOperandInfo &outputStateOut =
                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
            RunTimeOperandInfo &cellStateOut =
                mOperands[outs[LSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[LSTMCell::kOutputTensor]];

            Shape scratchShape, outputStateShape, cellStateShape, outputShape;
            LSTMCell lstm_cell(operation, mOperands);

            success = LSTMCell::Prepare(operation, mOperands,
                                        &scratchShape, &outputStateShape,
                                        &cellStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
                setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
                setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lstm_cell.Eval();
        } break;
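        // RNN: a basic recurrent cell producing the updated hidden state and the output
        // tensor.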
        case OperationType::RNN: {
            RunTimeOperandInfo &hiddenStateOut =
                mOperands[outs[RNN::kHiddenStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[RNN::kOutputTensor]];

            Shape hiddenStateShape, outputShape;
            RNN rnn_cell(operation, mOperands);

            success = RNN::Prepare(operation, mOperands,
                                   &hiddenStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                rnn_cell.Eval();
        } break;
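        // SVDF: a singular-value-decomposition filter cell producing the updated filter
        // state and the output tensor.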
        case OperationType::SVDF: {
            RunTimeOperandInfo &stateOut =
                mOperands[outs[SVDF::kStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[SVDF::kOutputTensor]];

            Shape stateShape, outputShape;
            SVDF svdf(operation, mOperands);

            success = SVDF::Prepare(operation, mOperands,
                                    &stateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                svdf.Eval();
        } break;
        default:
            nnAssert(false);
            break;
    }
    if (!success) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return ANEURALNETWORKS_OP_FAILED;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}

} // namespace nn
} // namespace android