1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "SVDF.h"

#include <cstring>
#include <vector>

#include "CpuExecutor.h"
#include "HalInterfaces.h"
21
22namespace android {
23namespace nn {
24
25namespace {
26
27template <typename T>
28inline T *GetBuffer(RunTimeOperandInfo* operand) {
29  return reinterpret_cast<T*>(operand->buffer);
30}
31
32template <typename T>
33inline const T *GetBuffer(const RunTimeOperandInfo* operand) {
34  return reinterpret_cast<const T*>(operand->buffer);
35}
36
37}
38
39SVDF::SVDF(const Operation& operation,
40           std::vector<RunTimeOperandInfo>& operands) {
41    input_ = GetInput(operation, operands, kInputTensor);
42    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
43    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
44    bias_ = GetInput(operation, operands, kBiasTensor);
45    state_in_ = GetInput(operation, operands, kStateInTensor);
46
47    params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam));
48    params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarData<int>(
49        *GetInput(operation, operands, kActivationParam)));
50
51    state_out_ = GetOutput(operation, operands, kStateOutTensor);
52    output_ = GetOutput(operation, operands, kOutputTensor);
53}
54
55bool SVDF::Prepare(const Operation &operation,
56                   std::vector<RunTimeOperandInfo> &operands,
57                   Shape *stateShape,
58                   Shape *outputShape) {
59  // Check we have all the inputs and outputs we need.
60  const int num_inputs = NumInputsWithValues(operation, operands);
61
62  NN_CHECK(num_inputs == 6 || num_inputs == 7);
63  NN_CHECK_EQ(NumOutputs(operation), 2);
64
65  const RunTimeOperandInfo *input =
66      GetInput(operation, operands, SVDF::kInputTensor);
67  const RunTimeOperandInfo *weights_feature =
68      GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
69  const RunTimeOperandInfo *weights_time =
70      GetInput(operation, operands, SVDF::kWeightsTimeTensor);
71
72  // Check all the parameters of tensor match within themselves and match the
73  // input configuration.
74  const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
75  const uint32_t batch_size = SizeOfDimension(input, 0);
76  const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
77  NN_CHECK_EQ(num_filters % rank, 0);
78  const uint32_t num_units = num_filters / rank;
79  const uint32_t memory_size = SizeOfDimension(weights_time, 1);
80  NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
81  NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);
82
83  const RunTimeOperandInfo *bias =
84      GetInput(operation, operands, kBiasTensor);
85  if (!IsNullInput(bias)) {
86    NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
87  }
88
89  // Resize state.
90  const Shape &inputShape = input->shape();
91  stateShape->type = inputShape.type;
92  stateShape->dimensions = { batch_size, memory_size * num_filters };
93  stateShape->offset = inputShape.offset;
94  stateShape->scale = inputShape.scale;
95
96  // Resize output.
97  outputShape->type = inputShape.type;
98  outputShape->dimensions = { batch_size, num_units };
99  outputShape->offset = inputShape.offset;
100  outputShape->scale = inputShape.scale;
101
102  return true;
103}
104
105bool SVDF::Eval() {
106    const int rank = params_.rank_;
107    const int batch_size = SizeOfDimension(input_, 0);
108    const int input_size = SizeOfDimension(input_, 1);
109    const int num_filters = SizeOfDimension(weights_feature_, 0);
110    const int num_units = num_filters / rank;
111    const int memory_size = SizeOfDimension(weights_time_, 1);
112
113    memcpy(GetBuffer<float>(state_out_), GetBuffer<float>(state_in_),
114           sizeof(float) * batch_size * memory_size * num_filters);
115    // Compute conv1d(inputs, weights_feature).
116    for (int b = 0; b < batch_size; b++) {
117        float* state_ptr_batch = GetBuffer<float>(state_out_) + b * memory_size * num_filters;
118        for (int c = 0; c < num_filters; c++) {
119            float* state_ptr = state_ptr_batch + c * memory_size;
120            state_ptr[memory_size - 1] = 0.0;
121        }
122    }
123    // The state left most column is used to save current cycle activation. This
124    // is achieved by starting at state->data.f[memory_size - 1] and having the
125    // stride equal to memory_size.
126    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
127        GetBuffer<float>(weights_feature_), num_filters, input_size,
128        GetBuffer<float>(input_),  batch_size,
129        &GetBuffer<float>(state_out_)[memory_size - 1], memory_size);
130
131    // Compute matmul(state, weights_time).
132    // The right most column is used to save temporary output (with the size of
133    // num_filters). This is achieved by starting at state->data.f and having the
134    // stride equal to memory_size.
135    float scratch[batch_size * num_filters];
136    for (int b = 0; b < batch_size; b++) {
137        float* state_out_ptr_batch =
138            GetBuffer<float>(state_out_) + b * memory_size * num_filters;
139        float* scratch_ptr_batch = scratch + b * num_filters;
140        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
141            GetBuffer<float>(weights_time_), state_out_ptr_batch, memory_size, num_filters,
142            scratch_ptr_batch, /*result_stride=*/1);
143    }
144
145    // Initialize output with bias if provided.
146    if (!IsNullInput(bias_)) {
147        tflite::tensor_utils::VectorBatchVectorAssign(
148            GetBuffer<float>(bias_), num_units, batch_size,
149            GetBuffer<float>(output_));
150    } else {
151        tflite::tensor_utils::ZeroVector(
152            GetBuffer<float>(output_), batch_size * num_units);
153    }
154
155    // Reduction sum
156    for (int b = 0; b < batch_size; b++) {
157        float* output_ptr_batch = GetBuffer<float>(output_) + b * num_units;
158        float* scratch_ptr_batch = scratch + b * num_filters;
159        tflite::tensor_utils::ReductionSumVector(
160            scratch_ptr_batch, output_ptr_batch, num_units, rank);
161    }
162
163    // Apply activation.
164    for (int b = 0; b < batch_size; b++) {
165        float* output_ptr_batch = GetBuffer<float>(output_) + b * num_units;
166        tflite::tensor_utils::ApplyActivationToVector(
167            output_ptr_batch, num_units,
168            params_.activation_, output_ptr_batch);
169    }
170
171    // Right shift the state.
172    for (int b = 0; b < batch_size; b++) {
173        float* state_out_ptr_batch =
174            GetBuffer<float>(state_out_) + b * memory_size * num_filters;
175        for (int f = 0; f < num_filters; f++) {
176            tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
177                                          /*shift_value=*/0.0);
178            state_out_ptr_batch += memory_size;
179        }
180    }
181    return true;
182}
183
184}  // namespace nn
185}  // namespace android
186