/*
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni#ifndef FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_
18915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni#define FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_
19915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
20915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni#include "ActivationFunctor.h"
21915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
22915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ninamespace android {
23915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ninamespace nn {
24915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ninamespace tensor_utils {
25915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
26915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Limit a float input f betweeen +abs_limit and -abs_limit.
27915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nifloat Clip(float f, float abs_limit);
28915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
29915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Multiply a matrix by a batch vector, and store results in a batch-size
30915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// vector using a stride value provided in result_stride. 'result_stride' shows
31915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// how the number of elements between consecutive result values. For example
32915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// result_stride = 1, will cause the output to look like this:
33915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// [O_1, 0_2, ... O_rows] in memory, but result_stride = 3, will cause it to be
34915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// arranged like this in memory: [O_1, x, x, 0_2, x, x, ..., O_rows]
35915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
36915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                         int m_cols, const float* vector,
37915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                         int n_batch, float* result,
38915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                         int result_stride);
39915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
40915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Cwise product of two vectors.
41915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid VectorVectorCwiseProduct(const float* vector1, const float* vector2,
42915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                              int v_size, float* result);
43915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
44915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Cwise product and accumulate of two vectors. Since it's a MAC opertation, the
45915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// assumption here is that result array is initialized to valid values.
46915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid VectorVectorCwiseProductAccumulate(const float* vector1,
47915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                        const float* vector2, int v_size,
48915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                        float* result);
49915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
50915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Dot product of two vectors.
51915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nifloat VectorVectorDotProduct(const float* vector1, const float* vector2,
52915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                             int v_size);
53915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
54915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Dot product of two batch vectors of size n_batch * v_size:
55915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
56915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            x_2_1, x_2_2, ..., x_2_vsize,
57915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            ...
58915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            x_nbatch_1,..., x_nbatch_vsize]
59915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
60915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            y_2_1, y_2_2, ..., y_2_vsize,
61915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            ...
62915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//            y_nbatch_1,..., y_nbatch_vsize]
63915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Then result will be a vector of n_batch size which will be saved with a
64915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// stride of result_stride in memory starting from 'result':
65915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
66915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
67915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//  ...
68915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
69915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid BatchVectorBatchVectorDotProduct(const float* vector1,
70915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                      const float* vector2, int v_size,
71915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                      int n_batch, float* result,
72915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                      int result_stride);
73915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
74915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
75915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// operation, the assumption here is that result array is initialized to valid
76915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// values.
77915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size,
78915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                             const float* batch_vector,
79915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                                             int n_batch, float* result);
80915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
81915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Batch vector initialization with another vector.
82915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid VectorBatchVectorAssign(const float* vector, int v_size, int n_batch,
83915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                             float* batch_vector);
84915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
85915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Apply sigmoid to elements of a vector.
86915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid ApplySigmoidToVector(const float* vector, int v_size, float* result);
87915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
88915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Apply activation function to elements of a vector.
89915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid ApplyActivationToVector(const float* vector, int v_size,
90915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                             ActivationFn activation, float* result);
91915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
92915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Copy vector to another vector.
93915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid CopyVector(const float* vector, int v_size, float* result);
94915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
95915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Compute "1.0f - elements of vector" (used in CIFG).
96915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid Sub1Vector(const float* vector, int v_size, float* result);
97915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
98915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Fill vector with 0.f.
99915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid ZeroVector(float* vector, int v_size);
100915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
101915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Clip elements of a vector using a abs_limit value.
102915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid ClipVector(const float* vector, int v_size, float abs_limit,
103915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                float* result);
104915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
105915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Shift left a vector in place with v_size size.
106915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid VectorShiftLeft(float* vector, int v_size, float shift_value);
107915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
108915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// Reduce-sum on a float input vector:
109915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// input_vector: float pointer to input vector.
110915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// input_stride: input vector stride.
111915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// output_vector: float pointer to vector.
112915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// output_size: output vector size.
113915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// reduction_size: number of consecutive elements from input vector which are
114915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni// added to get one element of output.
115915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Nivoid ReductionSumVector(const float* input_vector, int input_stride,
116915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                        float* output_vector, int output_size,
117915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni                        int reduction_size);
118915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni}  // namespace tensor_utils
119915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni}  // namespace nn
120915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni}  // namespace android
121915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni
122915cca2dab8ee7a05fec2905f11ec3664be49c6eYang Ni#endif  // FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_
123