/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
#include "Operations.h"
#include "CpuOperationUtils.h"

#include <memory>
#include <mutex>
#include <new>

#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
21eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
22eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace android {
23eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace nn {
24eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
// If possible we will use this static buffer for the im2col tensor instead of
// allocating one on the heap. Requests larger than kStaticBufferSize bytes
// fall back to a heap allocation.
static constexpr size_t kStaticBufferSize = 1605632;
// The buffer is reinterpreted as an array of the kernel's element type
// (float or uint8_t in this file), so it must be aligned at least as strictly
// as float; a plain char array carries no such guarantee.
alignas(float) static char static_scratch_buffer[kStaticBufferSize];
28eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
// Guards the resources shared by every convolution in this file: the
// static_scratch_buffer and other state that is not thread-safe, such as the
// gemmlowp::GemmContext. Hold it for the whole duration of a kernel call.
// std::mutex is safe for pthreads on Android.
static std::mutex executionMutex;
33c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang
// Declares the locals shared by the convolution kernels in this file.
//
// Expects these names in the enclosing function: inputShape, filterShape,
// outputShape, padding_top and padding_left. The enclosing function must
// return bool, because the macro executes `return false` when the im2col
// scratch buffer would be too large or cannot be allocated.
//
// After expansion the following are available:
//   height, width, inDepth          - dimensions 1, 2 and 3 of inputShape
//   filterHeight, filterWidth       - dimensions 1 and 2 of filterShape
//   outHeight, outWidth             - dimensions 1 and 2 of outputShape
//   paddingHeight, paddingWidth     - padding_top / padding_left as uint32_t
//   im2colDim                       - tflite::Dims<4> describing the scratch
//                                     tensor (sizes and packed strides)
//   im2colByteSize                  - size of the scratch tensor in bytes
//   im2colData                      - scratch storage of `Type`; points at
//                                     static_scratch_buffer when it fits,
//                                     otherwise at a heap array
//   im2colGuard                     - owns the heap array, if one was needed
//
// When im2colData aliases static_scratch_buffer the caller must hold
// executionMutex while the buffer is in use.
#define ANDROID_NN_CONV_PARAMETERS(Type)                                        \
    uint32_t height       = getSizeOfDimension(inputShape, 1);                  \
    uint32_t width        = getSizeOfDimension(inputShape, 2);                  \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1);                 \
    uint32_t filterWidth  = getSizeOfDimension(filterShape, 2);                 \
    uint32_t outHeight    = getSizeOfDimension(outputShape, 1);                 \
    uint32_t outWidth     = getSizeOfDimension(outputShape, 2);                 \
    uint32_t inDepth      = getSizeOfDimension(inputShape, 3);                  \
    uint32_t im2colBatches = getSizeOfDimension(outputShape, 0);                \
                                                                                \
    uint32_t paddingHeight = (uint32_t)padding_top;                             \
    uint32_t paddingWidth = (uint32_t)padding_left;                             \
                                                                                \
    /* http://b/77982879: tflite::optimized_ops::Conv uses int for offsets, */  \
    /* so the buffer must stay below 0x7fffffff bytes. The running size is  */  \
    /* clamped to that limit after every multiply: it is < 2^31 beforehand  */  \
    /* and each factor is < 2^32, so the 64-bit product cannot wrap.        */  \
    const uint64_t im2colByteLimit = 0x7fffffff;                                \
    const uint32_t im2colFactors[] = {                                          \
            inDepth, filterHeight, filterWidth,                                 \
            outWidth, outHeight, im2colBatches};                                \
    uint64_t im2colByteSize = sizeof(Type);                                     \
    for (uint32_t im2colFactor : im2colFactors) {                               \
        im2colByteSize *= im2colFactor;                                         \
        if (im2colByteSize > im2colByteLimit) {                                 \
            im2colByteSize = im2colByteLimit;                                   \
        }                                                                       \
    }                                                                           \
    if (im2colByteSize >= im2colByteLimit) {                                    \
        LOG(ERROR) << "Conv size is too large, not enough memory";              \
        return false;                                                           \
    }                                                                           \
                                                                                \
    /* Only built once the size is known to be representable, so that for   */  \
    /* non-empty tensors the int sizes and strides below cannot overflow.   */  \
    tflite::Dims<4> im2colDim;                                                  \
    im2colDim.sizes[3] = static_cast<int>(im2colBatches);                       \
    im2colDim.sizes[2] = static_cast<int>(outHeight);                           \
    im2colDim.sizes[1] = static_cast<int>(outWidth);                            \
    im2colDim.sizes[0] = static_cast<int>(inDepth * filterHeight * filterWidth);\
                                                                                \
    im2colDim.strides[0] = 1;                                                   \
    for (int i = 1; i < 4; i++) {                                               \
        im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1];   \
    }                                                                           \
                                                                                \
    Type* im2colData = nullptr;                                                 \
    std::unique_ptr<Type[]> im2colGuard;                                        \
    if (im2colByteSize <= kStaticBufferSize) {                                  \
        im2colData = reinterpret_cast<Type *>(static_scratch_buffer);           \
    } else {                                                                    \
        /* The guard takes ownership immediately; nothrow keeps the failure */  \
        /* path a plain null check.                                         */  \
        im2colGuard.reset(                                                      \
                new (std::nothrow) Type[im2colByteSize / sizeof(Type)]);        \
        im2colData = im2colGuard.get();                                         \
        if (im2colData == nullptr) {                                            \
            LOG(ERROR) << "Conv size is too large, not enough memory";          \
            return false;                                                       \
        }                                                                       \
    }
7827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
79eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangbool convFloat32(const float* inputData, const Shape& inputShape,
80eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang                 const float* filterData, const Shape& filterShape,
81eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang                 const float* biasData, const Shape& biasShape,
826cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                 int32_t padding_left, int32_t padding_right,
836cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                 int32_t padding_top, int32_t padding_bottom,
846cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                 int32_t stride_width, int32_t stride_height,
856cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                 int32_t activation,
86eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang                 float* outputData, const Shape& outputShape) {
87eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
8827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    ANDROID_NN_CONV_PARAMETERS(float)
89eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
90d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang    float output_activation_min, output_activation_max;
91d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang    CalculateActivationRangeFloat(activation, &output_activation_min,
92d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang                                  &output_activation_max);
93eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
94c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    // Prevent concurrent executions that may access the scratch buffer.
95c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    std::unique_lock<std::mutex> lock(executionMutex);
96d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang    tflite::optimized_ops::Conv(
97d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            inputData, convertShapeToDims(inputShape),
98d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            filterData, convertShapeToDims(filterShape),
99d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            biasData, convertShapeToDims(biasShape),
100d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            stride_width, stride_height, paddingWidth, paddingHeight,
101d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            output_activation_min, output_activation_max,
102d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            outputData, convertShapeToDims(outputShape),
103d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            im2colData, im2colDim);
104eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang    return true;
105eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang}
106eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang
10727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wangbool convQuant8(const uint8_t* inputData, const Shape& inputShape,
10827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang                const uint8_t* filterData, const Shape& filterShape,
10927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang                const int32_t* biasData, const Shape& biasShape,
1106cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                int32_t padding_left, int32_t padding_right,
1116cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                int32_t padding_top, int32_t padding_bottom,
1126cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                int32_t stride_width, int32_t stride_height,
1136cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang                int32_t activation,
11427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang                uint8_t* outputData, const Shape& outputShape) {
11527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
11627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    ANDROID_NN_CONV_PARAMETERS(uint8_t)
11727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
1188eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang    int32_t inputOffset = -inputShape.offset;
1198eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang    int32_t filterOffset = -filterShape.offset;
1208eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang    int32_t outputOffset = outputShape.offset;
1218eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang
12227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    float real_multiplier = 0.0;
12327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    int32_t output_multiplier = 0;
12427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    int32_t output_shift = 0;
12527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    int32_t output_activation_min = 0;
12627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    int32_t output_activation_max = 0;
12727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
128be2b22578baf949d7be42ba002cee94304daf53cMiao Wang    if (!GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape,
129be2b22578baf949d7be42ba002cee94304daf53cMiao Wang                                          outputShape, &real_multiplier) ||
130be2b22578baf949d7be42ba002cee94304daf53cMiao Wang            !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
131be2b22578baf949d7be42ba002cee94304daf53cMiao Wang                                              &output_shift)){
132be2b22578baf949d7be42ba002cee94304daf53cMiao Wang        return false;
133be2b22578baf949d7be42ba002cee94304daf53cMiao Wang    }
13427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    CalculateActivationRangeUint8(activation, outputShape,
13527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang                                  &output_activation_min,
13627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang                                  &output_activation_max);
13727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
13827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    static gemmlowp::GemmContext gemm_context;
13927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
140c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    // Prevent concurrent executions that may access the scratch buffer and
141c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    // gemm_context.
142c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    std::unique_lock<std::mutex> lock(executionMutex);
143c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    // Alow gemmlowp automatically decide how many threads to use.
144c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang    gemm_context.set_max_num_threads(0);
145d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang    tflite::optimized_ops::Conv(
146d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            inputData, convertShapeToDims(inputShape), inputOffset,
147d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            filterData, convertShapeToDims(filterShape), filterOffset,
148d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            biasData, convertShapeToDims(biasShape),
149d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            stride_width, stride_height, paddingWidth, paddingHeight,
150d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            outputOffset, output_multiplier, output_shift,
151d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            output_activation_min, output_activation_max,
152d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            outputData, convertShapeToDims(outputShape),
153d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang            im2colData, im2colDim, &gemm_context);
15427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang    return true;
15527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang}
15627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang
// The helper macro is only meant for the kernels above; drop it here so it
// does not remain defined for the rest of the translation unit.
#undef ANDROID_NN_CONV_PARAMETERS
158eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang}  // namespace nn
159eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang}  // namespace android
160