Conv2D.cpp revision 27e9be3904b034e422ee9b6ab70b35ea994d2b39
1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Operations.h"
18#include "OperationsUtils.h"
19
20#include "internal/optimized/optimized_ops.h"
21
22namespace android {
23namespace nn {
24
// Preallocated scratch storage for the im2col tensor. The convolution kernels
// in this file use it whenever the im2col data fits in kStaticBufferSize bytes
// and only allocate from the heap for larger tensors.
// NOTE: the buffer is a single file-level array shared by every call, and no
// locking is applied to it in this file.
static constexpr int kStaticBufferSize = 1568 * 1024;  // 1,605,632 bytes
static char static_scratch_buffer[kStaticBufferSize];
28
29bool convPrepare(const Shape& input,
30                 const Shape& filter,
31                 const Shape& bias,
32                 int32_t padding,
33                 int32_t stride_width, int32_t stride_height,
34                 Shape* output) {
35    DCHECK_EQ(getNumberOfDimensions(input), 4);
36    DCHECK_EQ(getNumberOfDimensions(filter), 4);
37    DCHECK_EQ(getNumberOfDimensions(bias), 1);
38
39    DCHECK_EQ(getSizeOfDimension(filter, 3), getSizeOfDimension(bias, 0));
40    DCHECK_EQ(stride_width, stride_height);
41
42    uint32_t channels_out = getSizeOfDimension(filter, 0);
43    uint32_t width        = getSizeOfDimension(input, 2);
44    uint32_t height       = getSizeOfDimension(input, 1);
45    uint32_t filterWidth  = getSizeOfDimension(filter, 2);
46    uint32_t filterHeight = getSizeOfDimension(filter, 1);
47    uint32_t batches      = getSizeOfDimension(input, 0);
48
49    // Matching GetWindowedOutputSize in TensorFlow.
50    // TODO: changing this to explicit padding.
51    auto computeOutSize = [padding](uint32_t imageSize, uint32_t filterSize,
52                                    uint32_t stride) -> int {
53        return padding == kPaddingSame
54                   ? (imageSize + stride - 1) / stride
55                   : padding == kPaddingValid
56                         ? (imageSize - filterSize + stride) / stride
57                         : 0;
58    };
59
60    uint32_t outWidth = computeOutSize(width, filterWidth, stride_width);
61    uint32_t outHeight = computeOutSize(height, filterHeight, stride_height);
62
63    output->type = input.type;
64    output->dimensions = {batches, outHeight, outWidth, channels_out};
65    return true;
66}
67
// Declares, in the scope of the function that expands it, the locals shared by
// the convolution kernels in this file. The expanding function must have
// inputShape, filterShape, outputShape, stride_width and stride_height in
// scope. After expansion the following names are available:
//   height, width, filterHeight, filterWidth, outHeight, outWidth, inDepth
//   paddingHeight, paddingWidth - results of ComputePadding()
//   im2colDim                   - Dims<4> describing the im2col tensor
//   im2colByteSize              - size of the im2col tensor in bytes
//   im2colData                  - scratch storage for the im2col tensor: the
//                                 static buffer when it is large enough,
//                                 otherwise a new[] allocation that the caller
//                                 must delete[]; nullptr if that allocation
//                                 failed.
// The byte size is accumulated in 64 bits so that a large tensor cannot
// overflow a 32-bit int and be mistaken for one that fits the static buffer.
#define ANDROID_NN_CONV_PARAMETERS(Type)                                        \
    uint32_t height       = getSizeOfDimension(inputShape, 1);                  \
    uint32_t width        = getSizeOfDimension(inputShape, 2);                  \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1);                 \
    uint32_t filterWidth  = getSizeOfDimension(filterShape, 2);                 \
    uint32_t outHeight    = getSizeOfDimension(outputShape, 1);                 \
    uint32_t outWidth     = getSizeOfDimension(outputShape, 2);                 \
    uint32_t inDepth      = getSizeOfDimension(inputShape, 3);                  \
                                                                                \
    uint32_t paddingHeight =                                                    \
            ComputePadding(stride_height, height, filterHeight, outHeight);     \
    uint32_t paddingWidth =                                                     \
            ComputePadding(stride_width, width, filterWidth, outWidth);         \
                                                                                \
    Dims<4> im2colDim;                                                          \
    im2colDim.sizes[3] = static_cast<int>(getSizeOfDimension(outputShape, 0));  \
    im2colDim.sizes[2] = static_cast<int>(getSizeOfDimension(outputShape, 1));  \
    im2colDim.sizes[1] = static_cast<int>(getSizeOfDimension(outputShape, 2));  \
    im2colDim.sizes[0] = static_cast<int>(inDepth * filterHeight * filterWidth);\
                                                                                \
    im2colDim.strides[0] = 1;                                                   \
    for (int i = 1; i < 4; i++) {                                               \
        im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1];   \
    }                                                                           \
                                                                                \
    Type* im2colData = nullptr;                                                 \
    int64_t im2colByteSize = sizeof(Type);                                      \
    for (int i = 0; i < 4; i++) {                                               \
        im2colByteSize *= im2colDim.sizes[i];                                   \
    }                                                                           \
    if (im2colByteSize <= kStaticBufferSize) {                                  \
        im2colData = reinterpret_cast<Type *>(static_scratch_buffer);           \
    } else {                                                                    \
        im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)];    \
    }
103
104
105bool convFloat32(const float* inputData, const Shape& inputShape,
106                 const float* filterData, const Shape& filterShape,
107                 const float* biasData, const Shape& biasShape,
108                 int32_t padding, int32_t stride_width, int32_t stride_height, int32_t activation,
109                 float* outputData, const Shape& outputShape) {
110
111    ANDROID_NN_CONV_PARAMETERS(float)
112
113    #define ANDROID_NN_CONV(activation)                                        \
114        optimized_ops::Conv<FusedActivationFunctionType::activation>(          \
115            inputData, convertShapeToDims(inputShape),                         \
116            filterData, convertShapeToDims(filterShape),                       \
117            biasData, convertShapeToDims(biasShape),                           \
118            stride_width, paddingWidth, paddingHeight,                         \
119            outputData, convertShapeToDims(outputShape),                       \
120            im2colData, im2colDim)
121
122    if (activation == kActivationNone) {
123        ANDROID_NN_CONV(kNone);
124    }
125    if (activation == kActivationRelu) {
126        ANDROID_NN_CONV(kRelu);
127    }
128    if (activation == kActivationRelu6) {
129        ANDROID_NN_CONV(kRelu6);
130    }
131
132    #undef ANDROID_NN_CONV
133
134    if (im2colByteSize > kStaticBufferSize) {
135        delete[] im2colData;
136    }
137    return true;
138}
139
140bool convQuant8(const uint8_t* inputData, const Shape& inputShape,
141                const uint8_t* filterData, const Shape& filterShape,
142                const int32_t* biasData, const Shape& biasShape,
143                int32_t padding, int32_t stride_width, int32_t stride_height, int32_t activation,
144                uint8_t* outputData, const Shape& outputShape) {
145
146    ANDROID_NN_CONV_PARAMETERS(uint8_t)
147
148    float real_multiplier = 0.0;
149    int32_t output_multiplier = 0;
150    int32_t output_shift = 0;
151    int32_t output_activation_min = 0;
152    int32_t output_activation_max = 0;
153
154    GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape,
155                                     outputShape, &real_multiplier);
156    QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
157                                     &output_shift);
158    CalculateActivationRangeUint8(activation, outputShape,
159                                  &output_activation_min,
160                                  &output_activation_max);
161
162    static gemmlowp::GemmContext gemm_context;
163
164    int32_t inputOffset = -inputShape.offset;
165    int32_t filterOffset = -filterShape.offset;
166    int32_t outputOffset = outputShape.offset;
167    #define ANDROID_NN_CONV(activation)                                        \
168        optimized_ops::Conv<FusedActivationFunctionType::activation>(          \
169            inputData, convertShapeToDims(inputShape), inputOffset,            \
170            filterData, convertShapeToDims(filterShape), filterOffset,         \
171            biasData, convertShapeToDims(biasShape),                           \
172            stride_width, paddingWidth, paddingHeight,                         \
173            outputOffset, output_multiplier, output_shift,                     \
174            output_activation_min, output_activation_max,                      \
175            outputData, convertShapeToDims(outputShape),                       \
176            im2colData, im2colDim, &gemm_context)
177
178    if (activation == kActivationNone) {
179        ANDROID_NN_CONV(kNone);
180    }
181    if (activation == kActivationRelu) {
182        ANDROID_NN_CONV(kRelu);
183    }
184    if (activation == kActivationRelu6) {
185        ANDROID_NN_CONV(kRelu6);
186    }
187
188    #undef ANDROID_NN_CONV
189
190    if (im2colByteSize > kStaticBufferSize) {
191        delete[] im2colData;
192    }
193    return true;
194}
195
196#undef ANDROID_NN_CONV_PARAMETERS
197}  // namespace nn
198}  // namespace android
199