1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
16#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
17
18#include <algorithm>
19
20#include "fixedpoint/fixedpoint.h"
21#include "public/gemmlowp.h"
22#include "tensorflow/contrib/lite/kernels/internal/common.h"
23#include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
24#include "tensorflow/contrib/lite/kernels/internal/types.h"
25
26namespace tflite {
27namespace reference_ops {
28
29inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
30                          int32 input_offset, const uint8* filter_data,
31                          const Dims<4>& filter_dims, int32 filter_offset,
32                          const int32* bias_data, const Dims<4>& bias_dims,
33                          int stride_width, int stride_height, int pad_width,
34                          int pad_height, int depth_multiplier,
35                          int32 output_offset, int32 output_multiplier,
36                          int output_shift, int32 output_activation_min,
37                          int32 output_activation_max, uint8* output_data,
38                          const Dims<4>& output_dims) {
39  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
40  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
41  const int input_height = ArraySize(input_dims, 2);
42  const int input_width = ArraySize(input_dims, 1);
43  const int input_depth = ArraySize(input_dims, 0);
44  const int filter_height = ArraySize(filter_dims, 2);
45  const int filter_width = ArraySize(filter_dims, 1);
46  const int output_height = ArraySize(output_dims, 2);
47  const int output_width = ArraySize(output_dims, 1);
48  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
49
50  for (int b = 0; b < batches; ++b) {
51    for (int out_y = 0; out_y < output_height; ++out_y) {
52      for (int out_x = 0; out_x < output_width; ++out_x) {
53        for (int ic = 0; ic < input_depth; ++ic) {
54          for (int m = 0; m < depth_multiplier; m++) {
55            const int oc = m + ic * depth_multiplier;
56            const int in_x_origin = (out_x * stride_width) - pad_width;
57            const int in_y_origin = (out_y * stride_height) - pad_height;
58            int32 acc = 0;
59            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
60              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
61                const int in_x = in_x_origin + filter_x;
62                const int in_y = in_y_origin + filter_y;
63                // If the location is outside the bounds of the input image,
64                // use zero as a default value.
65                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
66                    (in_y < input_height)) {
67                  int32 input_val =
68                      input_data[Offset(input_dims, ic, in_x, in_y, b)];
69                  int32 filter_val = filter_data[Offset(filter_dims, oc,
70                                                        filter_x, filter_y, 0)];
71                  acc +=
72                      (filter_val + filter_offset) * (input_val + input_offset);
73                }
74              }
75            }
76            if (bias_data) {
77              acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)];
78            }
79            acc = MultiplyByQuantizedMultiplierSmallerThanOne(
80                acc, output_multiplier, output_shift);
81            acc += output_offset;
82            acc = std::max(acc, output_activation_min);
83            acc = std::min(acc, output_activation_max);
84            output_data[Offset(output_dims, oc, out_x, out_y, b)] =
85                static_cast<uint8>(acc);
86          }
87        }
88      }
89    }
90  }
91}
92
93// Legacy, for compatibility with old checked-in code.
94template <FusedActivationFunctionType Ac>
95void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
96                   int32 input_offset, const uint8* filter_data,
97                   const Dims<4>& filter_dims, int32 filter_offset,
98                   const int32* bias_data, const Dims<4>& bias_dims,
99                   int stride_width, int stride_height, int pad_width,
100                   int pad_height, int depth_multiplier, int32 output_offset,
101                   int32 output_multiplier, int output_shift,
102                   int32 output_activation_min, int32 output_activation_max,
103                   uint8* output_data, const Dims<4>& output_dims) {
104  if (Ac == FusedActivationFunctionType::kNone) {
105    TFLITE_DCHECK_EQ(output_activation_min, 0);
106    TFLITE_DCHECK_EQ(output_activation_max, 255);
107  }
108  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
109                filter_offset, bias_data, bias_dims, stride_width,
110                stride_height, pad_width, pad_height, depth_multiplier,
111                output_offset, output_multiplier, output_shift,
112                output_activation_min, output_activation_max, output_data,
113                output_dims);
114}
115
116// Legacy, for compatibility with old checked-in code.
117template <FusedActivationFunctionType Ac>
118void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
119                   int32 input_offset, const uint8* filter_data,
120                   const Dims<4>& filter_dims, int32 filter_offset,
121                   const int32* bias_data, const Dims<4>& bias_dims, int stride,
122                   int pad_width, int pad_height, int depth_multiplier,
123                   int32 output_offset, int32 output_multiplier,
124                   int output_shift, int32 output_activation_min,
125                   int32 output_activation_max, uint8* output_data,
126                   const Dims<4>& output_dims) {
127  DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data,
128                    filter_dims, filter_offset, bias_data, bias_dims, stride,
129                    stride, pad_width, pad_height, depth_multiplier,
130                    output_offset, output_multiplier, output_shift,
131                    output_activation_min, output_activation_max, output_data,
132                    output_dims);
133}
134
135}  // end namespace reference_ops
136}  // end namespace tflite
137
138#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
139