1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ 16#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ 17 18#include <algorithm> 19 20#include "fixedpoint/fixedpoint.h" 21#include "public/gemmlowp.h" 22#include "tensorflow/contrib/lite/kernels/internal/common.h" 23#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" 24#include "tensorflow/contrib/lite/kernels/internal/types.h" 25 26namespace tflite { 27namespace reference_ops { 28 29inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, 30 int32 input_offset, const uint8* filter_data, 31 const Dims<4>& filter_dims, int32 filter_offset, 32 const int32* bias_data, const Dims<4>& bias_dims, 33 int stride_width, int stride_height, int pad_width, 34 int pad_height, int depth_multiplier, 35 int32 output_offset, int32 output_multiplier, 36 int output_shift, int32 output_activation_min, 37 int32 output_activation_max, uint8* output_data, 38 const Dims<4>& output_dims) { 39 const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); 40 const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); 41 const int input_height = ArraySize(input_dims, 2); 42 const int input_width = ArraySize(input_dims, 1); 43 const int input_depth = ArraySize(input_dims, 0); 44 const int filter_height = ArraySize(filter_dims, 2); 45 const int filter_width = ArraySize(filter_dims, 1); 46 const int output_height = ArraySize(output_dims, 2); 47 const int output_width = ArraySize(output_dims, 1); 48 TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); 49 50 for (int b = 0; b < batches; ++b) { 51 for (int out_y = 0; out_y < output_height; ++out_y) { 52 for (int out_x = 0; out_x < output_width; ++out_x) { 53 for (int ic = 0; ic < input_depth; ++ic) { 54 for (int m = 0; m < depth_multiplier; m++) { 55 const int oc = m + ic * depth_multiplier; 56 const int in_x_origin = (out_x * stride_width) - pad_width; 57 const int in_y_origin = (out_y * stride_height) - pad_height; 58 int32 acc = 0; 59 for (int filter_y = 0; filter_y < filter_height; ++filter_y) { 60 for (int filter_x = 0; filter_x < filter_width; ++filter_x) { 61 const int in_x = in_x_origin + filter_x; 62 const int in_y = in_y_origin + filter_y; 63 // If the location is outside the bounds of the input image, 64 // use zero as a default value. 65 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && 66 (in_y < input_height)) { 67 int32 input_val = 68 input_data[Offset(input_dims, ic, in_x, in_y, b)]; 69 int32 filter_val = filter_data[Offset(filter_dims, oc, 70 filter_x, filter_y, 0)]; 71 acc += 72 (filter_val + filter_offset) * (input_val + input_offset); 73 } 74 } 75 } 76 if (bias_data) { 77 acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)]; 78 } 79 acc = MultiplyByQuantizedMultiplierSmallerThanOne( 80 acc, output_multiplier, output_shift); 81 acc += output_offset; 82 acc = std::max(acc, output_activation_min); 83 acc = std::min(acc, output_activation_max); 84 output_data[Offset(output_dims, oc, out_x, out_y, b)] = 85 static_cast<uint8>(acc); 86 } 87 } 88 } 89 } 90 } 91} 92 93// Legacy, for compatibility with old checked-in code. 94template <FusedActivationFunctionType Ac> 95void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, 96 int32 input_offset, const uint8* filter_data, 97 const Dims<4>& filter_dims, int32 filter_offset, 98 const int32* bias_data, const Dims<4>& bias_dims, 99 int stride_width, int stride_height, int pad_width, 100 int pad_height, int depth_multiplier, int32 output_offset, 101 int32 output_multiplier, int output_shift, 102 int32 output_activation_min, int32 output_activation_max, 103 uint8* output_data, const Dims<4>& output_dims) { 104 if (Ac == FusedActivationFunctionType::kNone) { 105 TFLITE_DCHECK_EQ(output_activation_min, 0); 106 TFLITE_DCHECK_EQ(output_activation_max, 255); 107 } 108 DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, 109 filter_offset, bias_data, bias_dims, stride_width, 110 stride_height, pad_width, pad_height, depth_multiplier, 111 output_offset, output_multiplier, output_shift, 112 output_activation_min, output_activation_max, output_data, 113 output_dims); 114} 115 116// Legacy, for compatibility with old checked-in code. 117template <FusedActivationFunctionType Ac> 118void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, 119 int32 input_offset, const uint8* filter_data, 120 const Dims<4>& filter_dims, int32 filter_offset, 121 const int32* bias_data, const Dims<4>& bias_dims, int stride, 122 int pad_width, int pad_height, int depth_multiplier, 123 int32 output_offset, int32 output_multiplier, 124 int output_shift, int32 output_activation_min, 125 int32 output_activation_max, uint8* output_data, 126 const Dims<4>& output_dims) { 127 DepthwiseConv<Ac>(input_data, input_dims, input_offset, filter_data, 128 filter_dims, filter_offset, bias_data, bias_dims, stride, 129 stride, pad_width, pad_height, depth_multiplier, 130 output_offset, output_multiplier, output_shift, 131 output_activation_min, output_activation_max, output_data, 132 output_dims); 133} 134 135} // end namespace reference_ops 136} // end namespace tflite 137 138#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ 139