/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>

#include "tensorflow/contrib/lite/builtin_op_data.h"
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/kernels/activation_functor.h"
#include "tensorflow/contrib/lite/kernels/gemm_support.h"
#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/contrib/lite/kernels/kernel_util.h"
#include "tensorflow/contrib/lite/kernels/op_macros.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace fully_connected {

// This file has four implementations of FullyConnected.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
  kPie,  // Used by the PIE team
};

struct OpData {
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a right shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};
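
// A worked numeric example of the representation above (illustrative values,
// not taken from any particular model): a real multiplier of 0.375 can be
// stored as the Q0.31 fixed-point value 0x60000000 (0.75 * 2^31) together
// with a right shift of 1, since 0.75 * 2^-1 == 0.375. Prepare() below
// derives these two fields via QuantizeMultiplierSmallerThanOne().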

constexpr int kInputTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  gemm_support::IncrementUsageCounter(context);
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  gemm_support::DecrementUsageCounter(context);
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  // Check we have all the inputs and outputs we need.
  TF_LITE_ENSURE_EQ(context, node->inputs->size, 3);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  // Check that all the tensor parameters are consistent among themselves and
  // with the input configuration.
  int input_size = 1;
  for (int i = 0; i < input->dims->size; i++) {
    input_size *= input->dims->data[i];
  }

  const int batch_size = input_size / filter->dims->data[1];
  const int num_units = filter->dims->data[0];

  TF_LITE_ASSERT_EQ(input_size, batch_size * filter->dims->data[1]);
  if (bias) {
    TF_LITE_ASSERT_EQ(bias->dims->data[0], num_units);
  }

  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2);
  // The bias is optional, so only validate its rank when it is present.
  if (bias) {
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
  }

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  TfLiteType data_type = input->type;
  if (data_type != kTfLiteFloat32) {
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier,
                                     &data->output_shift);
    CalculateActivationRangeUint8(params->activation, output,
                                  &data->output_activation_min,
                                  &data->output_activation_max);
  }

  // Resize output.
  TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(2);
  output_size_array->data[0] = batch_size;
  output_size_array->data[1] = num_units;
  TF_LITE_ENSURE_OK(context,
                    context->ResizeTensor(context, output, output_size_array));
  return kTfLiteOk;
}

TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node,
                     TfLiteFullyConnectedParams* params, OpData* data,
                     TfLiteTensor* input, TfLiteTensor* filter,
                     TfLiteTensor* bias, TfLiteTensor* output) {
  int total_input_size = 1;
  for (int i = 0; i < input->dims->size; i++) {
    total_input_size *= input->dims->data[i];
  }

  const int input_size = filter->dims->data[1];
  const int batch_size = total_input_size / filter->dims->data[1];
  const int num_units = filter->dims->data[0];
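
  // In matrix terms, the PIE path below computes
  //   output = activation(input * transpose(filter) + bias)
  // where input is viewed as a [batch_size, input_size] matrix and filter as
  // a [num_units, input_size] matrix.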

  // Output = bias if bias tensor exists.
  if (bias) {
    tensor_utils::VectorBatchVectorAssign(bias->data.f, num_units, batch_size,
                                          output->data.f);
  } else {
    tensor_utils::ZeroVector(output->data.f, batch_size * num_units);
  }

  // Compute output += weight * input.
  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
      filter->data.f, num_units, input_size, input->data.f, batch_size,
      output->data.f, /*result_stride=*/1);

  // Apply the activation function.
  tensor_utils::ApplyActivationToVector(output->data.f, batch_size * num_units,
                                        params->activation, output->data.f);

  return kTfLiteOk;
}

#define TF_LITE_MACRO_DISPATCH(macro_name, params, target_namespace) \
  if (params->activation == kTfLiteActNone) {                        \
    macro_name(target_namespace, kNone);                             \
  }                                                                  \
  if (params->activation == kTfLiteActRelu) {                        \
    macro_name(target_namespace, kRelu);                             \
  }                                                                  \
  if (params->activation == kTfLiteActRelu6) {                       \
    macro_name(target_namespace, kRelu6);                            \
  }

template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteFullyConnectedParams* params, OpData* data,
                           TfLiteTensor* input, TfLiteTensor* filter,
                           TfLiteTensor* bias, TfLiteTensor* output) {
  gemmlowp::GemmContext* gemm_context = gemm_support::GetFromContext(context);

  int32_t input_offset = -input->params.zero_point;
  int32_t filter_offset = -filter->params.zero_point;
  int32_t output_offset = output->params.zero_point;
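
  // A sketch of the arithmetic performed by the kernels dispatched below (see
  // reference_ops::FullyConnected for the authoritative version): each output
  // element accumulates
  //   acc = sum_i (input[i] + input_offset) * (filter[i] + filter_offset)
  // plus the bias, is then scaled by output_multiplier with a rounding right
  // shift of output_shift, has output_offset added, and is finally clamped to
  // [output_activation_min, output_activation_max].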
#define TF_LITE_FULLY_CONNECTED(type)                                       \
  type::FullyConnected(                                                     \
      GetTensorData<uint8_t>(input), GetTensorDims(input), input_offset,    \
      GetTensorData<uint8_t>(filter), GetTensorDims(filter), filter_offset, \
      GetTensorData<int32_t>(bias), GetTensorDims(bias), output_offset,     \
      data->output_multiplier, data->output_shift,                          \
      data->output_activation_min, data->output_activation_max,             \
      GetTensorData<uint8_t>(output), GetTensorDims(output), gemm_context)
  if (kernel_type == kReference) {
    TF_LITE_FULLY_CONNECTED(reference_ops);
  } else if (kernel_type == kPie) {
    // TODO(ahentz): we don't have a quantized version of the PIE kernels, so
    // we just defer to the MINI ones.
    TF_LITE_FULLY_CONNECTED(optimized_ops);
  } else {
    TF_LITE_FULLY_CONNECTED(optimized_ops);
  }
#undef TF_LITE_FULLY_CONNECTED

  return kTfLiteOk;
}

template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteFullyConnectedParams* params, OpData* data,
                       TfLiteTensor* input, TfLiteTensor* filter,
                       TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRangeFloat(params->activation, &output_activation_min,
                                &output_activation_max);
#define TF_LITE_FULLY_CONNECTED(type)                                       \
  type::FullyConnected(GetTensorData<float>(input), GetTensorDims(input),   \
                       GetTensorData<float>(filter), GetTensorDims(filter), \
                       GetTensorData<float>(bias), GetTensorDims(bias),     \
                       output_activation_min, output_activation_max,        \
                       GetTensorData<float>(output), GetTensorDims(output))
  if (kernel_type == kReference) {
    TF_LITE_FULLY_CONNECTED(reference_ops);
  } else if (kernel_type == kPie) {
    return EvalPie(context, node, params, data, input, filter, bias, output);
  } else {
    TF_LITE_FULLY_CONNECTED(optimized_ops);
  }
#undef TF_LITE_FULLY_CONNECTED

  return kTfLiteOk;
}

#undef TF_LITE_MACRO_DISPATCH

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      return EvalFloat<kernel_type>(context, node, params, data, input, filter,
                                    bias, output);
    case kTfLiteUInt8:
      return EvalQuantized<kernel_type>(context, node, params, data, input,
                                        filter, bias, output);
    default:
      context->ReportError(context, "Type not currently supported.");
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace fully_connected

TfLiteRegistration* Register_FULLY_CONNECTED_REF() {
  static TfLiteRegistration r = {
      fully_connected::Init, fully_connected::Free, fully_connected::Prepare,
      fully_connected::Eval<fully_connected::kReference>};
  return &r;
}

TfLiteRegistration* Register_FULLY_CONNECTED_NEON_OPT() {
  static TfLiteRegistration r = {
      fully_connected::Init, fully_connected::Free, fully_connected::Prepare,
      fully_connected::Eval<fully_connected::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_FULLY_CONNECTED_GENERIC_OPT() {
  static TfLiteRegistration r = {
      fully_connected::Init, fully_connected::Free, fully_connected::Prepare,
      fully_connected::Eval<fully_connected::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_FULLY_CONNECTED_PIE() {
  static TfLiteRegistration r = {fully_connected::Init, fully_connected::Free,
                                 fully_connected::Prepare,
                                 fully_connected::Eval<fully_connected::kPie>};
  return &r;
}
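
// A usage sketch (the caller here is hypothetical, not part of this file):
// clients that want a specific variant can register it explicitly instead of
// relying on the default selected by Register_FULLY_CONNECTED() below, e.g.
//
//   tflite::MutableOpResolver resolver;
//   resolver.AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED,
//                       tflite::ops::builtin::Register_FULLY_CONNECTED_REF());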

TfLiteRegistration* Register_FULLY_CONNECTED() {
  // TODO(ahentz): We don't have a dedicated quantized version of the PIE
  // kernel. For now, the quantized version just defers to the corresponding
  // optimized MINI kernel. At some point we will allow different libraries to
  // be built with different kernels, but for now we have to pick one here.
  return Register_FULLY_CONNECTED_PIE();
  // The NEON/generic selection below is unreachable while the PIE kernel is
  // hard-coded above; it is kept for when the choice becomes configurable.
#ifdef USE_NEON
  return Register_FULLY_CONNECTED_NEON_OPT();
#else
  return Register_FULLY_CONNECTED_GENERIC_OPT();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite