1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#include <unistd.h>
16#include <cassert>
17#include <cmath>
18#include <cstdio>
19#include <cstdlib>
20#include <iostream>
21#include <limits>
22
23#include "tensorflow/contrib/lite/builtin_op_data.h"
24#include "tensorflow/contrib/lite/context.h"
25#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
26#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
27#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
28#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
29#include "tensorflow/contrib/lite/kernels/kernel_util.h"
30#include "tensorflow/contrib/lite/kernels/op_macros.h"
31
32namespace tflite {
33namespace ops {
34namespace builtin {
35namespace activations {
36
// Per-node state computed in the Prepare() functions and consumed by the
// quantized (uint8) paths in Eval().
struct OpData {
  // Fixed-point multiplier produced by QuantizeMultiplierGreaterThanOne /
  // PreprocessSoftmaxScaling from the input scale (and beta, for softmax).
  int32_t input_multiplier = 0;
  // Left shift paired with input_multiplier for the fixed-point rescale.
  int input_left_shift = 0;
  // Set by SigmoidPrepare via CalculateInputRadius; passed to
  // optimized_ops::Logistic.
  int32_t input_range_radius = 0;
  // Set by SoftmaxPrepare (negated input radius); passed to
  // optimized_ops::Softmax.
  int diff_min = 0;
};
43
44void* Init(TfLiteContext* context, const char* buffer, size_t length) {
45  // This is a builtin op, so we don't use the contents in 'buffer', if any.
46  // Instead, we allocate a new object to carry information from Prepare() to
47  // Eval().
48  return new OpData;
49}
50
51void Free(TfLiteContext* context, void* buffer) {
52  delete reinterpret_cast<OpData*>(buffer);
53}
54
55TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
56  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
57  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
58  TfLiteTensor* input = GetInput(context, node, 0);
59  TfLiteTensor* output = GetOutput(context, node, 0);
60  TF_LITE_ENSURE_EQ(context, input->type, output->type);
61
62  return context->ResizeTensor(context, output,
63                               TfLiteIntArrayCopy(input->dims));
64}
65
66TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) {
67  OpData* data = reinterpret_cast<OpData*>(node->user_data);
68
69  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
70  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
71  TfLiteTensor* input = GetInput(context, node, 0);
72  TfLiteTensor* output = GetOutput(context, node, 0);
73  TF_LITE_ENSURE_EQ(context, input->type, output->type);
74
75  if (input->type == kTfLiteUInt8) {
76    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
77    TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
78
79    static constexpr int kInputIntegerBits = 4;
80
81    const double input_real_multiplier =
82        input->params.scale *
83        static_cast<double>(1 << (31 - kInputIntegerBits));
84
85    QuantizeMultiplierGreaterThanOne(input_real_multiplier,
86                                     &data->input_multiplier,
87                                     &data->input_left_shift);
88    data->input_range_radius =
89        CalculateInputRadius(kInputIntegerBits, data->input_left_shift);
90  }
91
92  return context->ResizeTensor(context, output,
93                               TfLiteIntArrayCopy(input->dims));
94}
95
96TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
97  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
98  OpData* data = reinterpret_cast<OpData*>(node->user_data);
99
100  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
101  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
102  TfLiteTensor* input = GetInput(context, node, 0);
103  TfLiteTensor* output = GetOutput(context, node, 0);
104  TF_LITE_ENSURE_EQ(context, input->type, output->type);
105
106  TF_LITE_ENSURE(context,
107                 NumDimensions(input) == 2 || NumDimensions(input) == 4);
108
109  if (input->type == kTfLiteUInt8) {
110    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
111    TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
112
113    static const int kScaledDiffIntegerBits = 5;
114
115    tflite::PreprocessSoftmaxScaling(
116        params->beta, input->params.scale, kScaledDiffIntegerBits,
117        &data->input_multiplier, &data->input_left_shift);
118    data->diff_min = -1.0 * tflite::CalculateInputRadius(
119                                kScaledDiffIntegerBits, data->input_left_shift);
120  }
121
122  return context->ResizeTensor(context, output,
123                               TfLiteIntArrayCopy(input->dims));
124}
125
126TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
127  TfLiteTensor* input = GetInput(context, node, 0);
128  TfLiteTensor* output = GetOutput(context, node, 0);
129  switch (input->type) {
130    case kTfLiteFloat32: {
131      size_t elements = input->bytes / sizeof(float);
132      float* in = input->data.f;
133      float* in_end = in + elements;
134      float* out = output->data.f;
135      for (; in < in_end; in++, out++) *out = std::max(0.f, *in);
136      return kTfLiteOk;
137    } break;
138    default:
139      context->ReportError(context, "Only float32 supported currently.");
140      return kTfLiteError;
141  }
142}
143
144TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
145  TfLiteTensor* input = GetInput(context, node, 0);
146  TfLiteTensor* output = GetOutput(context, node, 0);
147  switch (input->type) {
148    case kTfLiteFloat32: {
149      size_t elements = input->bytes / sizeof(float);
150      float* in = input->data.f;
151      float* in_end = in + elements;
152      float* out = output->data.f;
153      for (; in < in_end; in++, out++) {
154        *out = std::min(std::max(-1.f, *in), 1.f);
155      }
156      return kTfLiteOk;
157    } break;
158    default:
159      context->ReportError(context, "Only float32 supported currently.");
160      return kTfLiteError;
161  }
162}
163
164TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
165  TfLiteTensor* input = GetInput(context, node, 0);
166  TfLiteTensor* output = GetOutput(context, node, 0);
167  switch (input->type) {
168    case kTfLiteFloat32: {
169      size_t elements = input->bytes / sizeof(float);
170      float* in = input->data.f;
171      float* in_end = in + elements;
172      float* out = output->data.f;
173      for (; in < in_end; in++, out++) *out = std::min(std::max(0.f, *in), 6.f);
174      return kTfLiteOk;
175    } break;
176    default:
177      context->ReportError(context, "Only float32 supported currently.");
178      return kTfLiteError;
179  }
180}
181
182TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
183  TfLiteTensor* input = GetInput(context, node, 0);
184  TfLiteTensor* output = GetOutput(context, node, 0);
185  switch (input->type) {
186    case kTfLiteFloat32: {
187      size_t elements = input->bytes / sizeof(float);
188      float* in = input->data.f;
189      float* in_end = in + elements;
190      float* out = output->data.f;
191      for (; in < in_end; in++, out++) *out = std::tanh(*in);
192      return kTfLiteOk;
193    } break;
194    default:
195      context->ReportError(context, "Only float32 supported currently.");
196      return kTfLiteError;
197  }
198}
199
// Sigmoid is also known as "Logistic".
201TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
202  OpData* data = reinterpret_cast<OpData*>(node->user_data);
203
204  TfLiteTensor* input = GetInput(context, node, 0);
205  TfLiteTensor* output = GetOutput(context, node, 0);
206  switch (input->type) {
207    case kTfLiteFloat32: {
208      size_t elements = input->bytes / sizeof(float);
209      float* in = input->data.f;
210      float* in_end = in + elements;
211      float* out = output->data.f;
212      for (; in < in_end; in++, out++) *out = 1.f / (1.f + std::exp(-*in));
213      break;
214    }
215    case kTfLiteUInt8: {
216      optimized_ops::Logistic(
217          GetTensorData<uint8_t>(input), GetTensorDims(input),
218          input->params.zero_point, data->input_range_radius,
219          data->input_multiplier, data->input_left_shift,
220          GetTensorData<uint8_t>(output), GetTensorDims(output));
221      break;
222    }
223    default:
224      context->ReportError(context, "Only float32 supported currently.");
225      return kTfLiteError;
226  }
227  return kTfLiteOk;
228}
229
// Takes a 2D tensor and performs softmax along the second dimension.
231void Softmax2DFloat(TfLiteTensor* input, TfLiteTensor* output,
232                    TfLiteSoftmaxParams* params) {
233  const int batch_size = input->dims->data[0];
234  const int input_size = input->dims->data[1];
235  float* in = input->data.f;
236  float* out = output->data.f;
237  TF_LITE_ASSERT(input_size > 0);
238
239  // For each batch
240  for (int b = 0; b < batch_size; b++) {
241    // Find the max coeff.
242    float max_coeff = in[0];
243    for (int i = 1; i < input_size; i++) {
244      if (in[i] > max_coeff) max_coeff = in[i];
245    }
246
247    // Compute the normalized sum of exps.
248    float exp_sum = 0.0;
249    for (int i = 0; i < input_size; i++) {
250      out[i] = std::exp((in[i] - max_coeff) * params->beta);
251      exp_sum += out[i];
252    }
253
254    // Divide by the sum of exps.
255    float reciprocal_sum_exp = 1.f / exp_sum;
256    for (int i = 0; i < input_size; i++) {
257      out[i] *= reciprocal_sum_exp;
258    }
259
260    // Advance in and out pointers for the next batch.
261    in += input_size;
262    out += input_size;
263  }
264}
265
266void Softmax2DQuantized(TfLiteTensor* input, TfLiteTensor* output,
267                        TfLiteSoftmaxParams* params, OpData* data) {
268  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
269  // always traverses the last dimension of a 4D tensor, we will pretend our 2D
270  // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X,
271  // 1, 1, Y) shape.
272  const int batch_size = input->dims->data[0];
273  const int input_size = input->dims->data[1];
274  optimized_ops::Softmax(GetTensorData<uint8_t>(input),
275                         GetTensorDims({batch_size, 1, 1, input_size}),
276                         data->input_multiplier, data->input_left_shift,
277                         data->diff_min, GetTensorData<uint8_t>(output),
278                         GetTensorDims({batch_size, 1, 1, input_size}));
279}
280
// Takes a 4D tensor and performs softmax along the fourth dimension.
282void Softmax4DFloat(TfLiteTensor* input, TfLiteTensor* output,
283                    TfLiteSoftmaxParams* params) {
284  optimized_ops::Softmax(GetTensorData<float>(input), GetTensorDims(input),
285                         params->beta, GetTensorData<float>(output),
286                         GetTensorDims(output));
287}
288
289void Softmax4DQuantized(TfLiteTensor* input, TfLiteTensor* output,
290                        TfLiteSoftmaxParams* params, OpData* data) {
291  optimized_ops::Softmax(GetTensorData<uint8_t>(input), GetTensorDims(input),
292                         data->input_multiplier, data->input_left_shift,
293                         data->diff_min, GetTensorData<uint8_t>(output),
294                         GetTensorDims(output));
295}
296
297TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
298  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
299  OpData* data = reinterpret_cast<OpData*>(node->user_data);
300
301  TfLiteTensor* input = GetInput(context, node, 0);
302  TfLiteTensor* output = GetOutput(context, node, 0);
303
304  // TODO(ahentz): consider an implementation that works for many (all?)
305  // dimensions.
306  switch (input->type) {
307    case kTfLiteFloat32: {
308      if (NumDimensions(input) == 2) {
309        Softmax2DFloat(input, output, params);
310        return kTfLiteOk;
311      }
312      if (NumDimensions(input) == 4) {
313        Softmax4DFloat(input, output, params);
314        return kTfLiteOk;
315      }
316      context->ReportError(context,
317                           "Only 2D and 4D tensors supported currently.");
318      return kTfLiteError;
319    }
320    case kTfLiteUInt8: {
321      if (NumDimensions(input) == 2) {
322        Softmax2DQuantized(input, output, params, data);
323        return kTfLiteOk;
324      }
325      if (NumDimensions(input) == 4) {
326        Softmax4DQuantized(input, output, params, data);
327        return kTfLiteOk;
328      }
329      context->ReportError(context,
330                           "Only 2D and 4D tensors supported currently.");
331      return kTfLiteError;
332    }
333    default:
334      context->ReportError(context,
335                           "Only float32 and uint8_t supported currently.");
336      return kTfLiteError;
337  }
338}
339
340}  // namespace activations
341
// RELU keeps no per-op state, so no init/free hooks are needed.
TfLiteRegistration* Register_RELU() {
  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                 activations::GenericPrepare,
                                 activations::ReluEval};
  return &r;
}
348
// RELU_N1_TO_1 keeps no per-op state, so no init/free hooks are needed.
TfLiteRegistration* Register_RELU_N1_TO_1() {
  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                 activations::GenericPrepare,
                                 activations::Relu1Eval};
  return &r;
}
355
// RELU6 keeps no per-op state, so no init/free hooks are needed.
TfLiteRegistration* Register_RELU6() {
  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                 activations::GenericPrepare,
                                 activations::Relu6Eval};
  return &r;
}
362
// TANH keeps no per-op state, so no init/free hooks are needed.
TfLiteRegistration* Register_TANH() {
  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                 activations::GenericPrepare,
                                 activations::TanhEval};
  return &r;
}
369
// LOGISTIC (sigmoid) carries OpData for its quantized path, so it wires the
// Init/Free lifecycle hooks.
TfLiteRegistration* Register_LOGISTIC() {
  static TfLiteRegistration r = {/*init=*/activations::Init,
                                 /*free=*/activations::Free,
                                 activations::SigmoidPrepare,
                                 activations::SigmoidEval};
  return &r;
}
376
// SOFTMAX carries OpData for its quantized path, so it wires the Init/Free
// lifecycle hooks.
TfLiteRegistration* Register_SOFTMAX() {
  static TfLiteRegistration r = {/*init=*/activations::Init,
                                 /*free=*/activations::Free,
                                 activations::SoftmaxPrepare,
                                 activations::SoftmaxEval};
  return &r;
}
383
384}  // namespace builtin
385}  // namespace ops
386}  // namespace tflite
387