1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
// LSH Projection projects an input to a bit vector via locality sensitive
// hashing.
18//
19// Options:
20//   Sparse:
21//     Computed bit vector is considered to be sparse.
22//     Each output element is an int32 made up by multiple bits computed from
23// hash functions.
24//
25//   Dense:
26//     Computed bit vector is considered to be dense. Each output element is
27// either 0 or 1 that represents a bit.
28//
29// Input:
30//   Tensor[0]: Hash functions. Dim.size == 2, DataType: Float.
31//              Tensor[0].Dim[0]: Num of hash functions.
32//              Tensor[0].Dim[1]: Num of projected output bits generated by
33//                                each hash function.
34//   In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32.
35//
36//   Tensor[1]: Input. Dim.size >= 1, No restriction on DataType.
37//   Tensor[2]: Optional, Weight. Dim.size == 1, DataType: Float.
//              If not set, each input element is considered to have the
// same weight of 1.0. Requires Tensor[1].Dim[0] == Tensor[2].Dim[0].
40//
41// Output:
42//   Sparse:
43//     Output.Dim == { Tensor[0].Dim[0] }
//     A tensor of int32 that represents hash signatures.
45//
46//     NOTE: To avoid collisions across hash functions, an offset value of
47//     k * (1 << Tensor[0].Dim[1]) will be added to each signature,
48//     k is the index of the hash function.
49//   Dense:
50//     Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
51//     A flattened tensor represents projected bit vectors.
52
53#include <unistd.h>
54#include <cassert>
55#include <cmath>
56#include <cstdio>
57#include <cstdlib>
58#include <cstring>
59#include <iostream>
60#include <limits>
61#include <memory>
62
63#include "tensorflow/contrib/lite/builtin_op_data.h"
64#include "tensorflow/contrib/lite/context.h"
65#include "tensorflow/contrib/lite/kernels/kernel_util.h"
66#include "tensorflow/contrib/lite/kernels/op_macros.h"
67#include "util/hash/farmhash.h"
68
69namespace tflite {
70namespace ops {
71namespace builtin {
72namespace lsh_projection {
73
74TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) {
75  auto* params =
76      reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data);
77  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
78  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
79
80  TfLiteTensor* hash = GetInput(context, node, 0);
81  TF_LITE_ENSURE_EQ(context, NumDimensions(hash), 2);
82  // Support up to 32 bits.
83  TF_LITE_ENSURE(context, SizeOfDimension(hash, 1) <= 32);
84
85  TfLiteTensor* input = GetInput(context, node, 1);
86  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
87
88  if (NumInputs(node) == 3) {
89    TfLiteTensor* weight = GetInput(context, node, 2);
90    TF_LITE_ENSURE_EQ(context, NumDimensions(weight), 1);
91    TF_LITE_ENSURE_EQ(context, SizeOfDimension(weight, 0),
92                      SizeOfDimension(input, 0));
93  }
94
95  TfLiteTensor* output = GetOutput(context, node, 0);
96  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(1);
97  switch (params->type) {
98    case kTfLiteLshProjectionSparse:
99      outputSize->data[0] = SizeOfDimension(hash, 0);
100      break;
101    case kTfLiteLshProjectionDense:
102      outputSize->data[0] = SizeOfDimension(hash, 0) * SizeOfDimension(hash, 1);
103      break;
104    default:
105      return kTfLiteError;
106  }
107  return context->ResizeTensor(context, output, outputSize);
108}
109
110// Compute sign bit of dot product of hash(seed, input) and weight.
111// NOTE: use float as seed, and convert it to double as a temporary solution
112//       to match the trained model. This is going to be changed once the new
113//       model is trained in an optimized method.
114//
115int RunningSignBit(const TfLiteTensor* input, const TfLiteTensor* weight,
116                   float seed) {
117  double score = 0.0;
118  int input_item_bytes = input->bytes / SizeOfDimension(input, 0);
119  char* input_ptr = input->data.raw;
120
121  const size_t seed_size = sizeof(float);
122  const size_t key_bytes = sizeof(float) + input_item_bytes;
123  std::unique_ptr<char[]> key(new char[key_bytes]);
124
125  for (int i = 0; i < SizeOfDimension(input, 0); ++i) {
126    // Create running hash id and value for current dimension.
127    memcpy(key.get(), &seed, seed_size);
128    memcpy(key.get() + seed_size, input_ptr, input_item_bytes);
129
130    int64_t hash_signature = farmhash::Fingerprint64(key.get(), key_bytes);
131    double running_value = static_cast<double>(hash_signature);
132    input_ptr += input_item_bytes;
133    if (weight == nullptr) {
134      score += running_value;
135    } else {
136      score += weight->data.f[i] * running_value;
137    }
138  }
139
140  return (score > 0) ? 1 : 0;
141}
142
143void SparseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input,
144                         const TfLiteTensor* weight, int32_t* out_buf) {
145  int num_hash = SizeOfDimension(hash, 0);
146  int num_bits = SizeOfDimension(hash, 1);
147  for (int i = 0; i < num_hash; i++) {
148    int32_t hash_signature = 0;
149    for (int j = 0; j < num_bits; j++) {
150      float seed = hash->data.f[i * num_bits + j];
151      int bit = RunningSignBit(input, weight, seed);
152      hash_signature = (hash_signature << 1) | bit;
153    }
154    *out_buf++ = hash_signature + i * (1 << num_bits);
155  }
156}
157
158void DenseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input,
159                        const TfLiteTensor* weight, int32_t* out_buf) {
160  int num_hash = SizeOfDimension(hash, 0);
161  int num_bits = SizeOfDimension(hash, 1);
162  for (int i = 0; i < num_hash; i++) {
163    for (int j = 0; j < num_bits; j++) {
164      float seed = hash->data.f[i * num_bits + j];
165      int bit = RunningSignBit(input, weight, seed);
166      *out_buf++ = bit;
167    }
168  }
169}
170
171TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
172  auto* params =
173      reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data);
174
175  int32_t* out_buf = GetOutput(context, node, 0)->data.i32;
176  TfLiteTensor* hash = GetInput(context, node, 0);
177  TfLiteTensor* input = GetInput(context, node, 1);
178  TfLiteTensor* weight =
179      NumInputs(node) == 2 ? nullptr : GetInput(context, node, 2);
180
181  switch (params->type) {
182    case kTfLiteLshProjectionDense:
183      DenseLshProjection(hash, input, weight, out_buf);
184      break;
185    case kTfLiteLshProjectionSparse:
186      SparseLshProjection(hash, input, weight, out_buf);
187      break;
188    default:
189      return kTfLiteError;
190  }
191
192  return kTfLiteOk;
193}
194}  // namespace lsh_projection
195
196TfLiteRegistration* Register_LSH_PROJECTION() {
197  static TfLiteRegistration r = {nullptr, nullptr, lsh_projection::Resize,
198                                 lsh_projection::Eval};
199  return &r;
200}
201
202}  // namespace builtin
203}  // namespace ops
204}  // namespace tflite
205