/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// LSH Projection projects an input to a bit vector via locality sensitive
// hashing.
//
// Options:
//   Sparse:
//     Computed bit vector is considered to be sparse.
//     Each output element is an int32 made up by multiple bits computed from
//     hash functions.
//
//   Dense:
//     Computed bit vector is considered to be dense. Each output element is
//     either 0 or 1 that represents a bit.
//
// Input:
//   Tensor[0]: Hash functions. Dim.size == 2, DataType: Float.
//              Tensor[0].Dim[0]: Num of hash functions.
//              Tensor[0].Dim[1]: Num of projected output bits generated by
//                                each hash function.
//   In sparse case, Tensor[0].Dim[1] + ceil(log2(Tensor[0].Dim[0])) <= 32.
//
//   Tensor[1]: Input. Dim.size >= 1, No restriction on DataType.
//   Tensor[2]: Optional, Weight. Dim.size == 1, DataType: Float.
//              If not set, each element of input is considered to have the
//              same weight of 1.0. Tensor[1].Dim[0] == Tensor[2].Dim[0]
//
// Output:
//   Sparse:
//     Output.Dim == { Tensor[0].Dim[0] }
//     A tensor of int32 that represents hash signatures.
//
//     NOTE: To avoid collisions across hash functions, an offset value of
//     k * (1 << Tensor[0].Dim[1]) will be added to each signature, where
//     k is the index of the hash function.
49// Dense: 50// Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] } 51// A flattened tensor represents projected bit vectors. 52 53#include <unistd.h> 54#include <cassert> 55#include <cmath> 56#include <cstdio> 57#include <cstdlib> 58#include <cstring> 59#include <iostream> 60#include <limits> 61#include <memory> 62 63#include "tensorflow/contrib/lite/builtin_op_data.h" 64#include "tensorflow/contrib/lite/context.h" 65#include "tensorflow/contrib/lite/kernels/kernel_util.h" 66#include "tensorflow/contrib/lite/kernels/op_macros.h" 67#include "util/hash/farmhash.h" 68 69namespace tflite { 70namespace ops { 71namespace builtin { 72namespace lsh_projection { 73 74TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { 75 auto* params = 76 reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data); 77 TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3); 78 TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); 79 80 TfLiteTensor* hash = GetInput(context, node, 0); 81 TF_LITE_ENSURE_EQ(context, NumDimensions(hash), 2); 82 // Support up to 32 bits. 
83 TF_LITE_ENSURE(context, SizeOfDimension(hash, 1) <= 32); 84 85 TfLiteTensor* input = GetInput(context, node, 1); 86 TF_LITE_ENSURE(context, NumDimensions(input) >= 1); 87 88 if (NumInputs(node) == 3) { 89 TfLiteTensor* weight = GetInput(context, node, 2); 90 TF_LITE_ENSURE_EQ(context, NumDimensions(weight), 1); 91 TF_LITE_ENSURE_EQ(context, SizeOfDimension(weight, 0), 92 SizeOfDimension(input, 0)); 93 } 94 95 TfLiteTensor* output = GetOutput(context, node, 0); 96 TfLiteIntArray* outputSize = TfLiteIntArrayCreate(1); 97 switch (params->type) { 98 case kTfLiteLshProjectionSparse: 99 outputSize->data[0] = SizeOfDimension(hash, 0); 100 break; 101 case kTfLiteLshProjectionDense: 102 outputSize->data[0] = SizeOfDimension(hash, 0) * SizeOfDimension(hash, 1); 103 break; 104 default: 105 return kTfLiteError; 106 } 107 return context->ResizeTensor(context, output, outputSize); 108} 109 110// Compute sign bit of dot product of hash(seed, input) and weight. 111// NOTE: use float as seed, and convert it to double as a temporary solution 112// to match the trained model. This is going to be changed once the new 113// model is trained in an optimized method. 114// 115int RunningSignBit(const TfLiteTensor* input, const TfLiteTensor* weight, 116 float seed) { 117 double score = 0.0; 118 int input_item_bytes = input->bytes / SizeOfDimension(input, 0); 119 char* input_ptr = input->data.raw; 120 121 const size_t seed_size = sizeof(float); 122 const size_t key_bytes = sizeof(float) + input_item_bytes; 123 std::unique_ptr<char[]> key(new char[key_bytes]); 124 125 for (int i = 0; i < SizeOfDimension(input, 0); ++i) { 126 // Create running hash id and value for current dimension. 
127 memcpy(key.get(), &seed, seed_size); 128 memcpy(key.get() + seed_size, input_ptr, input_item_bytes); 129 130 int64_t hash_signature = farmhash::Fingerprint64(key.get(), key_bytes); 131 double running_value = static_cast<double>(hash_signature); 132 input_ptr += input_item_bytes; 133 if (weight == nullptr) { 134 score += running_value; 135 } else { 136 score += weight->data.f[i] * running_value; 137 } 138 } 139 140 return (score > 0) ? 1 : 0; 141} 142 143void SparseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input, 144 const TfLiteTensor* weight, int32_t* out_buf) { 145 int num_hash = SizeOfDimension(hash, 0); 146 int num_bits = SizeOfDimension(hash, 1); 147 for (int i = 0; i < num_hash; i++) { 148 int32_t hash_signature = 0; 149 for (int j = 0; j < num_bits; j++) { 150 float seed = hash->data.f[i * num_bits + j]; 151 int bit = RunningSignBit(input, weight, seed); 152 hash_signature = (hash_signature << 1) | bit; 153 } 154 *out_buf++ = hash_signature + i * (1 << num_bits); 155 } 156} 157 158void DenseLshProjection(const TfLiteTensor* hash, const TfLiteTensor* input, 159 const TfLiteTensor* weight, int32_t* out_buf) { 160 int num_hash = SizeOfDimension(hash, 0); 161 int num_bits = SizeOfDimension(hash, 1); 162 for (int i = 0; i < num_hash; i++) { 163 for (int j = 0; j < num_bits; j++) { 164 float seed = hash->data.f[i * num_bits + j]; 165 int bit = RunningSignBit(input, weight, seed); 166 *out_buf++ = bit; 167 } 168 } 169} 170 171TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { 172 auto* params = 173 reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data); 174 175 int32_t* out_buf = GetOutput(context, node, 0)->data.i32; 176 TfLiteTensor* hash = GetInput(context, node, 0); 177 TfLiteTensor* input = GetInput(context, node, 1); 178 TfLiteTensor* weight = 179 NumInputs(node) == 2 ? 
nullptr : GetInput(context, node, 2); 180 181 switch (params->type) { 182 case kTfLiteLshProjectionDense: 183 DenseLshProjection(hash, input, weight, out_buf); 184 break; 185 case kTfLiteLshProjectionSparse: 186 SparseLshProjection(hash, input, weight, out_buf); 187 break; 188 default: 189 return kTfLiteError; 190 } 191 192 return kTfLiteOk; 193} 194} // namespace lsh_projection 195 196TfLiteRegistration* Register_LSH_PROJECTION() { 197 static TfLiteRegistration r = {nullptr, nullptr, lsh_projection::Resize, 198 lsh_projection::Eval}; 199 return &r; 200} 201 202} // namespace builtin 203} // namespace ops 204} // namespace tflite 205