example_proto_fast_parsing.cc revision c27d1561bd89f1062e4cbb19262905e609daef80
1db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 3db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License"); 4db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweryou may not use this file except in compliance with the License. 5db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerYou may obtain a copy of the License at 6db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 7db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower http://www.apache.org/licenses/LICENSE-2.0 8db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software 10db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS, 11db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerSee the License for the specific language governing permissions and 13db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerlimitations under the License. 14db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower==============================================================================*/ 15db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/example_proto_fast_parsing.h" 16db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 17db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include <vector> 18db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 19db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/example.pb.h" 20db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/feature.pb_text.h" 21db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/numeric_op.h" 22db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/op_kernel.h" 23db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/register_types.h" 24db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/blocking_counter.h" 25db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/casts.h" 26db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/errors.h" 27db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/threadpool.h" 287705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower#include "tensorflow/core/lib/gtl/inlined_vector.h" 29353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower#include "tensorflow/core/lib/monitoring/counter.h" 30db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/logging.h" 31db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/protobuf.h" 32db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/presized_cuckoo_map.h" 33db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/sparse/sparse_tensor.h" 34db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 35db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace tensorflow { 36db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace example { 37db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 38db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 407705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlowertemplate <typename T> 417705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlowerusing SmallVector = gtl::InlinedVector<T, 4>; 427705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 43db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 44db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerauto EnableAliasing(A* a) -> decltype(a->EnableAliasing(true), void()) { 45db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower a->EnableAliasing(true); 46db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 47db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 48db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 49db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid EnableAliasing(A&& a) {} 50db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 51db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweruint8 PeekTag(protobuf::io::CodedInputStream* stream) { 52db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 53db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* ptr; 54db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int size; 55db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&ptr, &size)) return 0; 56db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return *static_cast<const uint8*>(ptr); 57db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 58db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 59e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kVarintTag(uint32 tag) { return (tag << 3) | 0; } 60e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kDelimitedTag(uint32 tag) { return (tag << 3) | 2; } 61e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kFixed32Tag(uint32 tag) { return (tag << 3) | 5; } 62db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 63db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace parsed { 64db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 65db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ParseDataType has to be called first, then appropriate ParseZzzzList. 66db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerclass Feature { 67db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower public: 68db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Feature() {} 69db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Feature(StringPiece serialized) : serialized_(serialized) {} 70db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 71db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Status ParseDataType(DataType* dtype) { 72db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(dtype != nullptr); 73db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (serialized_.empty()) { 74db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INVALID; 75db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 76db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 77db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 oneof_tag = static_cast<uint8>(*serialized_.data()); 78db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_.remove_prefix(1); 79db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (oneof_tag) { 80db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(1): 81db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_STRING; 82db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 83db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(2): 84db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_FLOAT; 85db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 86db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(3): 87db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INT64; 88db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 89db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 90db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Unsuported datatype."); 91db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 92db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 93db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 94db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 95c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 96c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseBytesList(Result* bytes_list) { 97db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(bytes_list != nullptr); 98db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 99db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 100db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 101db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 102db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 103db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 104db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 105db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 106db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 107db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 108db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; 109db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // parse string 110db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 bytes_length; 111db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&bytes_length)) return false; 112db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower string bytes; 113db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadString(&bytes, bytes_length)) return false; 114db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bytes_list->push_back(std::move(bytes)); 115db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 116db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 117db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 118db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 119db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 120c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 121c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseFloatList(Result* float_list) { 122db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(float_list != nullptr); 123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 124db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 130db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 131db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 132db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kFixed32Tag(1)) { 133db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 134db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 135db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 136db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 137db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 138db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 139db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 140db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 141db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 142db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 143db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 144db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 145db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 146db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 147db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 148db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 149db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 150db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 151db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kFixed32Tag(1))) return false; 152db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 153db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 154db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 155db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 156db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 157db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 158db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 159db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 160db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 161db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 162db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 163c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 164c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseInt64List(Result* int64_list) { 165db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(int64_list != nullptr); 166db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 167db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 168db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 169db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 170db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 171db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 172db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 173db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 174db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 175db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kVarintTag(1)) { 176db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 177db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 178db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 179db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 180db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 181db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 182db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 183db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 184db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 185967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower protobuf_uint64 n; // There is no API for int64 186db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 187db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64_list->push_back(n); 188db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 189db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 190db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 191db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 192db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 193db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kVarintTag(1))) return false; 194967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower protobuf_uint64 n; // There is no API for int64 195db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 196db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64_list->push_back(n); 197db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 198db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 199db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 200db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 201db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 202db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 203db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 204db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece GetSerialized() const { return serialized_; } 205db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 206db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower private: 207db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // TODO(lew): Pair of uint8* would be more natural. 208db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece serialized_; 209db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 210db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 211db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing FeatureMapEntry = std::pair<StringPiece, Feature>; 212db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Example = std::vector<FeatureMapEntry>; 213db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 214db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace parsed 215db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 216db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) { 217db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 218db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 219db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 220db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 221db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (length == 0) { 222db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(nullptr, 0); 223db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 224db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 225db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* stream_alias; 226db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int stream_size; 227db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&stream_alias, &stream_size)) { 228db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 229db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 230db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (static_cast<uint32>(stream_size) < length) return false; 231db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(static_cast<const char*>(stream_alias), length); 232db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->Skip(length); 233db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 234db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 235db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 236db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream, 237db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry* feature_map_entry) { 238db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 239db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(feature_map_entry != nullptr); 240db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 241db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 242db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 243db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 244db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_map_entry->first)) return false; 245db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(2))) return false; 246db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece feature_string_piece; 247db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_string_piece)) return false; 248db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower feature_map_entry->second = parsed::Feature(feature_string_piece); 249db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectAtEnd()) return false; 250db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 251db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 252db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 253db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 254db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatures(protobuf::io::CodedInputStream* stream, 255db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 256db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 257db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 258db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 259db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 260db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 261db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream->ExpectAtEnd()) { 262db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry feature_map_entry; 263db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 264db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseFeatureMapEntry(stream, &feature_map_entry)) return false; 265db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example->push_back(std::move(feature_map_entry)); 266db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 267db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 268db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 269db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 270db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 271db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(protobuf::io::CodedInputStream* stream, 272db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 273db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 274db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 27589f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // Loop over the input stream which may contain multiple serialized Example 27689f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // protos merged together as strings. This behavior is consistent with Proto's 27789f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // ParseFromString when string representations are concatenated. 27889f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy while (!stream->ExpectAtEnd()) { 279353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 280353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!ParseFeatures(stream, example)) return false; 281db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 282db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 283db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 284db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 285db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(StringPiece serialized, parsed::Example* example) { 286db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 287db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 288db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized.data()), serialized.size()); 289db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 290db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return ParseExample(&stream, example); 291db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 292db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 293db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 294db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 295db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool TestFastParse(const string& serialized, Example* example) { 296db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 297db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 298db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized, &parsed_example)) return false; 299db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto& features = *example->mutable_features(); 300353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower size_t parsed_example_size = parsed_example.size(); 301353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower for (size_t i = 0; i < parsed_example_size; ++i) { 302353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // This is a logic that standard protobuf parsing is implementing. 303353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // I.e. last entry in the map overwrites all the previous ones. 304353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed::FeatureMapEntry& name_and_feature = 305353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed_example[parsed_example_size - i - 1]; 306353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower string name = name_and_feature.first.ToString(); 307353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if ((*features.mutable_feature()).count(name) > 0) continue; 308353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 309353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto& value = (*features.mutable_feature())[name]; 310db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataType dtype; 311353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseDataType(&dtype).ok()) return false; 312db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 313db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INVALID: 314db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 315db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 3167705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<string> list; 317353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseBytesList(&list)) return false; 318db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_bytes_list(); 319db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& bytes : list) { 32009326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower auto* new_value = result_list->add_value(); 32109326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower new_value->swap(bytes); 322db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 323db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 324db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 325db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 3267705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<float> list; 327353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseFloatList(&list)) return false; 328db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_float_list(); 329db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (float f : list) { 330db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(f); 331db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 332db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 333db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 334db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 3357705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<int64> list; 336353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseInt64List(&list)) return false; 337db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_int64_list(); 338db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (int64 i : list) { 339db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(i); 340db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 341db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 342db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 343db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 344db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 345db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 346db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 347db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 348db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 349db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 350db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ----------------------------------------------------------------------------- 351db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 352db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 353db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 354db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Config = FastParseExampleConfig; 355db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 356db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid ParallelFor(const std::function<void(size_t)>& f, size_t n, 357db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool) { 358db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (n == 0) return; 3597705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower if (thread_pool == nullptr) { 3607705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower for (size_t i = 0; i < n; ++i) { 361db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower f(i); 3627705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } 3637705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } else { 3647705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower BlockingCounter counter(n - 1); 3657705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower for (size_t i = 1; i < n; ++i) { 3667705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower thread_pool->Schedule([i, &f, &counter] { 3677705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower f(i); 3687705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower counter.DecrementCount(); 3697705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower }); 3707705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } 3717705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower f(0); 3727705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower counter.Wait(); 373db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 374db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 375db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 376db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerenum class Type { Sparse, Dense }; 377db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 378db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SparseBuffer { 379db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features are in one of the 3 vectors below depending on config's dtype. 380db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Other 2 vectors remain empty. 3817705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<string> bytes_list; 3827705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<float> float_list; 3837705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<int64> int64_list; 384db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 385db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features of example i are elements with indices 386db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // from example_end_indices[i-1] to example_end_indices[i]-1 on the 387db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // appropriate xxxxx_list 388db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> example_end_indices; 389db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 390db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 391db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SeededHasher { 392db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 operator()(StringPiece s) const { 393db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Hash64(s.data(), s.size(), seed); 394db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 395db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 seed{0xDECAFCAFFE}; 396db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 397db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 398c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlowertemplate <typename T> 399c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlowerclass LimitedArraySlice { 400c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower public: 401c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower LimitedArraySlice(T* begin, size_t num_elements) 402c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower : current_(begin), end_(begin + num_elements) {} 403c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 404c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // May return negative if there were push_back calls after slice was filled. 405c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower int64 EndDistance() const { return end_ - current_; } 406c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 407c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // Attempts to push value to the back of this. If the slice has 408c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // already been filled, this method has no effect on the underlying data, but 409c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // it changes the number returned by EndDistance into negative values. 410c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower void push_back(T&& value) { 411c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (EndDistance() > 0) *current_ = std::move(value); 412c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower ++current_; 413c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 414c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 415c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower private: 416c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower T* current_; 417c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower T* end_; 418c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower}; 419c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 420db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseSerializedExample( 421db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const string& serialized_example, const string& example_name, 422db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const size_t example_index, const Config& config, 423db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const PresizedCuckooMap<std::pair<size_t, Type>>& config_index, 424db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher, std::vector<Tensor>* output_dense, 425db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<SparseBuffer>* output_sparse) { 426db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_dense != nullptr); 427db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_sparse != nullptr); 428db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 429db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized_example, &parsed_example)) { 430db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Could not parse example input, value: '", 431db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_example, "'"); 432db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 433353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower std::vector<int64> sparse_feature_last_example(config.sparse.size(), -1); 434353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower std::vector<int64> dense_feature_last_example(config.dense.size(), -1); 435db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 436db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle features present in the example. 437353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const size_t parsed_example_size = parsed_example.size(); 438353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower for (size_t i = 0; i < parsed_example_size; ++i) { 439353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // This is a logic that standard protobuf parsing is implementing. 440353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // I.e. last entry in the map overwrites all the previous ones. 441353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed::FeatureMapEntry& name_and_feature = 442353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed_example[parsed_example_size - i - 1]; 443353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 444353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const StringPiece feature_name = name_and_feature.first; 445db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Feature& feature = name_and_feature.second; 446353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 447db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::pair<size_t, Type> d_and_type; 448353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower uint64 h = hasher(feature_name); 449db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!config_index.Find(h, &d_and_type)) continue; 450353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 451db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t d = d_and_type.first; 452353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower bool is_dense = d_and_type.second == Type::Dense; 453353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 454353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower { 455353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Testing for PresizedCuckooMap collision. 456353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // TODO(lew): Use dense_hash_map and avoid this and hasher creation. 457353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const string& config_feature_name = is_dense 458353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower ? config.dense[d].feature_name 459353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower : config.sparse[d].feature_name; 460353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (feature_name != config_feature_name) continue; 461353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 462db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 463353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto example_error = [&](StringPiece suffix) { 464db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Name: ", example_name, ", Key: ", 465db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower feature_name, ", Index: ", example_index, 466353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower ". ", suffix); 467353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower }; 468353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 469353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto parse_error = [&] { 470353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return example_error("Can't parse serialized Example."); 471db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 472db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 473353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower DataType example_dtype; 474353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype)); 475353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 476353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (is_dense) { 477db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype == DT_INVALID) continue; 478db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 479353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // If feature was already visited, skip. 480353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Compare comment at the beginning of the loop. 481353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (dense_feature_last_example[d] == example_index) { 482353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower LOG(WARNING) << "Data loss! Feature '" << feature_name 483353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower << "' in present in multiple concatenated " 484353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "tf.Examples. Ignoring all but last one."; 485353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower static auto* duplicated_dense_feature = monitoring::Counter<0>::New( 486353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "/tensorflow/core/util/example_proto_fast_parsing/" 487353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "duplicated_dense_feature", 488353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "Dense feature appears twice in a tf.Example"); 489353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower duplicated_dense_feature->GetCell()->IncrementBy(1); 490353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower continue; 491353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 492353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower dense_feature_last_example[d] = example_index; 493353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 494db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != config.dense[d].dtype) { 495353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return example_error( 496353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower strings::StrCat("Data types don't match. Data type: ", 497353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower DataTypeString(example_dtype), "Expected type: ", 498353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower DataTypeString(config.dense[d].dtype))); 499db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 500db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor& out = (*output_dense)[d]; 501db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 502353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const std::size_t num_elements = config.dense[d].shape.num_elements(); 503db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t offset = example_index * num_elements; 504db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 505db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto shape_error = [&](size_t size, StringPiece type_str) { 506353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return example_error(strings::StrCat( 507353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "Number of ", type_str, 508db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower " values != expected. " 509db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Values size: ", 510353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower size, " but output shape: ", config.dense[d].shape.DebugString())); 511db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 512db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 513db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.dense[d].dtype) { 514db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 515db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<int64>().data() + offset; 516c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower LimitedArraySlice<int64> slice(out_p, num_elements); 517c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (!feature.ParseInt64List(&slice)) return parse_error(); 518c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (slice.EndDistance() != 0) { 519c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower return shape_error(num_elements - slice.EndDistance(), "int64"); 520c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 521db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 522db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 523db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 524db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<float>().data() + offset; 525c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower LimitedArraySlice<float> slice(out_p, num_elements); 526c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (!feature.ParseFloatList(&slice)) return parse_error(); 527c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (slice.EndDistance() != 0) { 528c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower return shape_error(num_elements - slice.EndDistance(), "float"); 529c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 530db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 531db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 532db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 533db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<string>().data() + offset; 534c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower LimitedArraySlice<string> slice(out_p, num_elements); 535c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (!feature.ParseBytesList(&slice)) return parse_error(); 536c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (slice.EndDistance() != 0) { 537c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower return shape_error(num_elements - slice.EndDistance(), "bytes"); 538db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 539db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 540db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 541db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 542db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 543db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 544db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { 545353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // If feature was already visited, skip. 546353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Compare comment at the beginning of the loop. 547353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (sparse_feature_last_example[d] == example_index) { 548353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower LOG(WARNING) << "Data loss! Feature '" << feature_name 549353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower << "' in present in multiple concatenated " 550353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "tf.Examples. Ignoring all but last one."; 551353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower static auto* duplicated_sparse_feature = monitoring::Counter<0>::New( 552353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "/tensorflow/core/util/example_proto_fast_parsing/" 553353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "duplicated_sparse_feature", 554353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower "sparse feature appears twice in a tf.Example"); 555353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower duplicated_sparse_feature->GetCell()->IncrementBy(1); 556353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower continue; 557353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 558353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower sparse_feature_last_example[d] = example_index; 559353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 560db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle sparse features. 561db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 562db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID && 563db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example_dtype != config.sparse[d].dtype) { 564353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return example_error( 565353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower strings::StrCat("Data types don't match. ", "Expected type: ", 566353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower DataTypeString(config.sparse[d].dtype))); 567db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 568db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 569db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 570db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 571db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 572db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseInt64List(&out.int64_list)) { 573353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 574db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 575db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 576db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.int64_list.size()); 577db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 578db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 579db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 580db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 581db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseFloatList(&out.float_list)) { 582353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 583db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 584db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 585db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.float_list.size()); 586db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 587db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 588db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 589db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 590db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseBytesList(&out.bytes_list)) { 591353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 592db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 593db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 594db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.bytes_list.size()); 595db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 596db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 597db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 598db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 599db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 600db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 601db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 602db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 603db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle missing dense features. 604db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 605353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (dense_feature_last_example[d] == example_index) continue; 606db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (config.dense[d].default_value.NumElements() == 0) { 607db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Name: ", example_name, ", Feature: ", 608db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config.dense[d].feature_name, 609db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower " is required but could not be found."); 610db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 611db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 612db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const Tensor& in = config.dense[d].default_value; 613db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor& out = (*output_dense)[d]; 614db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t num_elements = in.shape().num_elements(); 615db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t offset = example_index * num_elements; 616db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 617db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.dense[d].dtype) { 618db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 619db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<int64>().data(), num_elements, 620db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<int64>().data() + offset); 621db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 622db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 623db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 624db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<float>().data(), num_elements, 625db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<float>().data() + offset); 626db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 627db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 628db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 629db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<string>().data(), num_elements, 630db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<string>().data() + offset); 631db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 632db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 633db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 634db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 635db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 636db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 637db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 638db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle missing sparse features. 639db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 640353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (sparse_feature_last_example[d] == example_index) continue; 641db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 642db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t prev_example_end_index = 643db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.empty() ? 0 : out.example_end_indices.back(); 644db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(prev_example_end_index); 645db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 646db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 647db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 648db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 649db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 650db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus CheckConfigDataType(DataType dtype) { 651db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 652db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: 653db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: 654db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: 655db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 656db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 657db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Invalid config dtype: ", 658db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataTypeString(dtype)); 659db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 660db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 661db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 662db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 663db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 664db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseExample(const Config& config, 665db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> serialized, 666db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> example_names, 667db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool, Result* result) { 668db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 669db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Check config so we can safely CHECK(false) in switches on config.*.dtype 670db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.sparse) { 671db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 672db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 673db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.dense) { 674db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 675db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 676db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 677db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t config_size = config.dense.size() + config.sparse.size(); 678db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher; 679db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Build config index. 680db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size); 681db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ok = true; 682db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < 1000; ++i) { 683db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 684db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name), 685db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Dense}); 686db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 687db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 688db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name), 689db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Sparse}); 690db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 691db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (ok) break; 692db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower LOG(WARNING) << "Collision found. This should happen only if you have " 693db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "around 2^32 entries in your config."; 694db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower hasher.seed++; 695db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config_index.Clear(config_size); 696db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 697db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ok) { 698db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::Internal( 699db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Could not avoid collision. This should not happen."); 700db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 701db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 702db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Allocate dense output (sparse have to be buffered). 703db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 704db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape out_shape; 705db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(serialized.size()); 706db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (const int64 dim : config.dense[d].shape.dim_sizes()) { 707db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(dim); 708db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 709db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->dense_values.emplace_back(config.dense[d].dtype, out_shape); 710db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 711db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 712db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // This parameter affects performance in a big and data-dependent way. 7137705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t kMiniBatchSizeBytes = 50000; 714db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 7157705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Calculate number of minibatches. 7167705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // In main regime make each minibatch around kMiniBatchSizeBytes bytes. 7177705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Apply 'special logic' below for small and big regimes. 7187705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t num_minibatches = [&] { 7197705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t result = 0; 720db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t minibatch_bytes = 0; 721db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < serialized.size(); i++) { 722db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes == 0) { // start minibatch 7237705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower result++; 724db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 725db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes += serialized[i].size() + 1; 726db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes > kMiniBatchSizeBytes) { 727db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes = 0; 728db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 729db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 7307705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // 'special logic' 7317705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t min_minibatches = std::min<size_t>(8, serialized.size()); 7327705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t max_minibatches = 64; 7337705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower return std::max<size_t>(min_minibatches, 7347705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower std::min<size_t>(max_minibatches, result)); 735db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }(); 736db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 7377705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower auto first_example_of_minibatch = [&](size_t minibatch) -> size_t { 7387705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower return (serialized.size() * minibatch) / num_minibatches; 7397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower }; 7407705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 7417705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // TODO(lew): A big performance low-hanging fruit here is to improve 7427705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // num_minibatches calculation to take into account actual amount of work 7437705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // needed, as the size in bytes is not perfect. Linear combination of 7447705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // size in bytes and average number of features per example is promising. 7457705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Even better: measure time instead of estimating, but this is too costly 7467705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // in small batches. 7477705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Maybe accept outside parameter #num_minibatches? 748db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 749db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Do minibatches in parallel. 750db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches); 751db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<Status> status_of_minibatch(num_minibatches); 752db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto ProcessMiniBatch = [&](size_t minibatch) { 753db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_buffers[minibatch].resize(config.sparse.size()); 7547705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t start = first_example_of_minibatch(minibatch); 7557705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t end = first_example_of_minibatch(minibatch + 1); 756db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t e = start; e < end; ++e) { 757db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower status_of_minibatch[minibatch] = FastParseSerializedExample( 758db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized[e], 759db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower (example_names.size() > 0 ? example_names[e] : "<unknown>"), e, 760db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config, config_index, hasher, &result->dense_values, 761db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower &sparse_buffers[minibatch]); 762db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!status_of_minibatch[minibatch].ok()) break; 763db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 764db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 765db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 766db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ParallelFor(ProcessMiniBatch, num_minibatches, thread_pool); 767db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 768db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (Status& status : status_of_minibatch) { 769db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(status); 770db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 771db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 772db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Merge SparseBuffers from all minibatches for every config.sparse. 773db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto MergeMinibatches = [&](size_t d) { 774db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Loop over minibatches 775db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t total_num_features = 0; 776db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t max_num_features = 0; 777db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& sparse_values_tmp : sparse_buffers) { 778db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t>& end_indices = 779db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_values_tmp[d].example_end_indices; 780db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower total_num_features += end_indices.back(); 781db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, end_indices[0]); 782db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 1; i < end_indices.size(); ++i) { 783db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t example_size = end_indices[i] - end_indices[i - 1]; 784db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, example_size); 785db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 786db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 787db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 788db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape indices_shape; 789db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(total_num_features); 790db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(2); 791db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_indices.emplace_back(DT_INT64, indices_shape); 792db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* indices = &result->sparse_indices.back(); 793db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 794db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape values_shape; 795db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values_shape.AddDim(total_num_features); 796db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_values.emplace_back(config.sparse[d].dtype, values_shape); 797db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* values = &result->sparse_values.back(); 798db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 799db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_shapes.emplace_back(DT_INT64, TensorShape({2})); 800db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto shapes_shape_t = result->sparse_shapes.back().vec<int64>(); 801db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(0) = serialized.size(); 802db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(1) = max_num_features; 803db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 804db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t offset = 0; 805db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < sparse_buffers.size(); ++i) { 806db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const SparseBuffer& buffer = sparse_buffers[i][d]; 807db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 808db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Update indices. 809db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64* ix_p = &indices->matrix<int64>()(offset, 0); 810db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t delta = 0; 8117705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t example_index = first_example_of_minibatch(i); 812db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t example_end_index : buffer.example_end_indices) { 813db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t feature_index = 0; 814db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (; delta < example_end_index; ++delta) { 815db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 0: example index 816db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *ix_p = example_index; 817db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 1: the feature index buffer example 818db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *(ix_p + 1) = feature_index; 819db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ix_p += 2; 820db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++feature_index; 821db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 822db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++example_index; 823db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 824db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 825db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Copy values over. 826db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 827db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 828db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.int64_list.begin(), buffer.int64_list.end(), 829db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<int64>().data() + offset); 830db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 831db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 832db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 833db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.float_list.begin(), buffer.float_list.end(), 834db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<float>().data() + offset); 835db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 836db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 837db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 838db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(), 839db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<string>().data() + offset); 840db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 841db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 842db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 843db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 844db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 845db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 846db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower offset += delta; 847db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 848db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 849db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 850db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 851db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower MergeMinibatches(d); 852db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 853db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 854db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 855db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 856db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 857db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace example 858db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace tensorflow 859