example_proto_fast_parsing.cc revision db7bdab6e586e02051556d9f36a7887500378cf9
1db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 3db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License"); 4db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweryou may not use this file except in compliance with the License. 5db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerYou may obtain a copy of the License at 6db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 7db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower http://www.apache.org/licenses/LICENSE-2.0 8db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software 10db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS, 11db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerSee the License for the specific language governing permissions and 13db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerlimitations under the License. 14db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower==============================================================================*/ 15db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/example_proto_fast_parsing.h" 16db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 17db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include <vector> 18db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 19db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/example.pb.h" 20db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/feature.pb_text.h" 21db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/numeric_op.h" 22db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/op_kernel.h" 23db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/register_types.h" 24db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/blocking_counter.h" 25db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/casts.h" 26db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/errors.h" 27db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/threadpool.h" 28db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/logging.h" 29db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/protobuf.h" 30db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/presized_cuckoo_map.h" 31db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/sparse/sparse_tensor.h" 32db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 33db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace tensorflow { 34db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace example { 35db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 36db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 37db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 38db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerauto EnableAliasing(A* a) -> decltype(a->EnableAliasing(true), void()) { 39db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower a->EnableAliasing(true); 40db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 41db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 42db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 43db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid EnableAliasing(A&& a) {} 44db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 45db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweruint8 PeekTag(protobuf::io::CodedInputStream* stream) { 46db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 47db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* ptr; 48db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int size; 49db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&ptr, &size)) return 0; 50db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return *static_cast<const uint8*>(ptr); 51db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 52db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 53db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kVarintTag(uint tag) { return (tag << 3) | 0; } 54db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kDelimitedTag(uint tag) { return (tag << 3) | 2; } 55db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kFixed32Tag(uint tag) { return (tag << 3) | 5; } 56db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 57db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace parsed { 58db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 59db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ParseDataType has to be called first, then appropriate ParseZzzzList. 60db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerclass Feature { 61db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower public: 62db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Feature() {} 63db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Feature(StringPiece serialized) : serialized_(serialized) {} 64db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 65db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Status ParseDataType(DataType* dtype) { 66db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(dtype != nullptr); 67db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (serialized_.empty()) { 68db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INVALID; 69db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 70db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 71db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 oneof_tag = static_cast<uint8>(*serialized_.data()); 72db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_.remove_prefix(1); 73db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (oneof_tag) { 74db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(1): 75db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_STRING; 76db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 77db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(2): 78db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_FLOAT; 79db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 80db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(3): 81db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INT64; 82db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 83db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 84db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Unsuported datatype."); 85db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 86db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 87db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 88db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 89db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ParseBytesList(std::vector<string>* bytes_list) { 90db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(bytes_list != nullptr); 91db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 92db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 93db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 94db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 95db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 96db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 97db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 98db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 99db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 100db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 101db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; 102db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // parse string 103db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 bytes_length; 104db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&bytes_length)) return false; 105db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower string bytes; 106db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadString(&bytes, bytes_length)) return false; 107db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bytes_list->push_back(std::move(bytes)); 108db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 109db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 110db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 111db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 112db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 113db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ParseFloatList(std::vector<float>* float_list) { 114db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(float_list != nullptr); 115db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 116db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 117db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 118db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 119db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 120db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 121db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 122db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 124db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kFixed32Tag(1)) { 125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 130db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 131db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 132db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 133db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 134db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 135db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 136db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 137db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 138db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 139db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 140db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 141db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 142db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 143db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kFixed32Tag(1))) return false; 144db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 145db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 146db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 147db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 148db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 149db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 150db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 151db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 152db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 153db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 154db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 155db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ParseInt64List(std::vector<int64>* int64_list) { 156db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(int64_list != nullptr); 157db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 158db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 159db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 160db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 161db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 162db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 163db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 164db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 165db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 166db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kVarintTag(1)) { 167db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 168db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 169db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 170db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 171db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 172db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 173db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 174db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 175db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 176db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 n; // There is no API for int64 177db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 178db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64_list->push_back(n); 179db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 180db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 181db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 182db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 183db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 184db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kVarintTag(1))) return false; 185db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 n; // There is no API for int64 186db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 187db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64_list->push_back(n); 188db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 189db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 190db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 191db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 192db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 193db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 194db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 195db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece GetSerialized() const { return serialized_; } 196db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 197db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower private: 198db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // TODO(lew): Pair of uint8* would be more natural. 199db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece serialized_; 200db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 201db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 202db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing FeatureMapEntry = std::pair<StringPiece, Feature>; 203db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Example = std::vector<FeatureMapEntry>; 204db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 205db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace parsed 206db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 207db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) { 208db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 209db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 210db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 211db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 212db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (length == 0) { 213db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(nullptr, 0); 214db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 215db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 216db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* stream_alias; 217db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int stream_size; 218db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&stream_alias, &stream_size)) { 219db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 220db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 221db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (static_cast<uint32>(stream_size) < length) return false; 222db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(static_cast<const char*>(stream_alias), length); 223db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->Skip(length); 224db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 225db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 226db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 227db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream, 228db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry* feature_map_entry) { 229db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 230db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(feature_map_entry != nullptr); 231db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 232db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 233db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 234db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 235db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_map_entry->first)) return false; 236db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(2))) return false; 237db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece feature_string_piece; 238db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_string_piece)) return false; 239db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower feature_map_entry->second = parsed::Feature(feature_string_piece); 240db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectAtEnd()) return false; 241db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 242db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 243db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 244db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 245db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatures(protobuf::io::CodedInputStream* stream, 246db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 247db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 248db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 249db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 250db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 251db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 252db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream->ExpectAtEnd()) { 253db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry feature_map_entry; 254db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 255db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseFeatureMapEntry(stream, &feature_map_entry)) return false; 256db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example->push_back(std::move(feature_map_entry)); 257db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 258db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 259db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 260db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 261db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 262db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(protobuf::io::CodedInputStream* stream, 263db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 264db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 265db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 266db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (stream->ExpectTag(kDelimitedTag(1))) { 267db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseFeatures(stream, example)) return false; 268db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 269db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectAtEnd()) return false; 270db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 271db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 272db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 273db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(StringPiece serialized, parsed::Example* example) { 274db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 275db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 276db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized.data()), serialized.size()); 277db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 278db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return ParseExample(&stream, example); 279db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 280db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 281db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 282db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 283db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool TestFastParse(const string& serialized, Example* example) { 284db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 285db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 286db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized, &parsed_example)) return false; 287db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto& features = *example->mutable_features(); 288db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (parsed::FeatureMapEntry& entry : parsed_example) { 289db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto& value = (*features.mutable_feature())[entry.first.ToString()]; 290db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataType dtype; 291db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!entry.second.ParseDataType(&dtype).ok()) return false; 292db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 293db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INVALID: 294db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 295db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 296db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<string> list; 297db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!entry.second.ParseBytesList(&list)) return false; 298db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_bytes_list(); 299db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& bytes : list) { 300db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(std::move(bytes)); 301db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 302db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 303db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 304db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 305db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<float> list; 306db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!entry.second.ParseFloatList(&list)) return false; 307db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_float_list(); 308db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (float f : list) { 309db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(f); 310db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 311db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 312db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 313db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 314db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<int64> list; 315db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!entry.second.ParseInt64List(&list)) return false; 316db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_int64_list(); 317db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (int64 i : list) { 318db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(i); 319db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 320db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 321db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 322db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 323db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 324db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 325db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 326db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 327db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 328db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 329db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ----------------------------------------------------------------------------- 330db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 331db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 332db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 333db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Config = FastParseExampleConfig; 334db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 335db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid ParallelFor(const std::function<void(size_t)>& f, size_t n, 336db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool) { 337db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(thread_pool != nullptr); 338db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (n == 0) return; 339db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower BlockingCounter counter(n - 1); 340db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 1; i < n; ++i) { 341db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread_pool->Schedule([i, &f, &counter] { 342db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower f(i); 343db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower counter.DecrementCount(); 344db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }); 345db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 346db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower f(0); 347db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower counter.Wait(); 348db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 349db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 350db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerenum class Type { Sparse, Dense }; 351db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 352db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SparseBuffer { 353db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // TODO(lew): Use InlinedVector. 354db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features are in one of the 3 vectors below depending on config's dtype. 355db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Other 2 vectors remain empty. 356db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<string> bytes_list; 357db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<float> float_list; 358db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<int64> int64_list; 359db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 360db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features of example i are elements with indices 361db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // from example_end_indices[i-1] to example_end_indices[i]-1 on the 362db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // appropriate xxxxx_list 363db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> example_end_indices; 364db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 365db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 366db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SeededHasher { 367db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 operator()(StringPiece s) const { 368db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Hash64(s.data(), s.size(), seed); 369db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 370db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 seed{0xDECAFCAFFE}; 371db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 372db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 373db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseSerializedExample( 374db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const string& serialized_example, const string& example_name, 375db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const size_t example_index, const Config& config, 376db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const PresizedCuckooMap<std::pair<size_t, Type>>& config_index, 377db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher, std::vector<Tensor>* output_dense, 378db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<SparseBuffer>* output_sparse) { 379db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_dense != nullptr); 380db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_sparse != nullptr); 381db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 382db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized_example, &parsed_example)) { 383db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Could not parse example input, value: '", 384db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_example, "'"); 385db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 386db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower constexpr size_t kMax = std::numeric_limits<size_t>::max(); 387db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> sparse_features_found(config.sparse.size(), kMax); 388db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> dense_features_found(config.dense.size(), kMax); 389db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 390db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle features present in the example. 391db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (parsed::FeatureMapEntry& name_and_feature : parsed_example) { 392db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Feature& feature = name_and_feature.second; 393db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::pair<size_t, Type> d_and_type; 394db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 h = hasher(name_and_feature.first); 395db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!config_index.Find(h, &d_and_type)) continue; 396db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t d = d_and_type.first; 397db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 398db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto parse_error = [&](StringPiece feature_name) { 399db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Name: ", example_name, ", Key: ", 400db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower feature_name, ", Index: ", example_index, 401db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ". Can't parse serialized Example."); 402db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 403db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 404db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (d_and_type.second == Type::Dense) { 405db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataType example_dtype; 406db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype)); 407db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype == DT_INVALID) continue; 408db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 409db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower dense_features_found[d] = example_index; 410db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != config.dense[d].dtype) { 411db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument( 412db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Name: ", example_name, ", Feature: ", config.dense[d].feature_name, 413db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ". Data types don't match. ", "Data type: ", 414db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataTypeString(example_dtype), "Expected type: ", 415db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataTypeString(config.dense[d].dtype)); 416db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 417db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const string& feature_name = config.dense[d].feature_name; 418db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const TensorShape& shape = config.dense[d].shape; 419db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor& out = (*output_dense)[d]; 420db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 421db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t num_elements = shape.num_elements(); 422db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t offset = example_index * num_elements; 423db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 424db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto shape_error = [&](size_t size, StringPiece type_str) { 425db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument( 426db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Name: ", example_name, ", Key: ", feature_name, ", Index: ", 427db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example_index, ". Number of ", type_str, 428db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower " values != expected. " 429db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Values size: ", 430db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size, " but output shape: ", shape.DebugString()); 431db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 432db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 433db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.dense[d].dtype) { 434db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 435db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<int64> list; 436db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseInt64List(&list)) return parse_error(feature_name); 437db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (list.size() != num_elements) { 438db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return shape_error(list.size(), "int64"); 439db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 440db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<int64>().data() + offset; 441db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(list.begin(), list.size(), out_p); 442db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 443db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 444db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 445db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<float> list; 446db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseFloatList(&list)) return parse_error(feature_name); 447db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (list.size() != num_elements) { 448db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return shape_error(list.size(), "float"); 449db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 450db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<float>().data() + offset; 451db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(list.begin(), list.size(), out_p); 452db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 453db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 454db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 455db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<string> list; 456db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseBytesList(&list)) return parse_error(feature_name); 457db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (list.size() != num_elements) { 458db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return shape_error(list.size(), "bytes"); 459db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 460db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto out_p = out.flat<string>().data() + offset; 461db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < list.size(); ++i) { 462db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_p[i] = std::move(list[i]); 463db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 464db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 465db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 466db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 467db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 468db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 469db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { 470db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle sparse features. 471db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_features_found[d] = example_index; 472db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const string& feature_name = config.sparse[d].feature_name; 473db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 474db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataType example_dtype; 475db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype)); 476db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID && 477db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example_dtype != config.sparse[d].dtype) { 478db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument( 479db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Name: ", example_name, ", Feature: ", 480db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config.sparse[d].feature_name, ". Data types don't match. ", 481db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Expected type: ", DataTypeString(config.sparse[d].dtype)); 482db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 483db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 484db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 485db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 486db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 487db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseInt64List(&out.int64_list)) { 488db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return parse_error(feature_name); 489db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 490db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 491db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.int64_list.size()); 492db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 493db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 494db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 495db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 496db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseFloatList(&out.float_list)) { 497db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return parse_error(feature_name); 498db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 499db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 500db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.float_list.size()); 501db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 502db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 503db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 504db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 505db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseBytesList(&out.bytes_list)) { 506db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return parse_error(feature_name); 507db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 508db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 509db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.bytes_list.size()); 510db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 511db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 512db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 513db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 514db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 515db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 516db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 517db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 518db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle missing dense features. 519db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 520db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (dense_features_found[d] == example_index) continue; 521db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (config.dense[d].default_value.NumElements() == 0) { 522db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Name: ", example_name, ", Feature: ", 523db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config.dense[d].feature_name, 524db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower " is required but could not be found."); 525db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 526db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 527db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const Tensor& in = config.dense[d].default_value; 528db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor& out = (*output_dense)[d]; 529db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t num_elements = in.shape().num_elements(); 530db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t offset = example_index * num_elements; 531db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 532db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.dense[d].dtype) { 533db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 534db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<int64>().data(), num_elements, 535db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<int64>().data() + offset); 536db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 537db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 538db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 539db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<float>().data(), num_elements, 540db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<float>().data() + offset); 541db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 542db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 543db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 544db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<string>().data(), num_elements, 545db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<string>().data() + offset); 546db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 547db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 548db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 549db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 550db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 551db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 552db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 553db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle missing sparse features. 554db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 555db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (sparse_features_found[d] == example_index) continue; 556db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 557db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t prev_example_end_index = 558db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.empty() ? 0 : out.example_end_indices.back(); 559db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(prev_example_end_index); 560db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 561db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 562db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 563db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 564db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 565db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus CheckConfigDataType(DataType dtype) { 566db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 567db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: 568db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: 569db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: 570db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 571db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 572db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Invalid config dtype: ", 573db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataTypeString(dtype)); 574db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 575db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 576db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 577db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 578db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 579db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseExample(const Config& config, 580db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> serialized, 581db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> example_names, 582db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool, Result* result) { 583db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(thread_pool != nullptr); 584db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 585db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Check config so we can safely CHECK(false) in switches on config.*.dtype 586db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.sparse) { 587db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 588db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 589db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.dense) { 590db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 591db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 592db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 593db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t config_size = config.dense.size() + config.sparse.size(); 594db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher; 595db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Build config index. 596db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size); 597db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ok = true; 598db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < 1000; ++i) { 599db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 600db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name), 601db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Dense}); 602db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 603db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 604db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name), 605db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Sparse}); 606db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 607db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (ok) break; 608db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower LOG(WARNING) << "Collision found. This should happen only if you have " 609db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "around 2^32 entries in your config."; 610db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower hasher.seed++; 611db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config_index.Clear(config_size); 612db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 613db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ok) { 614db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::Internal( 615db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Could not avoid collision. This should not happen."); 616db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 617db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 618db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Allocate dense output (sparse have to be buffered). 619db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 620db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape out_shape; 621db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(serialized.size()); 622db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (const int64 dim : config.dense[d].shape.dim_sizes()) { 623db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(dim); 624db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 625db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->dense_values.emplace_back(config.dense[d].dtype, out_shape); 626db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 627db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 628db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // This parameter affects performance in a big and data-dependent way. 629db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const size_t kMiniBatchSizeBytes = 100000; 630db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 631db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Split examples into mini-batches for parallel processing. 632db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto first_example_of_minibatch = [&] { 633db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> result; 634db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t minibatch_bytes = 0; 635db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < serialized.size(); i++) { 636db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes == 0) { // start minibatch 637db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result.push_back(i); 638db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 639db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes += serialized[i].size() + 1; 640db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes > kMiniBatchSizeBytes) { 641db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes = 0; 642db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 643db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 644db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return result; 645db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }(); 646db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 647db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t num_minibatches = first_example_of_minibatch.size(); 648db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 649db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Do minibatches in parallel. 650db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches); 651db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<Status> status_of_minibatch(num_minibatches); 652db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 653db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto ProcessMiniBatch = [&](size_t minibatch) { 654db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_buffers[minibatch].resize(config.sparse.size()); 655db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t start = first_example_of_minibatch[minibatch]; 656db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t end = minibatch + 1 < num_minibatches 657db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ? first_example_of_minibatch[minibatch + 1] 658db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower : serialized.size(); 659db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t e = start; e < end; ++e) { 660db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower status_of_minibatch[minibatch] = FastParseSerializedExample( 661db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized[e], 662db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower (example_names.size() > 0 ? example_names[e] : "<unknown>"), e, 663db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config, config_index, hasher, &result->dense_values, 664db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower &sparse_buffers[minibatch]); 665db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!status_of_minibatch[minibatch].ok()) break; 666db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 667db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 668db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 669db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ParallelFor(ProcessMiniBatch, num_minibatches, thread_pool); 670db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 671db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (Status& status : status_of_minibatch) { 672db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(status); 673db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 674db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 675db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Merge SparseBuffers from all minibatches for every config.sparse. 676db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto MergeMinibatches = [&](size_t d) { 677db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Loop over minibatches 678db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t total_num_features = 0; 679db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t max_num_features = 0; 680db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& sparse_values_tmp : sparse_buffers) { 681db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t>& end_indices = 682db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_values_tmp[d].example_end_indices; 683db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower total_num_features += end_indices.back(); 684db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, end_indices[0]); 685db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 1; i < end_indices.size(); ++i) { 686db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t example_size = end_indices[i] - end_indices[i - 1]; 687db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, example_size); 688db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 689db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 690db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 691db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape indices_shape; 692db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(total_num_features); 693db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(2); 694db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_indices.emplace_back(DT_INT64, indices_shape); 695db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* indices = &result->sparse_indices.back(); 696db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 697db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape values_shape; 698db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values_shape.AddDim(total_num_features); 699db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_values.emplace_back(config.sparse[d].dtype, values_shape); 700db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* values = &result->sparse_values.back(); 701db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 702db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_shapes.emplace_back(DT_INT64, TensorShape({2})); 703db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto shapes_shape_t = result->sparse_shapes.back().vec<int64>(); 704db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(0) = serialized.size(); 705db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(1) = max_num_features; 706db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 707db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t offset = 0; 708db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < sparse_buffers.size(); ++i) { 709db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const SparseBuffer& buffer = sparse_buffers[i][d]; 710db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 711db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Update indices. 712db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64* ix_p = &indices->matrix<int64>()(offset, 0); 713db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t delta = 0; 714db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t example_index = first_example_of_minibatch[i]; 715db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t example_end_index : buffer.example_end_indices) { 716db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t feature_index = 0; 717db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (; delta < example_end_index; ++delta) { 718db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 0: example index 719db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *ix_p = example_index; 720db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 1: the feature index buffer example 721db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *(ix_p + 1) = feature_index; 722db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ix_p += 2; 723db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++feature_index; 724db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 725db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++example_index; 726db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 727db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 728db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Copy values over. 729db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 730db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 731db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.int64_list.begin(), buffer.int64_list.end(), 732db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<int64>().data() + offset); 733db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 734db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 735db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 736db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.float_list.begin(), buffer.float_list.end(), 737db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<float>().data() + offset); 738db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 739db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 740db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 741db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(), 742db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<string>().data() + offset); 743db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 744db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 745db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 746db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower CHECK(false) << "Should not happen."; 747db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 748db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 749db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower offset += delta; 750db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 751db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 752db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 753db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 754db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower MergeMinibatches(d); 755db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 756db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 757db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 758db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 759db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 760db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace example 761db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace tensorflow 762