1db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 3db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License"); 4db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweryou may not use this file except in compliance with the License. 5db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerYou may obtain a copy of the License at 6db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 7db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower http://www.apache.org/licenses/LICENSE-2.0 8db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software 10db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS, 11db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerSee the License for the specific language governing permissions and 13db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerlimitations under the License. 14db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower==============================================================================*/ 15db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/example_proto_fast_parsing.h" 16db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 17db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include <vector> 18db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 19db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/example.pb.h" 20db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/feature.pb_text.h" 21db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/numeric_op.h" 22db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/op_kernel.h" 23db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/register_types.h" 24db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/blocking_counter.h" 25db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/casts.h" 26db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/errors.h" 27db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/threadpool.h" 287705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower#include "tensorflow/core/lib/gtl/inlined_vector.h" 29353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower#include "tensorflow/core/lib/monitoring/counter.h" 30db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/logging.h" 31db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/protobuf.h" 32db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/presized_cuckoo_map.h" 33db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/sparse/sparse_tensor.h" 34db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 35db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace tensorflow { 36db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace example { 37db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 38db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 407705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlowertemplate <typename T> 417705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlowerusing SmallVector = gtl::InlinedVector<T, 4>; 427705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 43db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 44db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerauto EnableAliasing(A* a) -> decltype(a->EnableAliasing(true), void()) { 45db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower a->EnableAliasing(true); 46db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 47db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 48db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowertemplate <typename A> 49db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid EnableAliasing(A&& a) {} 50db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 51db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweruint8 PeekTag(protobuf::io::CodedInputStream* stream) { 52db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 53db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* ptr; 54db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int size; 55db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&ptr, &size)) return 0; 56db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return *static_cast<const uint8*>(ptr); 57db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 58db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 59e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kVarintTag(uint32 tag) { return (tag << 3) | 0; } 60e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kDelimitedTag(uint32 tag) { return (tag << 3) | 2; } 61e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kFixed32Tag(uint32 tag) { return (tag << 3) | 5; } 62db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 63db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace parsed { 64db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 65db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ParseDataType has to be called first, then appropriate ParseZzzzList. 66db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerclass Feature { 67db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower public: 68db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Feature() {} 696882effb863dcd0da00d3287959deac46734a0b2A. Unique TensorFlower explicit Feature(StringPiece serialized) : serialized_(serialized) {} 70db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 71db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Status ParseDataType(DataType* dtype) { 72db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(dtype != nullptr); 73db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (serialized_.empty()) { 74db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INVALID; 75db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 76db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 77db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 oneof_tag = static_cast<uint8>(*serialized_.data()); 78db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_.remove_prefix(1); 79db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (oneof_tag) { 80db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(1): 81db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_STRING; 82db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 83db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(2): 84db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_FLOAT; 85db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 86db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case kDelimitedTag(3): 87db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *dtype = DT_INT64; 88db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 89db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 9054e5000e0b980abe905900599c4493fadae34a15A. Unique TensorFlower // Initialize variable to avoid compiler warning 9154e5000e0b980abe905900599c4493fadae34a15A. Unique TensorFlower *dtype = DT_INVALID; 921b5235fd897f7ea5cffc715300f67b4dc852fa27Jonathan Hseu return errors::InvalidArgument("Unsupported datatype."); 93db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 94db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 95db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 96db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray bool GetNumElementsInBytesList(int* num_elements) { 9896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray protobuf::io::CodedInputStream stream( 9996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 10096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray EnableAliasing(&stream); 10196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray uint32 length = 0; 10296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!stream.ReadVarint32(&length)) return false; 10396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto limit = stream.PushLimit(length); 10496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray *num_elements = 0; 10596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray while (!stream.ExpectAtEnd()) { 10696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!stream.ExpectTag(kDelimitedTag(1))) return false; 10796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray uint32 bytes_length = 0; 10896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!stream.ReadVarint32(&bytes_length)) return false; 10996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!stream.Skip(bytes_length)) return false; 11096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ++*num_elements; 11196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 11296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray stream.PopLimit(limit); 11396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return true; 11496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 11596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 116c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 117c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseBytesList(Result* bytes_list) { 118db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(bytes_list != nullptr); 11996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 120db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 121db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 122db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 124db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 130db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; 131db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // parse string 132db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 bytes_length; 133db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&bytes_length)) return false; 134c07b80ea58751c658a012f93d7c71c61e66062c2Derek Murray string bytes; 135c07b80ea58751c658a012f93d7c71c61e66062c2Derek Murray if (!stream.ReadString(&bytes, bytes_length)) return false; 136c07b80ea58751c658a012f93d7c71c61e66062c2Derek Murray bytes_list->push_back(std::move(bytes)); 137db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 138db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 139db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 140db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 141db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 142c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 143c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseFloatList(Result* float_list) { 144db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(float_list != nullptr); 145db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 146db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 147db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 148db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 149db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 150db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 151db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 152db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 153db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 154db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kFixed32Tag(1)) { 155db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 156db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 157db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 158db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 159db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 160db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 161db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 162db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 163db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 164db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 165db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 166db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 167db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 168db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 169db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 170db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 171db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 172db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 173db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kFixed32Tag(1))) return false; 174db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 buffer32; 175db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadLittleEndian32(&buffer32)) return false; 176db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower float_list->push_back(bit_cast<float>(buffer32)); 177db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 178db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 179db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 180db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 181db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 182db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 183db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 184db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 185c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower template <typename Result> 186c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower bool ParseInt64List(Result* int64_list) { 187db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(int64_list != nullptr); 188db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 189db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size()); 190db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 191db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 192db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&length)) return false; 193db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream.PushLimit(length); 194db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 195db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectAtEnd()) { 196db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint8 peek_tag = PeekTag(&stream); 197db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag != kDelimitedTag(1) && peek_tag != kVarintTag(1)) { 198db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 199db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 200db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (peek_tag == kDelimitedTag(1)) { // packed 201db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kDelimitedTag(1))) return false; // packed tag 202db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 packed_length; 203db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint32(&packed_length)) return false; 204db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto packed_limit = stream.PushLimit(packed_length); 205db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 206db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 207967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower protobuf_uint64 n; // There is no API for int64 208db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 209eb64b92917070676c5cf110055a24033b4f2d34aPatrick Nguyen int64_list->push_back(static_cast<int64>(n)); 210db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 211db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 212db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(packed_limit); 213db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { // non-packed 214db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream.ExpectAtEnd()) { 215db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ExpectTag(kVarintTag(1))) return false; 216967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower protobuf_uint64 n; // There is no API for int64 217db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream.ReadVarint64(&n)) return false; 218eb64b92917070676c5cf110055a24033b4f2d34aPatrick Nguyen int64_list->push_back(static_cast<int64>(n)); 219db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 220db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 221db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 222db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream.PopLimit(limit); 223db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 224db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 225db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 226db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece GetSerialized() const { return serialized_; } 227db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 228db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower private: 229db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // TODO(lew): Pair of uint8* would be more natural. 230db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece serialized_; 231db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 232db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 233db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing FeatureMapEntry = std::pair<StringPiece, Feature>; 234db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Example = std::vector<FeatureMapEntry>; 235db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 236db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace parsed 237db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 23878be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlowerinline bool SkipExtraneousTag(protobuf::io::CodedInputStream* stream) { 23978be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower uint32 data; 24078be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower protobuf_uint64 dummy; 24178be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower switch (stream->ReadTag() & 0x7) { 24278be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 0: // varint 24378be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!stream->ReadVarint32(&data)) return false; 24478be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return true; 24578be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 1: // fixed64 24678be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!stream->ReadLittleEndian64(&dummy)) return false; 24778be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return true; 24878be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 2: // length delimited 24978be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!stream->ReadVarint32(&data)) return false; 25078be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower stream->Skip(data); 25178be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return true; 25278be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 3: // group begin 25378be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return false; // groups not supported. 25478be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 4: // group end 25578be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return false; // groups not supported. 25678be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower case 5: // fixed32 25778be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!stream->ReadLittleEndian32(&data)) return false; 25878be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return true; 25978be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower } 26078be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower return false; // unrecognized tag type 26178be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower} 26278be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower 263db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) { 264db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 265db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 266db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 267db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 268db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (length == 0) { 269db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(nullptr, 0); 270db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 271db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 272db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const void* stream_alias; 273db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int stream_size; 274db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->GetDirectBufferPointer(&stream_alias, &stream_size)) { 275db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return false; 276db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 277db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (static_cast<uint32>(stream_size) < length) return false; 278db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *result = StringPiece(static_cast<const char*>(stream_alias), length); 279db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->Skip(length); 280db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 281db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 282db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 283db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream, 284db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry* feature_map_entry) { 285db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 286db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(feature_map_entry != nullptr); 287db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 288db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 289db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 290db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 291db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_map_entry->first)) return false; 292db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(2))) return false; 293db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower StringPiece feature_string_piece; 294db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseString(stream, &feature_string_piece)) return false; 295db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower feature_map_entry->second = parsed::Feature(feature_string_piece); 296db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectAtEnd()) return false; 297db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 298db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 299db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 300db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 301db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatures(protobuf::io::CodedInputStream* stream, 302db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 303db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 304db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 305db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint32 length; 306db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ReadVarint32(&length)) return false; 307db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto limit = stream->PushLimit(length); 308db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower while (!stream->ExpectAtEnd()) { 309db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::FeatureMapEntry feature_map_entry; 310db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) return false; 311db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseFeatureMapEntry(stream, &feature_map_entry)) return false; 312db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example->push_back(std::move(feature_map_entry)); 313db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 314db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower stream->PopLimit(limit); 315db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 316db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 317db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 318db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(protobuf::io::CodedInputStream* stream, 319db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example* example) { 320db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(stream != nullptr); 321db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 32289f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // Loop over the input stream which may contain multiple serialized Example 32389f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // protos merged together as strings. This behavior is consistent with Proto's 32489f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy // ParseFromString when string representations are concatenated. 32589f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy while (!stream->ExpectAtEnd()) { 32678be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!stream->ExpectTag(kDelimitedTag(1))) { 32778be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower if (!SkipExtraneousTag(stream)) return false; 32878be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower continue; 32978be42e00ec29e507edbf92014709dc1b7ee6a38A. Unique TensorFlower } 330353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!ParseFeatures(stream, example)) return false; 331db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 332db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 333db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 334db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 335db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(StringPiece serialized, parsed::Example* example) { 336db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 337db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower protobuf::io::CodedInputStream stream( 338db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower reinterpret_cast<const uint8*>(serialized.data()), serialized.size()); 339db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower EnableAliasing(&stream); 340db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return ParseExample(&stream, example); 341db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 342db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 343db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 344db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 345db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool TestFastParse(const string& serialized, Example* example) { 346db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(example != nullptr); 347db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 348db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized, &parsed_example)) return false; 349db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto& features = *example->mutable_features(); 350353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower size_t parsed_example_size = parsed_example.size(); 351353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower for (size_t i = 0; i < parsed_example_size; ++i) { 352353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // This is a logic that standard protobuf parsing is implementing. 353353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // I.e. last entry in the map overwrites all the previous ones. 354353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed::FeatureMapEntry& name_and_feature = 355353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed_example[parsed_example_size - i - 1]; 356353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower string name = name_and_feature.first.ToString(); 357353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if ((*features.mutable_feature()).count(name) > 0) continue; 358353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 359353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto& value = (*features.mutable_feature())[name]; 360db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataType dtype; 361353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseDataType(&dtype).ok()) return false; 362db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 363db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INVALID: 364db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 365db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 3667705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<string> list; 367353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseBytesList(&list)) return false; 368db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_bytes_list(); 369db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& bytes : list) { 37009326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower auto* new_value = result_list->add_value(); 37109326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower new_value->swap(bytes); 372db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 373db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 374db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 375db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 3767705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<float> list; 377353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseFloatList(&list)) return false; 378db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_float_list(); 379db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (float f : list) { 380db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(f); 381db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 382db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 383db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 384db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 3857705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<int64> list; 386353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (!name_and_feature.second.ParseInt64List(&list)) return false; 387db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto* result_list = value.mutable_int64_list(); 388db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (int64 i : list) { 389db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result_list->add_value(i); 390db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 391db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 392db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 393db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 39488cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 395db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 396db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 397db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return true; 398db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 399db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 400db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ----------------------------------------------------------------------------- 401db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 402db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace { 403db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 404db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Config = FastParseExampleConfig; 405db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 406db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid ParallelFor(const std::function<void(size_t)>& f, size_t n, 407db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool) { 408db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (n == 0) return; 4097705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower if (thread_pool == nullptr) { 4107705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower for (size_t i = 0; i < n; ++i) { 411db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower f(i); 4127705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } 4137705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } else { 4147705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower BlockingCounter counter(n - 1); 4157705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower for (size_t i = 1; i < n; ++i) { 4167705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower thread_pool->Schedule([i, &f, &counter] { 4177705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower f(i); 4187705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower counter.DecrementCount(); 4197705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower }); 4207705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower } 4217705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower f(0); 4227705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower counter.Wait(); 423db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 424db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 425db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 426db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerenum class Type { Sparse, Dense }; 427db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 428db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SparseBuffer { 429db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features are in one of the 3 vectors below depending on config's dtype. 430db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Other 2 vectors remain empty. 4317705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<string> bytes_list; 4327705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<float> float_list; 4337705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower SmallVector<int64> int64_list; 434db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 435db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Features of example i are elements with indices 436db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // from example_end_indices[i-1] to example_end_indices[i]-1 on the 437db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // appropriate xxxxx_list 438db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<size_t> example_end_indices; 439db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 440db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 441db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SeededHasher { 442db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 operator()(StringPiece s) const { 443db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Hash64(s.data(), s.size(), seed); 444db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 445db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower uint64 seed{0xDECAFCAFFE}; 446db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}; 447db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 448c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlowertemplate <typename T> 449c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlowerclass LimitedArraySlice { 450c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower public: 451c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower LimitedArraySlice(T* begin, size_t num_elements) 452c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower : current_(begin), end_(begin + num_elements) {} 453c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 454c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // May return negative if there were push_back calls after slice was filled. 455c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower int64 EndDistance() const { return end_ - current_; } 456c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 457c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // Attempts to push value to the back of this. If the slice has 458c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // already been filled, this method has no effect on the underlying data, but 459c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower // it changes the number returned by EndDistance into negative values. 460c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower void push_back(T&& value) { 461c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower if (EndDistance() > 0) *current_ = std::move(value); 462c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower ++current_; 463c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 464c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 465c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower private: 466c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower T* current_; 467c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower T* end_; 468c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower}; 469c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower 47096f3023b6a8b154c3840776c5feff3e028860a36Derek Murrayvoid LogDenseFeatureDataLoss(StringPiece feature_name) { 47196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(WARNING) << "Data loss! Feature '" << feature_name 47296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray << "' is present in multiple concatenated " 47396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "tf.Examples. Ignoring all but last one."; 47496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray static auto* duplicated_dense_feature = monitoring::Counter<0>::New( 47596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "/tensorflow/core/util/example_proto_fast_parsing/" 47696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "duplicated_dense_feature", 47796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Dense feature appears twice in a tf.Example"); 47896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray duplicated_dense_feature->GetCell()->IncrementBy(1); 47996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray} 48096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 48196f3023b6a8b154c3840776c5feff3e028860a36Derek Murrayvoid LogSparseFeatureDataLoss(StringPiece feature_name) { 48296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(WARNING) << "Data loss! Feature '" << feature_name 48396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray << "' is present in multiple concatenated " 48496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "tf.Examples. Ignoring all but last one."; 48596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray static auto* duplicated_sparse_feature = monitoring::Counter<0>::New( 48696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "/tensorflow/core/util/example_proto_fast_parsing/" 48796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "duplicated_sparse_feature", 48896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Sparse feature appears twice in a tf.Example"); 48996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray duplicated_sparse_feature->GetCell()->IncrementBy(1); 49096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray} 49196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 492db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseSerializedExample( 493db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const string& serialized_example, const string& example_name, 494db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const size_t example_index, const Config& config, 495db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const PresizedCuckooMap<std::pair<size_t, Type>>& config_index, 496db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher, std::vector<Tensor>* output_dense, 497780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::vector<SparseBuffer>* output_varlen_dense, 498db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<SparseBuffer>* output_sparse) { 499db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_dense != nullptr); 500db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(output_sparse != nullptr); 501db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Example parsed_example; 502db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ParseExample(serialized_example, &parsed_example)) { 503db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Could not parse example input, value: '", 504db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized_example, "'"); 505db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 506353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower std::vector<int64> sparse_feature_last_example(config.sparse.size(), -1); 507353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower std::vector<int64> dense_feature_last_example(config.dense.size(), -1); 508db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 509db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle features present in the example. 510353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const size_t parsed_example_size = parsed_example.size(); 511353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower for (size_t i = 0; i < parsed_example_size; ++i) { 512353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // This is a logic that standard protobuf parsing is implementing. 513353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // I.e. last entry in the map overwrites all the previous ones. 514353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed::FeatureMapEntry& name_and_feature = 515353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower parsed_example[parsed_example_size - i - 1]; 516353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 517353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const StringPiece feature_name = name_and_feature.first; 518db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower parsed::Feature& feature = name_and_feature.second; 519353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 520db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::pair<size_t, Type> d_and_type; 521353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower uint64 h = hasher(feature_name); 522db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!config_index.Find(h, &d_and_type)) continue; 523353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 524db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t d = d_and_type.first; 525353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower bool is_dense = d_and_type.second == Type::Dense; 526353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 527353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower { 528353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Testing for PresizedCuckooMap collision. 529353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // TODO(lew): Use dense_hash_map and avoid this and hasher creation. 530353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower const string& config_feature_name = is_dense 531353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower ? config.dense[d].feature_name 532353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower : config.sparse[d].feature_name; 533353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (feature_name != config_feature_name) continue; 534353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 535db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 536353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto example_error = [&](StringPiece suffix) { 537780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return errors::InvalidArgument("Name: ", example_name, 538780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo ", Key: ", feature_name, 539780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo ", Index: ", example_index, ". ", suffix); 540353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower }; 541353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 542353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower auto parse_error = [&] { 543353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return example_error("Can't parse serialized Example."); 544db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 545db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 546353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower DataType example_dtype; 547353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype)); 548353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 549353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (is_dense) { 550db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype == DT_INVALID) continue; 551db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 552353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // If feature was already visited, skip. 553353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Compare comment at the beginning of the loop. 554353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (dense_feature_last_example[d] == example_index) { 55596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LogDenseFeatureDataLoss(feature_name); 556353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower continue; 557353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 558353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower dense_feature_last_example[d] = example_index; 559353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 560db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != config.dense[d].dtype) { 561780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return example_error(strings::StrCat( 562780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Data types don't match. Data type: ", 563780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo DataTypeString(example_dtype), 56492eb06d2e4d98bcd58c8f4d7c68de0d3c637e181A. Unique TensorFlower " but expected type: ", DataTypeString(config.dense[d].dtype))); 565db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 566780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!config.dense[d].variable_length) { 567780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo Tensor& out = (*output_dense)[d]; 568780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 569780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const std::size_t num_elements = config.dense[d].elements_per_stride; 570780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const std::size_t offset = example_index * num_elements; 571780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 572780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto shape_error = [&](size_t size, StringPiece type_str) { 573780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return example_error(strings::StrCat( 574780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Number of ", type_str, 575780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo " values != expected. " 576780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Values size: ", 577780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo size, 578780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo " but output shape: ", config.dense[d].shape.DebugString())); 579780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo }; 580780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 581780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo switch (config.dense[d].dtype) { 582780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_INT64: { 583780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto out_p = out.flat<int64>().data() + offset; 584780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo LimitedArraySlice<int64> slice(out_p, num_elements); 585780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseInt64List(&slice)) return parse_error(); 586780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (slice.EndDistance() != 0) { 587780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(num_elements - slice.EndDistance(), "int64"); 588780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 589780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 590780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 591780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_FLOAT: { 592780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto out_p = out.flat<float>().data() + offset; 593780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo LimitedArraySlice<float> slice(out_p, num_elements); 594780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseFloatList(&slice)) return parse_error(); 595780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (slice.EndDistance() != 0) { 596780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(num_elements - slice.EndDistance(), "float"); 597780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 598780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 599780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 600780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_STRING: { 601780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto out_p = out.flat<string>().data() + offset; 602780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo LimitedArraySlice<string> slice(out_p, num_elements); 603780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseBytesList(&slice)) return parse_error(); 604780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (slice.EndDistance() != 0) { 605780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(num_elements - slice.EndDistance(), "bytes"); 606780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 607780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 608780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 609780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo default: 61088cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 611780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 612780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } else { // if variable length 613780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo SparseBuffer& out = (*output_varlen_dense)[d]; 614db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 615780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const std::size_t num_elements = config.dense[d].elements_per_stride; 616db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 617780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (example_dtype != DT_INVALID && 618780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo example_dtype != config.dense[d].dtype) { 619780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return example_error(strings::StrCat( 620780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Data types don't match. ", 621780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Expected type: ", DataTypeString(config.dense[d].dtype))); 622780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 623db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 624780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto shape_error = [&](size_t size, StringPiece type_str) { 625780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return example_error(strings::StrCat( 626780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Number of ", type_str, 627780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo " values is not a multiple of stride length. Saw ", size, 628780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo " values but output shape is: ", 629780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo config.dense[d].shape.DebugString())); 630780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo }; 631780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 632780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo switch (config.dense[d].dtype) { 633780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_INT64: { 634780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (example_dtype != DT_INVALID) { 635780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseInt64List(&out.int64_list)) { 636780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return parse_error(); 637780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 638780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (out.int64_list.size() % num_elements != 0) { 639780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(out.int64_list.size(), "int64"); 640780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 641780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 642780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo out.example_end_indices.push_back(out.int64_list.size()); 643780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 644c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 645780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_FLOAT: { 646780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (example_dtype != DT_INVALID) { 647780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseFloatList(&out.float_list)) { 648780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return parse_error(); 649780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 650780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (out.float_list.size() % num_elements != 0) { 651780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(out.float_list.size(), "float"); 652780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 653780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 654780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo out.example_end_indices.push_back(out.float_list.size()); 655780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 656c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower } 657780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_STRING: { 658780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (example_dtype != DT_INVALID) { 659780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!feature.ParseBytesList(&out.bytes_list)) { 660780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return parse_error(); 661780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 662780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (out.bytes_list.size() % num_elements != 0) { 663780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return shape_error(out.bytes_list.size(), "bytes"); 664780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 665780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 666780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo out.example_end_indices.push_back(out.bytes_list.size()); 667780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 668db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 669780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo default: 67088cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 671db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 672db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 673db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } else { 674353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // If feature was already visited, skip. 675353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower // Compare comment at the beginning of the loop. 676353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (sparse_feature_last_example[d] == example_index) { 67796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LogSparseFeatureDataLoss(feature_name); 678353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower continue; 679353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower } 680353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower sparse_feature_last_example[d] = example_index; 681353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower 682db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle sparse features. 683db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 684db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID && 685db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower example_dtype != config.sparse[d].dtype) { 686780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return example_error(strings::StrCat( 687780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo "Data types don't match. ", 6881ca4b32229b7f32ebb048d263302c297496f5575Brian Williammee "Expected type: ", DataTypeString(config.sparse[d].dtype), 6891ca4b32229b7f32ebb048d263302c297496f5575Brian Williammee ", Actual type: ", DataTypeString(example_dtype))); 690db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 691db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 692db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 693db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 694db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 695db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseInt64List(&out.int64_list)) { 696353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 697db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 698db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 699db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.int64_list.size()); 700db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 701db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 702db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 703db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 704db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseFloatList(&out.float_list)) { 705353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 706db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 707db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 708db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.float_list.size()); 709db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 710db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 711db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 712db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (example_dtype != DT_INVALID) { 713db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!feature.ParseBytesList(&out.bytes_list)) { 714353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower return parse_error(); 715db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 716db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 717db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(out.bytes_list.size()); 718db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 719db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 720db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 72188cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 722db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 723db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 724db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 725db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 726780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Handle missing dense features for fixed strides. 727db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 728780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (config.dense[d].variable_length) continue; 729353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (dense_feature_last_example[d] == example_index) continue; 730db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (config.dense[d].default_value.NumElements() == 0) { 7316e6deda498b0188bf2c1787730be174c61555386A. Unique TensorFlower return errors::InvalidArgument( 7326e6deda498b0188bf2c1787730be174c61555386A. Unique TensorFlower "Name: ", example_name, ", Feature: ", config.dense[d].feature_name, 7336e6deda498b0188bf2c1787730be174c61555386A. Unique TensorFlower " (data type: ", DataTypeString(config.dense[d].dtype), ")", 7346e6deda498b0188bf2c1787730be174c61555386A. Unique TensorFlower " is required but could not be found."); 735db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 736db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const Tensor& in = config.dense[d].default_value; 737db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor& out = (*output_dense)[d]; 738db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t num_elements = in.shape().num_elements(); 739db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const std::size_t offset = example_index * num_elements; 740db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 741db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.dense[d].dtype) { 742db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 743db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<int64>().data(), num_elements, 744db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<int64>().data() + offset); 745db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 746db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 747db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 748db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<float>().data(), num_elements, 749db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<float>().data() + offset); 750db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 751db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 752db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 753db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy_n(in.flat<string>().data(), num_elements, 754db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.flat<string>().data() + offset); 755db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 756db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 757db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 75888cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 759db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 760db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 761db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 762780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Handle missing varlen dense features. 763780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (size_t d = 0; d < config.dense.size(); ++d) { 764780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!config.dense[d].variable_length) continue; 765780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (dense_feature_last_example[d] == example_index) continue; 766780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo SparseBuffer& out = (*output_varlen_dense)[d]; 767780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo size_t prev_example_end_index = 768780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo out.example_end_indices.empty() ? 0 : out.example_end_indices.back(); 769780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo out.example_end_indices.push_back(prev_example_end_index); 770780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 771780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 772db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Handle missing sparse features. 773db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 774353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower if (sparse_feature_last_example[d] == example_index) continue; 775db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SparseBuffer& out = (*output_sparse)[d]; 776db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t prev_example_end_index = 777db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.empty() ? 0 : out.example_end_indices.back(); 778db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out.example_end_indices.push_back(prev_example_end_index); 779db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 780db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 781db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 782db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 783db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 784db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus CheckConfigDataType(DataType dtype) { 785db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (dtype) { 786db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: 787db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: 788db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: 789db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 790db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 791db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::InvalidArgument("Invalid config dtype: ", 792db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DataTypeString(dtype)); 793db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 794db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 795db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 796780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <typename T> 797780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdoconst SmallVector<T>& GetListFromBuffer(const SparseBuffer& buffer); 798780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 799780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <> 800780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdoconst SmallVector<int64>& GetListFromBuffer<int64>(const SparseBuffer& buffer) { 801780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return buffer.int64_list; 802780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 803780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <> 804780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdoconst SmallVector<float>& GetListFromBuffer<float>(const SparseBuffer& buffer) { 805780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return buffer.float_list; 806780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 807780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <> 808780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdoconst SmallVector<string>& GetListFromBuffer<string>( 809780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const SparseBuffer& buffer) { 810780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo return buffer.bytes_list; 811780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 812780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 813780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <typename T> 814780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdovoid CopyOrMoveBlock(const T* b, const T* e, T* t) { 815780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::copy(b, e, t); 816780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 817780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <> 818780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdovoid CopyOrMoveBlock(const string* b, const string* e, string* t) { 819780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::move(b, e, t); 820780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 821780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 822780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdotemplate <typename T> 823780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdovoid FillAndCopyVarLen( 824780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const int d, const size_t num_elements, 825504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const size_t num_elements_per_minibatch, const Config& config, 826780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const std::vector<std::vector<SparseBuffer>>& varlen_dense_buffers, 827780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo Tensor* values) { 828780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const Tensor& default_value = config.dense[d].default_value; 829780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 830780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Copy-fill the tensors (creating the zero/fill-padding) 831780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::fill(values->flat<T>().data(), values->flat<T>().data() + num_elements, 832780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo default_value.flat<T>()(0)); 833780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 834504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Data is [batch_size, max_num_elements, data_stride_size] 835504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // and num_elements_per_minibatch = max_num_elements * data_stride_size 836504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo auto data = values->flat<T>().data(); 837504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo 838780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Iterate over minibatch elements 839780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (size_t i = 0; i < varlen_dense_buffers.size(); ++i) { 840780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const SparseBuffer& buffer = varlen_dense_buffers[i][d]; 841504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Number of examples being stored in this buffer 842504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const auto& end_indices = buffer.example_end_indices; 843504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const size_t examples_in_buffer = end_indices.size(); 844504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // const size_t stride_size = config.dense[d].elements_per_stride; 845780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 846504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const auto& list = GetListFromBuffer<T>(buffer); 847780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto list_ptr = list.begin(); 848504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo 849504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo size_t elements_tally = 0; 850504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Iterate through all the examples stored in this buffer. 851504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo for (size_t j = 0; j < examples_in_buffer; ++j) { 852504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Number of elements stored for this example. 853504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const size_t num_elems = end_indices[j] - elements_tally; 854504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo CopyOrMoveBlock(list_ptr, list_ptr + num_elems, data); 855504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Move forward this many elements in the varlen buffer. 856504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo list_ptr += num_elems; 857504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo // Move forward to the next minibatch entry in the values output. 858504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo data += num_elements_per_minibatch; 859504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo elements_tally = end_indices[j]; 860780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 861504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo DCHECK(elements_tally == list.size()); 862780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 863780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo} 864780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 865db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace 866db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 867db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseExample(const Config& config, 868db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> serialized, 869db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower gtl::ArraySlice<string> example_names, 870db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower thread::ThreadPool* thread_pool, Result* result) { 871db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower DCHECK(result != nullptr); 872db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Check config so we can safely CHECK(false) in switches on config.*.dtype 873db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.sparse) { 874db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 875db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 876db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& c : config.dense) { 877db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 878db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 879db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 880db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t config_size = config.dense.size() + config.sparse.size(); 881db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower SeededHasher hasher; 882db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Build config index. 883db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size); 884db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower bool ok = true; 885db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < 1000; ++i) { 886db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 887db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name), 888db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Dense}); 889db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 890db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 891db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name), 892db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower {d, Type::Sparse}); 893db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 894db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (ok) break; 895db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower LOG(WARNING) << "Collision found. This should happen only if you have " 896db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "around 2^32 entries in your config."; 897db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower hasher.seed++; 898db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower config_index.Clear(config_size); 899db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 900db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!ok) { 901db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return errors::Internal( 902db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower "Could not avoid collision. This should not happen."); 903db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 904db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 905780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Allocate dense output for fixed length dense values 906780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // (variable-length dense and sparse have to be buffered). 907780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::vector<Tensor> fixed_dense_values(config.dense.size()); 908db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.dense.size(); ++d) { 909780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (config.dense[d].variable_length) continue; 910db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape out_shape; 911db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(serialized.size()); 912db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (const int64 dim : config.dense[d].shape.dim_sizes()) { 913db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower out_shape.AddDim(dim); 914db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 915780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo fixed_dense_values[d] = Tensor(config.dense[d].dtype, out_shape); 916db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 917db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 918db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // This parameter affects performance in a big and data-dependent way. 9197705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t kMiniBatchSizeBytes = 50000; 920db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9217705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Calculate number of minibatches. 9227705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // In main regime make each minibatch around kMiniBatchSizeBytes bytes. 9237705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Apply 'special logic' below for small and big regimes. 9247705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t num_minibatches = [&] { 9257705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t result = 0; 926db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t minibatch_bytes = 0; 927db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < serialized.size(); i++) { 928db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes == 0) { // start minibatch 9297705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower result++; 930db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 931db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes += serialized[i].size() + 1; 932db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (minibatch_bytes > kMiniBatchSizeBytes) { 933db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower minibatch_bytes = 0; 934db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 935db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 9367705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // 'special logic' 9377705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t min_minibatches = std::min<size_t>(8, serialized.size()); 9387705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower const size_t max_minibatches = 64; 9397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower return std::max<size_t>(min_minibatches, 9407705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower std::min<size_t>(max_minibatches, result)); 941db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }(); 942db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 9437705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower auto first_example_of_minibatch = [&](size_t minibatch) -> size_t { 9447705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower return (serialized.size() * minibatch) / num_minibatches; 9457705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower }; 9467705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower 9477705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // TODO(lew): A big performance low-hanging fruit here is to improve 9487705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // num_minibatches calculation to take into account actual amount of work 9497705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // needed, as the size in bytes is not perfect. Linear combination of 9507705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // size in bytes and average number of features per example is promising. 9517705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Even better: measure time instead of estimating, but this is too costly 9527705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // in small batches. 9537705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower // Maybe accept outside parameter #num_minibatches? 954db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 955db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Do minibatches in parallel. 956db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches); 957780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::vector<std::vector<SparseBuffer>> varlen_dense_buffers(num_minibatches); 958db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::vector<Status> status_of_minibatch(num_minibatches); 959db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto ProcessMiniBatch = [&](size_t minibatch) { 960db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_buffers[minibatch].resize(config.sparse.size()); 961780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo varlen_dense_buffers[minibatch].resize(config.dense.size()); 9627705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t start = first_example_of_minibatch(minibatch); 9637705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t end = first_example_of_minibatch(minibatch + 1); 964db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t e = start; e < end; ++e) { 965db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower status_of_minibatch[minibatch] = FastParseSerializedExample( 966db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower serialized[e], 9677280dafca161eb3413ea120d3dd07c63e5254e72A. Unique TensorFlower (!example_names.empty() ? example_names[e] : "<unknown>"), e, config, 9687280dafca161eb3413ea120d3dd07c63e5254e72A. Unique TensorFlower config_index, hasher, &fixed_dense_values, 969780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo &varlen_dense_buffers[minibatch], &sparse_buffers[minibatch]); 970db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower if (!status_of_minibatch[minibatch].ok()) break; 971db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 972db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 973db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 974db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ParallelFor(ProcessMiniBatch, num_minibatches, thread_pool); 975db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 976db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (Status& status : status_of_minibatch) { 977db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TF_RETURN_IF_ERROR(status); 978db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 979db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 980780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (size_t d = 0; d < config.dense.size(); ++d) { 981780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo result->dense_values.push_back(std::move(fixed_dense_values[d])); 982780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 983780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 984db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Merge SparseBuffers from all minibatches for every config.sparse. 985780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto MergeSparseMinibatches = [&](size_t d) { 986db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Loop over minibatches 987db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t total_num_features = 0; 988db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t max_num_features = 0; 989db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (auto& sparse_values_tmp : sparse_buffers) { 990504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo const std::vector<size_t>& end_indices = 991db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower sparse_values_tmp[d].example_end_indices; 992db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower total_num_features += end_indices.back(); 993db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, end_indices[0]); 994db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 1; i < end_indices.size(); ++i) { 995db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t example_size = end_indices[i] - end_indices[i - 1]; 996db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower max_num_features = std::max(max_num_features, example_size); 997db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 998db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 999db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1000db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape indices_shape; 1001db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(total_num_features); 1002db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower indices_shape.AddDim(2); 1003db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_indices.emplace_back(DT_INT64, indices_shape); 1004db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* indices = &result->sparse_indices.back(); 1005db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1006db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower TensorShape values_shape; 1007db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values_shape.AddDim(total_num_features); 1008db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_values.emplace_back(config.sparse[d].dtype, values_shape); 1009db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower Tensor* values = &result->sparse_values.back(); 1010db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1011db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower result->sparse_shapes.emplace_back(DT_INT64, TensorShape({2})); 1012db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower auto shapes_shape_t = result->sparse_shapes.back().vec<int64>(); 1013db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(0) = serialized.size(); 1014db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower shapes_shape_t(1) = max_num_features; 1015db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1016db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t offset = 0; 1017db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t i = 0; i < sparse_buffers.size(); ++i) { 1018db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower const SparseBuffer& buffer = sparse_buffers[i][d]; 1019db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1020db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Update indices. 1021db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower int64* ix_p = &indices->matrix<int64>()(offset, 0); 1022db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t delta = 0; 10237705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower size_t example_index = first_example_of_minibatch(i); 1024db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t example_end_index : buffer.example_end_indices) { 1025db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower size_t feature_index = 0; 1026db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (; delta < example_end_index; ++delta) { 1027db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 0: example index 1028db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *ix_p = example_index; 1029db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Column 1: the feature index buffer example 1030db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower *(ix_p + 1) = feature_index; 1031db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ix_p += 2; 1032db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++feature_index; 1033db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1034db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower ++example_index; 1035db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1036db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1037db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower // Copy values over. 1038db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower switch (config.sparse[d].dtype) { 1039db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_INT64: { 1040db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.int64_list.begin(), buffer.int64_list.end(), 1041db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<int64>().data() + offset); 1042db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 1043db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1044db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_FLOAT: { 1045db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::copy(buffer.float_list.begin(), buffer.float_list.end(), 1046db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<float>().data() + offset); 1047db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 1048db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1049db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower case DT_STRING: { 1050db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(), 1051db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower values->flat<string>().data() + offset); 1052db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower break; 1053db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1054db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower default: 105588cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 1056db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1057db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1058db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower offset += delta; 1059db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1060db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower }; 1061db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1062780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Merge SparseBuffers from all minibatches for every config.dense having 1063780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // variable_length. 1064780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo auto MergeDenseVarLenMinibatches = [&](size_t d) { 1065780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (!config.dense[d].variable_length) return; 1066780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1067780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Loop over minibatches 1068780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo size_t max_num_features = 0; 1069780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (auto& dense_values_tmp : varlen_dense_buffers) { 1070780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo std::vector<size_t>& end_indices = 1071780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo dense_values_tmp[d].example_end_indices; 1072780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo max_num_features = std::max(max_num_features, end_indices[0]); 1073780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (size_t i = 1; i < end_indices.size(); ++i) { 1074780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo size_t example_size = end_indices[i] - end_indices[i - 1]; 1075780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo max_num_features = std::max(max_num_features, example_size); 1076780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1077780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1078780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1079780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const size_t stride_size = config.dense[d].elements_per_stride; 1080780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const size_t max_num_elements = max_num_features / stride_size; 1081780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo TensorShape values_shape; 1082780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo DCHECK(max_num_features % config.dense[d].elements_per_stride == 0); 1083780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const size_t batch_size = serialized.size(); 1084780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo values_shape.AddDim(batch_size); 1085780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo values_shape.AddDim(max_num_elements); 1086780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (int i = 1; i < config.dense[d].shape.dims(); ++i) { 1087780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo values_shape.AddDim(config.dense[d].shape.dim_size(i)); 1088780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1089780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo Tensor values(config.dense[d].dtype, values_shape); 1090780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo result->dense_values[d] = values; 1091780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const size_t num_elements = values.NumElements(); 1092780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1093780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo // Nothing to write, exit early. 1094780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo if (num_elements == 0) return; 1095780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1096780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo const size_t num_elements_per_minibatch = num_elements / batch_size; 1097780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1098780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo switch (config.dense[d].dtype) { 1099780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_INT64: { 1100780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo FillAndCopyVarLen<int64>(d, num_elements, num_elements_per_minibatch, 1101504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo config, varlen_dense_buffers, &values); 1102780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 1103780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1104780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_FLOAT: { 1105780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo FillAndCopyVarLen<float>(d, num_elements, num_elements_per_minibatch, 1106504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo config, varlen_dense_buffers, &values); 1107780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 1108780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1109780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo case DT_STRING: { 1110780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo FillAndCopyVarLen<string>(d, num_elements, num_elements_per_minibatch, 1111504816b752cea83a848d3b3eb1cdf94b8bea1596Eugene Brevdo config, varlen_dense_buffers, &values); 1112780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo break; 1113780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1114780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo default: 111588cdf1f81fa1938c5bb81c5d293fc0ed0758cadcA. Unique TensorFlower LOG(FATAL) << "Should not happen."; 1116780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1117780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo }; 1118780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1119780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo for (size_t d = 0; d < config.dense.size(); ++d) { 1120780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo MergeDenseVarLenMinibatches(d); 1121780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo } 1122780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo 1123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower for (size_t d = 0; d < config.sparse.size(); ++d) { 1124780bc6b4d98665125c43685b20eeba6ad2804c0cEugene Brevdo MergeSparseMinibatches(d); 1125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower } 1126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 1127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower return Status::OK(); 1128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} 1129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower 113096f3023b6a8b154c3840776c5feff3e028860a36Derek MurrayStatus FastParseSingleExample(const Config& config, const string& serialized, 113196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Result* result) { 113296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray DCHECK(result != nullptr); 113396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Check config so we can safely CHECK(false) in switches on config.*.dtype 113496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (auto& c : config.sparse) { 113596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 113696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 113796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (auto& c : config.dense) { 113896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype)); 113996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 114096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 114196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // TODO(mrry): Cache the construction of this map at Op construction time. 114296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray size_t config_size = config.dense.size() + config.sparse.size(); 114396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray SeededHasher hasher; 114496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Build config index. 114596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size); 114696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray bool ok = true; 114796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t i = 0; i < 1000; ++i) { 114896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.dense.size(); ++d) { 114996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name), 115096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray {d, Type::Dense}); 115196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 115296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.sparse.size(); ++d) { 115396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name), 115496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray {d, Type::Sparse}); 115596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 115696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (ok) break; 115796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(WARNING) << "Collision found. This should happen only if you have " 115896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "around 2^32 entries in your config."; 115996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray hasher.seed++; 116096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray config_index.Clear(config_size); 116196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 116296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!ok) { 116396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::Internal( 116496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Could not avoid collision. This should not happen."); 116596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 116696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 116796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Allocate dense output tensors. 116896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.dense.size(); ++d) { 116996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!config.dense[d].variable_length) { 117096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TensorShape values_shape; 117196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!config.dense[d].shape.AsTensorShape(&values_shape)) { 117296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::Internal( 117396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Fixed-length shape was not a statically defined shape."); 117496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 117596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->dense_values.emplace_back(config.dense[d].dtype, values_shape); 117696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } else { 117796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Variable-length tensor will be allocated later. 117896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->dense_values.emplace_back(); 117996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 118096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 118196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 118296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Allocate sparse output tensors. 118396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.sparse.size(); ++d) { 118496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // The dense_shape is always a vector of length 1. 118596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_shapes.emplace_back(DT_INT64, TensorShape({1})); 118696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Variable-length tensors will be allocated later. 118796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_indices.emplace_back(); 118896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_values.emplace_back(); 118996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 119096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 119196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray parsed::Example parsed_example; 119296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!ParseExample(serialized, &parsed_example)) { 119396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::InvalidArgument("Could not parse example input, value: '", 119496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray serialized, "'"); 119596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 119696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray std::vector<bool> sparse_feature_already_seen(config.sparse.size(), false); 119796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray std::vector<bool> dense_feature_already_seen(config.dense.size(), false); 119896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 119996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle features present in the example. 120096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray const size_t parsed_example_size = parsed_example.size(); 120196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t i = 0; i < parsed_example_size; ++i) { 120296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // This is a logic that standard protobuf parsing is implementing. 120396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // I.e. last entry in the map overwrites all the previous ones. 120496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray parsed::FeatureMapEntry& name_and_feature = 120596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray parsed_example[parsed_example_size - i - 1]; 120696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 120796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray const StringPiece feature_name = name_and_feature.first; 120896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray parsed::Feature& feature = name_and_feature.second; 120996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 121096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray std::pair<size_t, Type> d_and_type; 121196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray uint64 h = hasher(feature_name); 121296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!config_index.Find(h, &d_and_type)) continue; 121396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 121496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray size_t d = d_and_type.first; 121596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray bool is_dense = d_and_type.second == Type::Dense; 121696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 121796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray { 121896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Testing for PresizedCuckooMap collision. 121996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // TODO(lew): Use dense_hash_map and avoid this and hasher creation. 122096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray const string& config_feature_name = is_dense 122196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ? config.dense[d].feature_name 122296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray : config.sparse[d].feature_name; 122396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (feature_name != config_feature_name) continue; 122496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 122596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 122696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto example_error = [feature_name](StringPiece suffix) { 122796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::InvalidArgument("Key: ", feature_name, ". ", suffix); 122896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray }; 122996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 123096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto parse_error = [feature_name] { 123196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::InvalidArgument("Key: ", feature_name, 123296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ". Can't parse serialized Example."); 123396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray }; 123496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 123596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray DataType example_dtype; 123696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype)); 123796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (example_dtype == DT_INVALID) continue; 123896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 123996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (is_dense && !config.dense[d].variable_length) { 124096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // If feature was already visited, skip. 124196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Compare comment at the beginning of the loop. 124296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (dense_feature_already_seen[d]) { 124396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LogDenseFeatureDataLoss(feature_name); 124496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray continue; 124596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 124696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray dense_feature_already_seen[d] = true; 124796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 124896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (example_dtype != config.dense[d].dtype) { 124996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return example_error(strings::StrCat( 125096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Data types don't match. Data type: ", 125196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray DataTypeString(example_dtype), 125296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray " but expected type: ", DataTypeString(config.dense[d].dtype))); 125396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 125496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 125596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Tensor* out = &result->dense_values[d]; 125696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray const std::size_t num_elements = config.dense[d].elements_per_stride; 125796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 125896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray switch (example_dtype) { 125996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_INT64: { 126096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto out_p = out->flat<int64>().data(); 126196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LimitedArraySlice<int64> slice(out_p, num_elements); 126296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseInt64List(&slice)) return parse_error(); 126396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (slice.EndDistance() != 0) { 126496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 126596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 126696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 126796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 126896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_FLOAT: { 126996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto out_p = out->flat<float>().data(); 127096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LimitedArraySlice<float> slice(out_p, num_elements); 127196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseFloatList(&slice)) return parse_error(); 127296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (slice.EndDistance() != 0) { 127396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 127496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 127596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 127696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 127796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_STRING: { 127896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto out_p = out->flat<string>().data(); 127996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LimitedArraySlice<string> slice(out_p, num_elements); 128096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseBytesList(&slice)) return parse_error(); 128196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (slice.EndDistance() != 0) { 128296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 128396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 128496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 128596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 128696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray default: 128796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(FATAL) << "Should not happen."; 128896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 128996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 129096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } else { // if variable length 129196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray SparseBuffer out_temp; 129296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray const size_t num_elements_divisor = 129396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray is_dense ? config.dense[d].elements_per_stride : 1; 129496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray size_t num_elements; 129596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 129696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (is_dense) { 129796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // If feature was already visited, skip. 129896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Compare comment at the beginning of the loop. 129996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (dense_feature_already_seen[d]) { 130096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LogDenseFeatureDataLoss(feature_name); 130196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray continue; 130296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 130396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray dense_feature_already_seen[d] = true; 130496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (example_dtype != config.dense[d].dtype) { 130596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return example_error(strings::StrCat( 130696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Data types don't match. Data type: ", 130796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray DataTypeString(example_dtype), 130896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray " but expected type: ", DataTypeString(config.dense[d].dtype))); 130996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 131096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } else { 131196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // If feature was already visited, skip. 131296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Compare comment at the beginning of the loop. 131396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (sparse_feature_already_seen[d]) { 131496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LogSparseFeatureDataLoss(feature_name); 131596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray continue; 131696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 131796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray sparse_feature_already_seen[d] = true; 131896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 131996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle sparse features. 132096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (example_dtype != DT_INVALID && 132196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray example_dtype != config.sparse[d].dtype) { 132296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return example_error(strings::StrCat( 132396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Data types don't match. ", 132496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Expected type: ", DataTypeString(config.sparse[d].dtype), 132596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray ", Actual type: ", DataTypeString(example_dtype))); 132696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 132796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 132896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 132996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray switch (example_dtype) { 133096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_INT64: { 133196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // TODO(mrry): Use the fact that the `int64_list` is packed to read 133296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // out the length and pre-allocate the output tensor. 133396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseInt64List(&out_temp.int64_list)) 133496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 133596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray num_elements = out_temp.int64_list.size(); 133696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 133796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 133896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_FLOAT: { 133996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // TODO(mrry): Use the fact that the `float_list` is packed to read 134096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // out the length and pre-allocate the output tensor. 134196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseFloatList(&out_temp.float_list)) 134296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 134396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray num_elements = out_temp.float_list.size(); 134496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 134596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 134696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_STRING: { 134796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray int actual_num_elements = 0; 134896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.GetNumElementsInBytesList(&actual_num_elements)) { 134996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 135096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 135196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out_temp.bytes_list.reserve(actual_num_elements); 135296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!feature.ParseBytesList(&out_temp.bytes_list)) 135396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 135496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray num_elements = out_temp.bytes_list.size(); 135596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 135696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 135796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray default: 135896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(FATAL) << "Should not happen. " << DataTypeString(example_dtype); 135996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 136096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 136196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (num_elements % num_elements_divisor != 0) { 136296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return parse_error(); 136396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 136496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 136596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Tensor* out; 136696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (is_dense) { 136796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TensorShape values_shape; 136896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray values_shape.AddDim(num_elements / num_elements_divisor); 136996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (int i = 1; i < config.dense[d].shape.dims(); ++i) { 137096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray values_shape.AddDim(config.dense[d].shape.dim_size(i)); 137196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 137296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 137396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out = &result->dense_values[d]; 137496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray *out = Tensor(config.dense[d].dtype, values_shape); 137596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 137696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } else { 137796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Tensor* out_indices = &result->sparse_indices[d]; 137896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Tensor* out_dense_shape = &result->sparse_shapes[d]; 137996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out = &result->sparse_values[d]; 138096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 138196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // TODO(mrry): Investigate the possibility of not materializing 138296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // the indices (and perhaps dense_shape) until they are needed. 138396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray *out_indices = Tensor( 138496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray DT_INT64, TensorShape({static_cast<int64>(num_elements), 1})); 138596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto indices_flat = out_indices->flat<int64>(); 138696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t i = 0; i < num_elements; ++i) { 138796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray indices_flat(i) = static_cast<int64>(i); 138896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 138996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 139096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray *out_dense_shape = Tensor(DT_INT64, TensorShape({1})); 139196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray auto shapes_shape_t = out_dense_shape->vec<int64>(); 139296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray shapes_shape_t(0) = num_elements; 139396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 139496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray *out = Tensor(config.sparse[d].dtype, 139596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TensorShape({static_cast<int64>(num_elements)})); 139696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 139796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 139896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray switch (example_dtype) { 139996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_INT64: { 140096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray CopyOrMoveBlock(out_temp.int64_list.begin(), 140196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out_temp.int64_list.end(), out->flat<int64>().data()); 140296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 140396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 140496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_FLOAT: { 140596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray CopyOrMoveBlock(out_temp.float_list.begin(), 140696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out_temp.float_list.end(), out->flat<float>().data()); 140796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 140896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 140996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray case DT_STRING: { 141096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray CopyOrMoveBlock(out_temp.bytes_list.begin(), 141196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out_temp.bytes_list.end(), 141296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray out->flat<string>().data()); 141396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray break; 141496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 141596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray default: 141696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray LOG(FATAL) << "Should not happen."; 141796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 141896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 141996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 142096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 142196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle missing dense features. 142296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.dense.size(); ++d) { 142396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!dense_feature_already_seen[d]) { 142496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!config.dense[d].variable_length) { 142596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle missing fixed-length dense feature. 142696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (config.dense[d].default_value.NumElements() == 0) { 142796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return errors::InvalidArgument( 142896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray "Feature: ", config.dense[d].feature_name, 142996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray " (data type: ", DataTypeString(config.dense[d].dtype), ")", 143096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray " is required but could not be found."); 143196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 143296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->dense_values[d] = config.dense[d].default_value; 143396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } else { 143496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle missing varlen dense feature. 143596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray TensorShape empty_shape; 143696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray empty_shape.AddDim(0); 143796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (int i = 1; i < config.dense[d].shape.dims(); ++i) { 143896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray empty_shape.AddDim(config.dense[d].shape.dim_size(i)); 143996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 144096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->dense_values[d] = Tensor(config.dense[d].dtype, empty_shape); 144196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 144296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 144396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 144496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 144596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray // Handle missing sparse features. 144696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray for (size_t d = 0; d < config.sparse.size(); ++d) { 144796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray if (!sparse_feature_already_seen[d]) { 144896f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_indices[d] = Tensor(DT_INT64, TensorShape({0, 1})); 144996f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_values[d] = 145096f3023b6a8b154c3840776c5feff3e028860a36Derek Murray Tensor(config.sparse[d].dtype, TensorShape({0})); 145196f3023b6a8b154c3840776c5feff3e028860a36Derek Murray result->sparse_shapes[d].vec<int64>()(0) = 0; 145296f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 145396f3023b6a8b154c3840776c5feff3e028860a36Derek Murray } 145496f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 145596f3023b6a8b154c3840776c5feff3e028860a36Derek Murray return Status::OK(); 145696f3023b6a8b154c3840776c5feff3e028860a36Derek Murray} 145796f3023b6a8b154c3840776c5feff3e028860a36Derek Murray 1458db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace example 1459db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower} // namespace tensorflow 1460