example_proto_fast_parsing.cc revision 967376bdf3ae9007f8b4c996a4a260a911dfc409
1db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
3db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License");
4db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweryou may not use this file except in compliance with the License.
5db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerYou may obtain a copy of the License at
6db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
7db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    http://www.apache.org/licenses/LICENSE-2.0
8db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
9db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software
10db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS,
11db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerSee the License for the specific language governing permissions and
13db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerlimitations under the License.
14db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower==============================================================================*/
15db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/example_proto_fast_parsing.h"
16db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
17db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include <vector>
18db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
19db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/example.pb.h"
20db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/feature.pb_text.h"
21db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/numeric_op.h"
22db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/op_kernel.h"
23db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/register_types.h"
24db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/blocking_counter.h"
25db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/casts.h"
26db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/errors.h"
27db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/threadpool.h"
28db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/logging.h"
29db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/protobuf.h"
30db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/presized_cuckoo_map.h"
31db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/sparse/sparse_tensor.h"
32db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
33db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace tensorflow {
34db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace example {
35db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
36db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace {
// Calls `a->EnableAliasing(true)` on objects whose type provides that member.
// The trailing `decltype` makes this overload drop out of overload resolution
// (instead of failing to compile) when `A` has no suitable EnableAliasing
// member.
template <typename A>
auto EnableAliasing(A* a) -> decltype(a->EnableAliasing(true), void()) {
  a->EnableAliasing(true);
}

// Catch-all fallback used when the overload above is not viable: accepts any
// argument and deliberately does nothing. The parameter is left unnamed
// because it is never read.
template <typename A>
void EnableAliasing(A&&) {}
44db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
45db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweruint8 PeekTag(protobuf::io::CodedInputStream* stream) {
46db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
47db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  const void* ptr;
48db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  int size;
49db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->GetDirectBufferPointer(&ptr, &size)) return 0;
50db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return *static_cast<const uint8*>(ptr);
51db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
52db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
53db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kVarintTag(uint tag) { return (tag << 3) | 0; }
54db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kDelimitedTag(uint tag) { return (tag << 3) | 2; }
55db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerconstexpr uint8 kFixed32Tag(uint tag) { return (tag << 3) | 5; }
56db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
57db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace parsed {
58db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
59db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ParseDataType has to be called first, then appropriate ParseZzzzList.
60db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerclass Feature {
61db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower public:
62db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Feature() {}
63db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Feature(StringPiece serialized) : serialized_(serialized) {}
64db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
65db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Status ParseDataType(DataType* dtype) {
66db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(dtype != nullptr);
67db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (serialized_.empty()) {
68db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      *dtype = DT_INVALID;
69db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return Status::OK();
70db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
71db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint8 oneof_tag = static_cast<uint8>(*serialized_.data());
72db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    serialized_.remove_prefix(1);
73db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (oneof_tag) {
74db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(1):
75db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_STRING;
76db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
77db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(2):
78db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_FLOAT;
79db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
80db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(3):
81db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_INT64;
82db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
83db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
84db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return errors::InvalidArgument("Unsuported datatype.");
85db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
86db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return Status::OK();
87db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
88db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
89db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  bool ParseBytesList(std::vector<string>* bytes_list) {
90db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(bytes_list != nullptr);
91db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
92db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
93db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
94db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
95db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
96db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
97db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
98db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
99db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
100db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    while (!stream.ExpectAtEnd()) {
101db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ExpectTag(kDelimitedTag(1))) return false;
102db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // parse string
103db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint32 bytes_length;
104db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ReadVarint32(&bytes_length)) return false;
105db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      string bytes;
106db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ReadString(&bytes, bytes_length)) return false;
107db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      bytes_list->push_back(std::move(bytes));
108db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
109db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
110db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
111db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
112db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
113db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  bool ParseFloatList(std::vector<float>* float_list) {
114db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(float_list != nullptr);
115db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
116db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
117db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
118db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
119db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
120db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
121db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
122db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ExpectAtEnd()) {
123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint8 peek_tag = PeekTag(&stream);
124db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag != kDelimitedTag(1) && peek_tag != kFixed32Tag(1)) {
125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return false;
126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag == kDelimitedTag(1)) {                       // packed
129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ExpectTag(kDelimitedTag(1))) return false;  // packed tag
130db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        uint32 packed_length;
131db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ReadVarint32(&packed_length)) return false;
132db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto packed_limit = stream.PushLimit(packed_length);
133db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
134db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
135db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          uint32 buffer32;
136db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadLittleEndian32(&buffer32)) return false;
137db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          float_list->push_back(bit_cast<float>(buffer32));
138db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
139db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
140db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        stream.PopLimit(packed_limit);
141db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      } else {  // non-packed
142db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
143db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ExpectTag(kFixed32Tag(1))) return false;
144db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          uint32 buffer32;
145db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadLittleEndian32(&buffer32)) return false;
146db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          float_list->push_back(bit_cast<float>(buffer32));
147db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
148db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
149db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
150db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
151db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
152db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
153db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
154db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
155db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  bool ParseInt64List(std::vector<int64>* int64_list) {
156db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(int64_list != nullptr);
157db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
158db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
159db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
160db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
161db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
162db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
163db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
164db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ExpectAtEnd()) {
165db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint8 peek_tag = PeekTag(&stream);
166db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag != kDelimitedTag(1) && peek_tag != kVarintTag(1)) {
167db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return false;
168db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
169db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag == kDelimitedTag(1)) {                       // packed
170db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ExpectTag(kDelimitedTag(1))) return false;  // packed tag
171db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        uint32 packed_length;
172db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ReadVarint32(&packed_length)) return false;
173db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto packed_limit = stream.PushLimit(packed_length);
174db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
175db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
176967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower          protobuf_uint64 n;  // There is no API for int64
177db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadVarint64(&n)) return false;
178db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          int64_list->push_back(n);
179db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
180db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
181db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        stream.PopLimit(packed_limit);
182db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      } else {  // non-packed
183db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
184db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ExpectTag(kVarintTag(1))) return false;
185967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower          protobuf_uint64 n;  // There is no API for int64
186db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadVarint64(&n)) return false;
187db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          int64_list->push_back(n);
188db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
189db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
190db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
191db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
192db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
193db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
194db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
195db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece GetSerialized() const { return serialized_; }
196db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
197db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower private:
198db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // TODO(lew): Pair of uint8* would be more natural.
199db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece serialized_;
200db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
201db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// One entry of a features map: the feature name paired with its still
// undecoded Feature view.
using FeatureMapEntry = std::pair<StringPiece, Feature>;
// The feature-map entries collected for a single example.
using Example = std::vector<FeatureMapEntry>;
204db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
205db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace parsed
206db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
207db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) {
208db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
209db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(result != nullptr);
210db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
211db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
212db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (length == 0) {
213db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    *result = StringPiece(nullptr, 0);
214db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
215db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
216db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  const void* stream_alias;
217db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  int stream_size;
218db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->GetDirectBufferPointer(&stream_alias, &stream_size)) {
219db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return false;
220db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
221db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (static_cast<uint32>(stream_size) < length) return false;
222db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  *result = StringPiece(static_cast<const char*>(stream_alias), length);
223db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->Skip(length);
224db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
225db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
226db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
227db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream,
228db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                          parsed::FeatureMapEntry* feature_map_entry) {
229db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
230db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(feature_map_entry != nullptr);
231db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
232db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
233db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto limit = stream->PushLimit(length);
234db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectTag(kDelimitedTag(1))) return false;
235db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseString(stream, &feature_map_entry->first)) return false;
236db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectTag(kDelimitedTag(2))) return false;
237db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece feature_string_piece;
238db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseString(stream, &feature_string_piece)) return false;
239db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  feature_map_entry->second = parsed::Feature(feature_string_piece);
240db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectAtEnd()) return false;
241db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->PopLimit(limit);
242db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
243db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
244db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
245db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatures(protobuf::io::CodedInputStream* stream,
246db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                   parsed::Example* example) {
247db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
248db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
249db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
250db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
251db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto limit = stream->PushLimit(length);
252db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  while (!stream->ExpectAtEnd()) {
253db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    parsed::FeatureMapEntry feature_map_entry;
254db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream->ExpectTag(kDelimitedTag(1))) return false;
255db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!ParseFeatureMapEntry(stream, &feature_map_entry)) return false;
256db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    example->push_back(std::move(feature_map_entry));
257db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
258db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->PopLimit(limit);
259db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
260db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
261db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
262db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(protobuf::io::CodedInputStream* stream,
263db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                  parsed::Example* example) {
264db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
265db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
266db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (stream->ExpectTag(kDelimitedTag(1))) {
267db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!ParseFeatures(stream, example)) return false;
268db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
269db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectAtEnd()) return false;
270db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
271db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
272db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
273db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(StringPiece serialized, parsed::Example* example) {
274db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
275db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  protobuf::io::CodedInputStream stream(
276db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      reinterpret_cast<const uint8*>(serialized.data()), serialized.size());
277db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  EnableAliasing(&stream);
278db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return ParseExample(&stream, example);
279db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
280db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
281db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace
282db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
283db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool TestFastParse(const string& serialized, Example* example) {
284db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
285db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  parsed::Example parsed_example;
286db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseExample(serialized, &parsed_example)) return false;
287db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto& features = *example->mutable_features();
288db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (parsed::FeatureMapEntry& entry : parsed_example) {
289db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto& value = (*features.mutable_feature())[entry.first.ToString()];
290db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DataType dtype;
291db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!entry.second.ParseDataType(&dtype).ok()) return false;
292db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (dtype) {
293db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INVALID:
294db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
295db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_STRING: {
296db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::vector<string> list;
297db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!entry.second.ParseBytesList(&list)) return false;
298db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_bytes_list();
299db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (auto& bytes : list) {
300db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          result_list->add_value(std::move(bytes));
301db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
302db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
303db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
304db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_FLOAT: {
305db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::vector<float> list;
306db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!entry.second.ParseFloatList(&list)) return false;
307db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_float_list();
308db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (float f : list) {
309db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          result_list->add_value(f);
310db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
311db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
312db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
313db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INT64: {
314db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::vector<int64> list;
315db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!entry.second.ParseInt64List(&list)) return false;
316db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_int64_list();
317db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (int64 i : list) {
318db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          result_list->add_value(i);
319db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
320db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
321db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
322db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
323db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        CHECK(false) << "Should not happen.";
324db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
325db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
326db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
327db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
328db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
329db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// -----------------------------------------------------------------------------
330db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
331db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace {
332db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
333db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Config = FastParseExampleConfig;
334db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
335db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid ParallelFor(const std::function<void(size_t)>& f, size_t n,
336db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                 thread::ThreadPool* thread_pool) {
337db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(thread_pool != nullptr);
338db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (n == 0) return;
339db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  BlockingCounter counter(n - 1);
340db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t i = 1; i < n; ++i) {
341db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    thread_pool->Schedule([i, &f, &counter] {
342db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      f(i);
343db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      counter.DecrementCount();
344db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    });
345db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
346db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  f(0);
347db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  counter.Wait();
348db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
349db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// Tags a configured feature as sparse or dense. The tag is stored next to the
// feature's position in the config so a lookup tells which list to index.
enum class Type {
  Sparse,
  Dense,
};
351db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
352db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SparseBuffer {
353db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // TODO(lew): Use InlinedVector.
354db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Features are in one of the 3 vectors below depending on config's dtype.
355db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Other 2 vectors remain empty.
356db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<string> bytes_list;
357db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<float> float_list;
358db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<int64> int64_list;
359db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
360db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Features of example i are elements with indices
361db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // from example_end_indices[i-1] to example_end_indices[i]-1 on the
362db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // appropriate xxxxx_list
363db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<size_t> example_end_indices;
364db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
365db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
366db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SeededHasher {
367db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint64 operator()(StringPiece s) const {
368db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return Hash64(s.data(), s.size(), seed);
369db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
370db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint64 seed{0xDECAFCAFFE};
371db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
372db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
373db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseSerializedExample(
374db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const string& serialized_example, const string& example_name,
375db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const size_t example_index, const Config& config,
376db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const PresizedCuckooMap<std::pair<size_t, Type>>& config_index,
377db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    SeededHasher hasher, std::vector<Tensor>* output_dense,
378db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    std::vector<SparseBuffer>* output_sparse) {
379db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(output_dense != nullptr);
380db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(output_sparse != nullptr);
381db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  parsed::Example parsed_example;
382db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseExample(serialized_example, &parsed_example)) {
383db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return errors::InvalidArgument("Could not parse example input, value: '",
384db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                   serialized_example, "'");
385db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
386db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  constexpr size_t kMax = std::numeric_limits<size_t>::max();
387db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<size_t> sparse_features_found(config.sparse.size(), kMax);
388db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<size_t> dense_features_found(config.dense.size(), kMax);
389db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
390db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle features present in the example.
391db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (parsed::FeatureMapEntry& name_and_feature : parsed_example) {
392db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    parsed::Feature& feature = name_and_feature.second;
393db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    std::pair<size_t, Type> d_and_type;
394db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint64 h = hasher(name_and_feature.first);
395db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!config_index.Find(h, &d_and_type)) continue;
396db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t d = d_and_type.first;
397db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
398db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto parse_error = [&](StringPiece feature_name) {
399db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Name: ", example_name, ", Key: ",
400db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     feature_name, ", Index: ", example_index,
401db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     ". Can't parse serialized Example.");
402db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    };
403db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
404db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (d_and_type.second == Type::Dense) {
405db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      DataType example_dtype;
406db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype));
407db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype == DT_INVALID) continue;
408db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
409db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      dense_features_found[d] = example_index;
410db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype != config.dense[d].dtype) {
411db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return errors::InvalidArgument(
412db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Name: ", example_name, ", Feature: ", config.dense[d].feature_name,
413db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            ".  Data types don't match. ", "Data type: ",
414db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            DataTypeString(example_dtype), "Expected type: ",
415db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            DataTypeString(config.dense[d].dtype));
416db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
417db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const string& feature_name = config.dense[d].feature_name;
418db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const TensorShape& shape = config.dense[d].shape;
419db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      Tensor& out = (*output_dense)[d];
420db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
421db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const std::size_t num_elements = shape.num_elements();
422db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const std::size_t offset = example_index * num_elements;
423db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
424db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      auto shape_error = [&](size_t size, StringPiece type_str) {
425db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return errors::InvalidArgument(
426db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Name: ", example_name, ", Key: ", feature_name, ", Index: ",
427db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            example_index, ".  Number of ", type_str,
428db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            " values != expected.  "
429db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Values size: ",
430db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            size, " but output shape: ", shape.DebugString());
431db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      };
432db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
433db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.dense[d].dtype) {
434db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
435db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::vector<int64> list;
436db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!feature.ParseInt64List(&list)) return parse_error(feature_name);
437db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (list.size() != num_elements) {
438db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            return shape_error(list.size(), "int64");
439db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
440db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<int64>().data() + offset;
441db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy_n(list.begin(), list.size(), out_p);
442db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
443db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
444db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
445db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::vector<float> list;
446db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!feature.ParseFloatList(&list)) return parse_error(feature_name);
447db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (list.size() != num_elements) {
448db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            return shape_error(list.size(), "float");
449db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
450db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<float>().data() + offset;
451db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy_n(list.begin(), list.size(), out_p);
452db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
453db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
454db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
455db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::vector<string> list;
456db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!feature.ParseBytesList(&list)) return parse_error(feature_name);
457db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (list.size() != num_elements) {
458db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            return shape_error(list.size(), "bytes");
459db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
460db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<string>().data() + offset;
461db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          for (size_t i = 0; i < list.size(); ++i) {
462db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            out_p[i] = std::move(list[i]);
463db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
464db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
465db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
466db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
467db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
468db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
469db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    } else {
470db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Handle sparse features.
471db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      sparse_features_found[d] = example_index;
472db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const string& feature_name = config.sparse[d].feature_name;
473db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      SparseBuffer& out = (*output_sparse)[d];
474db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      DataType example_dtype;
475db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype));
476db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype != DT_INVALID &&
477db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          example_dtype != config.sparse[d].dtype) {
478db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return errors::InvalidArgument(
479db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Name: ", example_name, ", Feature: ",
480db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            config.sparse[d].feature_name, ".  Data types don't match. ",
481db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Expected type: ", DataTypeString(config.sparse[d].dtype));
482db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
483db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
484db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.sparse[d].dtype) {
485db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
486db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
487db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseInt64List(&out.int64_list)) {
488db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower              return parse_error(feature_name);
489db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
490db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
491db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.int64_list.size());
492db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
493db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
494db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
495db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
496db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseFloatList(&out.float_list)) {
497db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower              return parse_error(feature_name);
498db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
499db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
500db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.float_list.size());
501db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
502db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
503db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
504db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
505db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseBytesList(&out.bytes_list)) {
506db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower              return parse_error(feature_name);
507db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
508db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
509db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.bytes_list.size());
510db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
511db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
512db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
513db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
514db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
515db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
516db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
517db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
518db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle missing dense features.
519db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.dense.size(); ++d) {
520db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (dense_features_found[d] == example_index) continue;
521db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (config.dense[d].default_value.NumElements() == 0) {
522db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Name: ", example_name, ", Feature: ",
523db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     config.dense[d].feature_name,
524db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     " is required but could not be found.");
525db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
526db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
527db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const Tensor& in = config.dense[d].default_value;
528db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor& out = (*output_dense)[d];
529db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const std::size_t num_elements = in.shape().num_elements();
530db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const std::size_t offset = example_index * num_elements;
531db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
532db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (config.dense[d].dtype) {
533db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INT64: {
534db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<int64>().data(), num_elements,
535db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<int64>().data() + offset);
536db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
537db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
538db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_FLOAT: {
539db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<float>().data(), num_elements,
540db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<float>().data() + offset);
541db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
542db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
543db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_STRING: {
544db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<string>().data(), num_elements,
545db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<string>().data() + offset);
546db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
547db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
548db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
549db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        CHECK(false) << "Should not happen.";
550db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
551db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
552db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
553db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle missing sparse features.
554db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.sparse.size(); ++d) {
555db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (sparse_features_found[d] == example_index) continue;
556db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    SparseBuffer& out = (*output_sparse)[d];
557db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t prev_example_end_index =
558db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        out.example_end_indices.empty() ? 0 : out.example_end_indices.back();
559db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    out.example_end_indices.push_back(prev_example_end_index);
560db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
561db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
562db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return Status::OK();
563db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
564db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
565db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus CheckConfigDataType(DataType dtype) {
566db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  switch (dtype) {
567db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_INT64:
568db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_FLOAT:
569db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_STRING:
570db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return Status::OK();
571db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    default:
572db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Invalid config dtype: ",
573db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     DataTypeString(dtype));
574db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
575db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
576db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
577db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace
578db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
579db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseExample(const Config& config,
580db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        gtl::ArraySlice<string> serialized,
581db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        gtl::ArraySlice<string> example_names,
582db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        thread::ThreadPool* thread_pool, Result* result) {
583db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(thread_pool != nullptr);
584db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(result != nullptr);
585db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Check config so we can safely CHECK(false) in switches on config.*.dtype
586db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (auto& c : config.sparse) {
587db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
588db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
589db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (auto& c : config.dense) {
590db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
591db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
592db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
593db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  size_t config_size = config.dense.size() + config.sparse.size();
594db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  SeededHasher hasher;
595db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Build config index.
596db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size);
597db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  bool ok = true;
598db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t i = 0; i < 1000; ++i) {
599db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t d = 0; d < config.dense.size(); ++d) {
600db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name),
601db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                      {d, Type::Dense});
602db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
603db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t d = 0; d < config.sparse.size(); ++d) {
604db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name),
605db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                      {d, Type::Sparse});
606db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
607db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (ok) break;
608db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    LOG(WARNING) << "Collision found. This should happen only if you have "
609db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    "around 2^32 entries in your config.";
610db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    hasher.seed++;
611db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    config_index.Clear(config_size);
612db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
613db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ok) {
614db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return errors::Internal(
615db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        "Could not avoid collision. This should not happen.");
616db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
617db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
618db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Allocate dense output (sparse have to be buffered).
619db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.dense.size(); ++d) {
620db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape out_shape;
621db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    out_shape.AddDim(serialized.size());
622db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (const int64 dim : config.dense[d].shape.dim_sizes()) {
623db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      out_shape.AddDim(dim);
624db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
625db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->dense_values.emplace_back(config.dense[d].dtype, out_shape);
626db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
627db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
628db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // This parameter affects performance in a big and data-dependent way.
629db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  const size_t kMiniBatchSizeBytes = 100000;
630db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
631db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Split examples into mini-batches for parallel processing.
632db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto first_example_of_minibatch = [&] {
633db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    std::vector<size_t> result;
634db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t minibatch_bytes = 0;
635db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t i = 0; i < serialized.size(); i++) {
636db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (minibatch_bytes == 0) {  // start minibatch
637db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        result.push_back(i);
638db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
639db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      minibatch_bytes += serialized[i].size() + 1;
640db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (minibatch_bytes > kMiniBatchSizeBytes) {
641db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        minibatch_bytes = 0;
642db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
643db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
644db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return result;
645db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }();
646db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
647db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  size_t num_minibatches = first_example_of_minibatch.size();
648db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
649db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Do minibatches in parallel.
650db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches);
651db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<Status> status_of_minibatch(num_minibatches);
652db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
653db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto ProcessMiniBatch = [&](size_t minibatch) {
654db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    sparse_buffers[minibatch].resize(config.sparse.size());
655db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t start = first_example_of_minibatch[minibatch];
656db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t end = minibatch + 1 < num_minibatches
657db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                     ? first_example_of_minibatch[minibatch + 1]
658db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                     : serialized.size();
659db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t e = start; e < end; ++e) {
660db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      status_of_minibatch[minibatch] = FastParseSerializedExample(
661db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          serialized[e],
662db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          (example_names.size() > 0 ? example_names[e] : "<unknown>"), e,
663db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          config, config_index, hasher, &result->dense_values,
664db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          &sparse_buffers[minibatch]);
665db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!status_of_minibatch[minibatch].ok()) break;
666db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
667db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  };
668db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
669db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  ParallelFor(ProcessMiniBatch, num_minibatches, thread_pool);
670db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
671db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (Status& status : status_of_minibatch) {
672db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(status);
673db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
674db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
675db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Merge SparseBuffers from all minibatches for every config.sparse.
676db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto MergeMinibatches = [&](size_t d) {
677db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    // Loop over minibatches
678db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t total_num_features = 0;
679db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t max_num_features = 0;
680db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (auto& sparse_values_tmp : sparse_buffers) {
681db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      std::vector<size_t>& end_indices =
682db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          sparse_values_tmp[d].example_end_indices;
683db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      total_num_features += end_indices.back();
684db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      max_num_features = std::max(max_num_features, end_indices[0]);
685db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      for (size_t i = 1; i < end_indices.size(); ++i) {
686db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        size_t example_size = end_indices[i] - end_indices[i - 1];
687db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        max_num_features = std::max(max_num_features, example_size);
688db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
689db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
690db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
691db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape indices_shape;
692db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    indices_shape.AddDim(total_num_features);
693db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    indices_shape.AddDim(2);
694db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_indices.emplace_back(DT_INT64, indices_shape);
695db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor* indices = &result->sparse_indices.back();
696db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
697db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape values_shape;
698db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    values_shape.AddDim(total_num_features);
699db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_values.emplace_back(config.sparse[d].dtype, values_shape);
700db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor* values = &result->sparse_values.back();
701db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
702db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_shapes.emplace_back(DT_INT64, TensorShape({2}));
703db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto shapes_shape_t = result->sparse_shapes.back().vec<int64>();
704db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    shapes_shape_t(0) = serialized.size();
705db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    shapes_shape_t(1) = max_num_features;
706db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
707db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t offset = 0;
708db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t i = 0; i < sparse_buffers.size(); ++i) {
709db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const SparseBuffer& buffer = sparse_buffers[i][d];
710db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
711db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Update indices.
712db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      int64* ix_p = &indices->matrix<int64>()(offset, 0);
713db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      size_t delta = 0;
714db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      size_t example_index = first_example_of_minibatch[i];
715db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      for (size_t example_end_index : buffer.example_end_indices) {
716db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        size_t feature_index = 0;
717db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (; delta < example_end_index; ++delta) {
718db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          // Column 0: example index
719db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          *ix_p = example_index;
720db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          // Column 1: the feature index buffer example
721db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          *(ix_p + 1) = feature_index;
722db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          ix_p += 2;
723db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          ++feature_index;
724db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
725db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        ++example_index;
726db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
727db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
728db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Copy values over.
729db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.sparse[d].dtype) {
730db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
731db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy(buffer.int64_list.begin(), buffer.int64_list.end(),
732db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<int64>().data() + offset);
733db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
734db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
735db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
736db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy(buffer.float_list.begin(), buffer.float_list.end(),
737db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<float>().data() + offset);
738db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
739db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
740db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
741db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(),
742db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<string>().data() + offset);
743db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
744db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
745db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
746db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
747db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
748db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
749db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      offset += delta;
750db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
751db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  };
752db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
753db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.sparse.size(); ++d) {
754db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    MergeMinibatches(d);
755db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
756db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
757db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return Status::OK();
758db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
759db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
760db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace example
761db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace tensorflow
762