example_proto_fast_parsing.cc revision c27d1561bd89f1062e4cbb19262905e609daef80
1db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
3db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerLicensed under the Apache License, Version 2.0 (the "License");
4db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweryou may not use this file except in compliance with the License.
5db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerYou may obtain a copy of the License at
6db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
7db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    http://www.apache.org/licenses/LICENSE-2.0
8db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
9db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerUnless required by applicable law or agreed to in writing, software
10db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerdistributed under the License is distributed on an "AS IS" BASIS,
11db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerSee the License for the specific language governing permissions and
13db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerlimitations under the License.
14db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower==============================================================================*/
15db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/example_proto_fast_parsing.h"
16db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
17db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include <vector>
18db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
19db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/example.pb.h"
20db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/example/feature.pb_text.h"
21db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/numeric_op.h"
22db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/op_kernel.h"
23db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/framework/register_types.h"
24db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/blocking_counter.h"
25db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/casts.h"
26db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/errors.h"
27db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/lib/core/threadpool.h"
287705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower#include "tensorflow/core/lib/gtl/inlined_vector.h"
29353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower#include "tensorflow/core/lib/monitoring/counter.h"
30db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/logging.h"
31db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/platform/protobuf.h"
32db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/presized_cuckoo_map.h"
33db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower#include "tensorflow/core/util/sparse/sparse_tensor.h"
34db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
35db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace tensorflow {
36db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace example {
37db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
38db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace {
397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower
// Vector alias used for short per-feature value lists: a gtl::InlinedVector
// whose inline-capacity template argument is 4.
template <typename T>
using SmallVector = gtl::InlinedVector<T, 4>;
427705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower
// Calls a->EnableAliasing(true). This overload only participates in overload
// resolution when that call expression is well-formed for A; otherwise the
// catch-all overload of the same name is selected instead.
template <typename A>
auto EnableAliasing(A* a) -> decltype(void(a->EnableAliasing(true))) {
  a->EnableAliasing(true);
}
47db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// Catch-all overload: accepts any argument and intentionally does nothing.
// It is picked when the argument's type has no usable EnableAliasing(bool)
// member for the pointer overload to call.
template <typename A>
void EnableAliasing(A&& /*a*/) {
}
50db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
51db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFloweruint8 PeekTag(protobuf::io::CodedInputStream* stream) {
52db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
53db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  const void* ptr;
54db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  int size;
55db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->GetDirectBufferPointer(&ptr, &size)) return 0;
56db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return *static_cast<const uint8*>(ptr);
57db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
58db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
59e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kVarintTag(uint32 tag) { return (tag << 3) | 0; }
60e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kDelimitedTag(uint32 tag) { return (tag << 3) | 2; }
61e0bd1353e3cd9ca7d40ebf99913368fc604d1296Derek Murrayconstexpr uint8 kFixed32Tag(uint32 tag) { return (tag << 3) | 5; }
62db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
63db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace parsed {
64db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
65db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// ParseDataType has to be called first, then appropriate ParseZzzzList.
66db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerclass Feature {
67db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower public:
68db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Feature() {}
69db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Feature(StringPiece serialized) : serialized_(serialized) {}
70db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
71db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  Status ParseDataType(DataType* dtype) {
72db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(dtype != nullptr);
73db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (serialized_.empty()) {
74db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      *dtype = DT_INVALID;
75db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return Status::OK();
76db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
77db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint8 oneof_tag = static_cast<uint8>(*serialized_.data());
78db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    serialized_.remove_prefix(1);
79db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (oneof_tag) {
80db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(1):
81db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_STRING;
82db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
83db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(2):
84db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_FLOAT;
85db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
86db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case kDelimitedTag(3):
87db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        *dtype = DT_INT64;
88db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
89db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
90db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return errors::InvalidArgument("Unsuported datatype.");
91db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
92db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return Status::OK();
93db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
94db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
95c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  template <typename Result>
96c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  bool ParseBytesList(Result* bytes_list) {
97db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(bytes_list != nullptr);
98db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
99db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
100db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
101db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
102db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
103db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
104db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
105db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
106db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
107db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    while (!stream.ExpectAtEnd()) {
108db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ExpectTag(kDelimitedTag(1))) return false;
109db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // parse string
110db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint32 bytes_length;
111db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ReadVarint32(&bytes_length)) return false;
112db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      string bytes;
113db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!stream.ReadString(&bytes, bytes_length)) return false;
114db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      bytes_list->push_back(std::move(bytes));
115db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
116db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
117db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
118db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
119db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
120c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  template <typename Result>
121c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  bool ParseFloatList(Result* float_list) {
122db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(float_list != nullptr);
123db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
124db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
125db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
126db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
127db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
128db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
129db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
130db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ExpectAtEnd()) {
131db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint8 peek_tag = PeekTag(&stream);
132db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag != kDelimitedTag(1) && peek_tag != kFixed32Tag(1)) {
133db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return false;
134db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
135db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
136db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag == kDelimitedTag(1)) {                       // packed
137db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ExpectTag(kDelimitedTag(1))) return false;  // packed tag
138db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        uint32 packed_length;
139db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ReadVarint32(&packed_length)) return false;
140db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto packed_limit = stream.PushLimit(packed_length);
141db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
142db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
143db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          uint32 buffer32;
144db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadLittleEndian32(&buffer32)) return false;
145db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          float_list->push_back(bit_cast<float>(buffer32));
146db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
147db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
148db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        stream.PopLimit(packed_limit);
149db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      } else {  // non-packed
150db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
151db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ExpectTag(kFixed32Tag(1))) return false;
152db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          uint32 buffer32;
153db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadLittleEndian32(&buffer32)) return false;
154db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          float_list->push_back(bit_cast<float>(buffer32));
155db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
156db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
157db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
158db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
159db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
160db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
161db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
162db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
163c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  template <typename Result>
164c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower  bool ParseInt64List(Result* int64_list) {
165db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DCHECK(int64_list != nullptr);
166db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    protobuf::io::CodedInputStream stream(
167db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        reinterpret_cast<const uint8*>(serialized_.data()), serialized_.size());
168db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    EnableAliasing(&stream);
169db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    uint32 length;
170db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ReadVarint32(&length)) return false;
171db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto limit = stream.PushLimit(length);
172db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
173db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream.ExpectAtEnd()) {
174db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      uint8 peek_tag = PeekTag(&stream);
175db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag != kDelimitedTag(1) && peek_tag != kVarintTag(1)) {
176db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        return false;
177db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
178db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (peek_tag == kDelimitedTag(1)) {                       // packed
179db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ExpectTag(kDelimitedTag(1))) return false;  // packed tag
180db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        uint32 packed_length;
181db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        if (!stream.ReadVarint32(&packed_length)) return false;
182db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto packed_limit = stream.PushLimit(packed_length);
183db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
184db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
185967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower          protobuf_uint64 n;  // There is no API for int64
186db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadVarint64(&n)) return false;
187db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          int64_list->push_back(n);
188db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
189db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
190db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        stream.PopLimit(packed_limit);
191db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      } else {  // non-packed
192db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        while (!stream.ExpectAtEnd()) {
193db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ExpectTag(kVarintTag(1))) return false;
194967376bdf3ae9007f8b4c996a4a260a911dfc409A. Unique TensorFlower          protobuf_uint64 n;  // There is no API for int64
195db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (!stream.ReadVarint64(&n)) return false;
196db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          int64_list->push_back(n);
197db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
198db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
199db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
200db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    stream.PopLimit(limit);
201db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
202db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
203db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
204db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece GetSerialized() const { return serialized_; }
205db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
206db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower private:
207db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // TODO(lew): Pair of uint8* would be more natural.
208db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece serialized_;
209db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
210db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// One feature-map entry: the feature name paired with its not-yet-decoded
// Feature value.
using FeatureMapEntry = std::pair<StringPiece, Feature>;
// A parsed example: the feature-map entries in the order they were appended
// by the parser. The same name may appear more than once.
using Example = std::vector<FeatureMapEntry>;
213db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
214db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace parsed
215db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
216db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) {
217db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
218db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(result != nullptr);
219db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
220db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
221db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (length == 0) {
222db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    *result = StringPiece(nullptr, 0);
223db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return true;
224db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
225db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  const void* stream_alias;
226db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  int stream_size;
227db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->GetDirectBufferPointer(&stream_alias, &stream_size)) {
228db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return false;
229db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
230db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (static_cast<uint32>(stream_size) < length) return false;
231db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  *result = StringPiece(static_cast<const char*>(stream_alias), length);
232db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->Skip(length);
233db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
234db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
235db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
236db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream,
237db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                          parsed::FeatureMapEntry* feature_map_entry) {
238db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
239db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(feature_map_entry != nullptr);
240db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
241db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
242db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto limit = stream->PushLimit(length);
243db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectTag(kDelimitedTag(1))) return false;
244db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseString(stream, &feature_map_entry->first)) return false;
245db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectTag(kDelimitedTag(2))) return false;
246db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  StringPiece feature_string_piece;
247db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseString(stream, &feature_string_piece)) return false;
248db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  feature_map_entry->second = parsed::Feature(feature_string_piece);
249db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ExpectAtEnd()) return false;
250db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->PopLimit(limit);
251db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
252db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
253db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
254db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseFeatures(protobuf::io::CodedInputStream* stream,
255db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                   parsed::Example* example) {
256db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
257db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
258db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint32 length;
259db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!stream->ReadVarint32(&length)) return false;
260db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto limit = stream->PushLimit(length);
261db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  while (!stream->ExpectAtEnd()) {
262db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    parsed::FeatureMapEntry feature_map_entry;
263db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!stream->ExpectTag(kDelimitedTag(1))) return false;
264db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!ParseFeatureMapEntry(stream, &feature_map_entry)) return false;
265db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    example->push_back(std::move(feature_map_entry));
266db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
267db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  stream->PopLimit(limit);
268db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
269db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
270db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
271db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(protobuf::io::CodedInputStream* stream,
272db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                  parsed::Example* example) {
273db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(stream != nullptr);
274db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
27589f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy  // Loop over the input stream which may contain multiple serialized Example
27689f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy  // protos merged together as strings. This behavior is consistent with Proto's
27789f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy  // ParseFromString when string representations are concatenated.
27889f358fdfd701b4118fe0f80e36f9cd098ee691eKiril Gorovoy  while (!stream->ExpectAtEnd()) {
279353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (!stream->ExpectTag(kDelimitedTag(1))) return false;
280353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (!ParseFeatures(stream, example)) return false;
281db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
282db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
283db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
284db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
285db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool ParseExample(StringPiece serialized, parsed::Example* example) {
286db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
287db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  protobuf::io::CodedInputStream stream(
288db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      reinterpret_cast<const uint8*>(serialized.data()), serialized.size());
289db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  EnableAliasing(&stream);
290db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return ParseExample(&stream, example);
291db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
292db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
293db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace
294db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
295db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerbool TestFastParse(const string& serialized, Example* example) {
296db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(example != nullptr);
297db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  parsed::Example parsed_example;
298db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseExample(serialized, &parsed_example)) return false;
299db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto& features = *example->mutable_features();
300353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  size_t parsed_example_size = parsed_example.size();
301353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  for (size_t i = 0; i < parsed_example_size; ++i) {
302353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    // This is a logic that standard protobuf parsing is implementing.
303353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    // I.e. last entry in the map overwrites all the previous ones.
304353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    parsed::FeatureMapEntry& name_and_feature =
305353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        parsed_example[parsed_example_size - i - 1];
306353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    string name = name_and_feature.first.ToString();
307353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if ((*features.mutable_feature()).count(name) > 0) continue;
308353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
309353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    auto& value = (*features.mutable_feature())[name];
310db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    DataType dtype;
311353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (!name_and_feature.second.ParseDataType(&dtype).ok()) return false;
312db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (dtype) {
313db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INVALID:
314db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
315db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_STRING: {
3167705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        SmallVector<string> list;
317353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        if (!name_and_feature.second.ParseBytesList(&list)) return false;
318db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_bytes_list();
319db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (auto& bytes : list) {
32009326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower          auto* new_value = result_list->add_value();
32109326f005455a32c5d9276919db4e4b8de4c3117A. Unique TensorFlower          new_value->swap(bytes);
322db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
323db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
324db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
325db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_FLOAT: {
3267705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        SmallVector<float> list;
327353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        if (!name_and_feature.second.ParseFloatList(&list)) return false;
328db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_float_list();
329db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (float f : list) {
330db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          result_list->add_value(f);
331db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
332db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
333db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
334db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INT64: {
3357705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        SmallVector<int64> list;
336353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        if (!name_and_feature.second.ParseInt64List(&list)) return false;
337db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        auto* result_list = value.mutable_int64_list();
338db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (int64 i : list) {
339db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          result_list->add_value(i);
340db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
341db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
342db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
343db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
344db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        CHECK(false) << "Should not happen.";
345db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
346db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
347db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return true;
348db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
349db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
350db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower// -----------------------------------------------------------------------------
351db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
352db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowernamespace {
353db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
354db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerusing Config = FastParseExampleConfig;
355db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
356db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowervoid ParallelFor(const std::function<void(size_t)>& f, size_t n,
357db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                 thread::ThreadPool* thread_pool) {
358db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (n == 0) return;
3597705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  if (thread_pool == nullptr) {
3607705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    for (size_t i = 0; i < n; ++i) {
361db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      f(i);
3627705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    }
3637705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  } else {
3647705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    BlockingCounter counter(n - 1);
3657705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    for (size_t i = 1; i < n; ++i) {
3667705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower      thread_pool->Schedule([i, &f, &counter] {
3677705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        f(i);
3687705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        counter.DecrementCount();
3697705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower      });
3707705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    }
3717705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    f(0);
3727705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    counter.Wait();
373db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
374db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
375db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// Which kind of output a configured feature is parsed into.
enum class Type {
  Sparse,
  Dense,
};
377db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
378db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SparseBuffer {
379db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Features are in one of the 3 vectors below depending on config's dtype.
380db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Other 2 vectors remain empty.
3817705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  SmallVector<string> bytes_list;
3827705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  SmallVector<float> float_list;
3837705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  SmallVector<int64> int64_list;
384db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
385db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Features of example i are elements with indices
386db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // from example_end_indices[i-1] to example_end_indices[i]-1 on the
387db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // appropriate xxxxx_list
388db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<size_t> example_end_indices;
389db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
390db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
391db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerstruct SeededHasher {
392db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint64 operator()(StringPiece s) const {
393db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return Hash64(s.data(), s.size(), seed);
394db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
395db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  uint64 seed{0xDECAFCAFFE};
396db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower};
397db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
// Write-only, fixed-capacity view over a caller-owned array of T.
//
// Values are appended with push_back until the array is full. Further
// push_back calls leave the array untouched but are still counted, so that the
// caller can tell afterwards how many values were offered in total.
template <typename T>
class LimitedArraySlice {
 public:
  // `begin` points at the first of `num_elements` writable elements. The
  // array is not owned by the slice.
  LimitedArraySlice(T* begin, size_t num_elements)
      : current_(begin), end_(begin + num_elements) {}

  // Number of elements that can still be stored. Negative when push_back was
  // called after the slice was filled; the magnitude is then the number of
  // rejected values.
  int64 EndDistance() const {
    return static_cast<int64>(end_ - current_) - overflow_;
  }

  // Attempts to push value to the back of this. If the slice has
  // already been filled, this method has no effect on the underlying data, but
  // it changes the number returned by EndDistance into negative values.
  void push_back(T&& value) {
    if (current_ != end_) {
      *current_ = std::move(value);
      ++current_;
    } else {
      // Count the rejected value instead of advancing current_: moving a
      // pointer beyond one-past-the-end of the array is undefined behaviour.
      ++overflow_;
    }
  }

 private:
  T* current_;  // Next element to write; never moves past end_.
  T* end_;      // One past the last writable element.
  int64 overflow_ = 0;  // push_back calls made after the slice was full.
};
419c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower
420db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseSerializedExample(
421db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const string& serialized_example, const string& example_name,
422db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const size_t example_index, const Config& config,
423db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const PresizedCuckooMap<std::pair<size_t, Type>>& config_index,
424db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    SeededHasher hasher, std::vector<Tensor>* output_dense,
425db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    std::vector<SparseBuffer>* output_sparse) {
426db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(output_dense != nullptr);
427db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(output_sparse != nullptr);
428db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  parsed::Example parsed_example;
429db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ParseExample(serialized_example, &parsed_example)) {
430db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return errors::InvalidArgument("Could not parse example input, value: '",
431db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                   serialized_example, "'");
432db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
433353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  std::vector<int64> sparse_feature_last_example(config.sparse.size(), -1);
434353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  std::vector<int64> dense_feature_last_example(config.dense.size(), -1);
435db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
436db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle features present in the example.
437353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  const size_t parsed_example_size = parsed_example.size();
438353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower  for (size_t i = 0; i < parsed_example_size; ++i) {
439353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    // This is a logic that standard protobuf parsing is implementing.
440353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    // I.e. last entry in the map overwrites all the previous ones.
441353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    parsed::FeatureMapEntry& name_and_feature =
442353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        parsed_example[parsed_example_size - i - 1];
443353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
444353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    const StringPiece feature_name = name_and_feature.first;
445db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    parsed::Feature& feature = name_and_feature.second;
446353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
447db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    std::pair<size_t, Type> d_and_type;
448353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    uint64 h = hasher(feature_name);
449db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (!config_index.Find(h, &d_and_type)) continue;
450353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
451db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t d = d_and_type.first;
452353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    bool is_dense = d_and_type.second == Type::Dense;
453353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
454353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    {
455353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // Testing for PresizedCuckooMap collision.
456353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // TODO(lew): Use dense_hash_map and avoid this and hasher creation.
457353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      const string& config_feature_name = is_dense
458353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                                              ? config.dense[d].feature_name
459353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                                              : config.sparse[d].feature_name;
460353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      if (feature_name != config_feature_name) continue;
461353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    }
462db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
463353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    auto example_error = [&](StringPiece suffix) {
464db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Name: ", example_name, ", Key: ",
465db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     feature_name, ", Index: ", example_index,
466353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                                     ".  ", suffix);
467353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    };
468353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
469353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    auto parse_error = [&] {
470353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      return example_error("Can't parse serialized Example.");
471db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    };
472db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
473353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    DataType example_dtype;
474353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    TF_RETURN_IF_ERROR(feature.ParseDataType(&example_dtype));
475353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
476353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (is_dense) {
477db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype == DT_INVALID) continue;
478db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
479353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // If feature was already visited, skip.
480353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // Compare comment at the beginning of the loop.
481353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      if (dense_feature_last_example[d] == example_index) {
482353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        LOG(WARNING) << "Data loss! Feature '" << feature_name
483353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                     << "' in present in multiple concatenated "
484353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                        "tf.Examples. Ignoring all but last one.";
485353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        static auto* duplicated_dense_feature = monitoring::Counter<0>::New(
486353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "/tensorflow/core/util/example_proto_fast_parsing/"
487353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "duplicated_dense_feature",
488353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "Dense feature appears twice in a tf.Example");
489353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        duplicated_dense_feature->GetCell()->IncrementBy(1);
490353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        continue;
491353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      }
492353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      dense_feature_last_example[d] = example_index;
493353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
494db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype != config.dense[d].dtype) {
495353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        return example_error(
496353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            strings::StrCat("Data types don't match. Data type: ",
497353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                            DataTypeString(example_dtype), "Expected type: ",
498353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                            DataTypeString(config.dense[d].dtype)));
499db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
500db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      Tensor& out = (*output_dense)[d];
501db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
502353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      const std::size_t num_elements = config.dense[d].shape.num_elements();
503db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const std::size_t offset = example_index * num_elements;
504db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
505db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      auto shape_error = [&](size_t size, StringPiece type_str) {
506353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        return example_error(strings::StrCat(
507353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "Number of ", type_str,
508db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            " values != expected.  "
509db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            "Values size: ",
510353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            size, " but output shape: ", config.dense[d].shape.DebugString()));
511db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      };
512db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
513db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.dense[d].dtype) {
514db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
515db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<int64>().data() + offset;
516c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          LimitedArraySlice<int64> slice(out_p, num_elements);
517c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (!feature.ParseInt64List(&slice)) return parse_error();
518c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (slice.EndDistance() != 0) {
519c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower            return shape_error(num_elements - slice.EndDistance(), "int64");
520c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          }
521db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
522db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
523db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
524db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<float>().data() + offset;
525c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          LimitedArraySlice<float> slice(out_p, num_elements);
526c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (!feature.ParseFloatList(&slice)) return parse_error();
527c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (slice.EndDistance() != 0) {
528c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower            return shape_error(num_elements - slice.EndDistance(), "float");
529c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          }
530db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
531db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
532db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
533db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          auto out_p = out.flat<string>().data() + offset;
534c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          LimitedArraySlice<string> slice(out_p, num_elements);
535c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (!feature.ParseBytesList(&slice)) return parse_error();
536c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower          if (slice.EndDistance() != 0) {
537c27d1561bd89f1062e4cbb19262905e609daef80A. Unique TensorFlower            return shape_error(num_elements - slice.EndDistance(), "bytes");
538db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
539db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
540db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
541db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
542db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
543db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
544db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    } else {
545353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // If feature was already visited, skip.
546353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      // Compare comment at the beginning of the loop.
547353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      if (sparse_feature_last_example[d] == example_index) {
548353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        LOG(WARNING) << "Data loss! Feature '" << feature_name
549353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                     << "' in present in multiple concatenated "
550353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                        "tf.Examples. Ignoring all but last one.";
551353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        static auto* duplicated_sparse_feature = monitoring::Counter<0>::New(
552353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "/tensorflow/core/util/example_proto_fast_parsing/"
553353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "duplicated_sparse_feature",
554353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            "sparse feature appears twice in a tf.Example");
555353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        duplicated_sparse_feature->GetCell()->IncrementBy(1);
556353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        continue;
557353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      }
558353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower      sparse_feature_last_example[d] = example_index;
559353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower
560db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Handle sparse features.
561db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      SparseBuffer& out = (*output_sparse)[d];
562db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (example_dtype != DT_INVALID &&
563db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          example_dtype != config.sparse[d].dtype) {
564353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower        return example_error(
565353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower            strings::StrCat("Data types don't match. ", "Expected type: ",
566353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower                            DataTypeString(config.sparse[d].dtype)));
567db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
568db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
569db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.sparse[d].dtype) {
570db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
571db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
572db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseInt64List(&out.int64_list)) {
573353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower              return parse_error();
574db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
575db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
576db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.int64_list.size());
577db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
578db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
579db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
580db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
581db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseFloatList(&out.float_list)) {
582353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower              return parse_error();
583db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
584db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
585db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.float_list.size());
586db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
587db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
588db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
589db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          if (example_dtype != DT_INVALID) {
590db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            if (!feature.ParseBytesList(&out.bytes_list)) {
591353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower              return parse_error();
592db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower            }
593db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          }
594db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          out.example_end_indices.push_back(out.bytes_list.size());
595db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
596db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
597db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
598db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
599db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
600db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
601db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
602db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
603db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle missing dense features.
604db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.dense.size(); ++d) {
605353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (dense_feature_last_example[d] == example_index) continue;
606db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (config.dense[d].default_value.NumElements() == 0) {
607db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Name: ", example_name, ", Feature: ",
608db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     config.dense[d].feature_name,
609db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     " is required but could not be found.");
610db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
611db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
612db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const Tensor& in = config.dense[d].default_value;
613db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor& out = (*output_dense)[d];
614db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const std::size_t num_elements = in.shape().num_elements();
615db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    const std::size_t offset = example_index * num_elements;
616db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
617db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    switch (config.dense[d].dtype) {
618db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_INT64: {
619db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<int64>().data(), num_elements,
620db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<int64>().data() + offset);
621db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
622db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
623db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_FLOAT: {
624db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<float>().data(), num_elements,
625db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<float>().data() + offset);
626db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
627db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
628db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      case DT_STRING: {
629db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        std::copy_n(in.flat<string>().data(), num_elements,
630db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    out.flat<string>().data() + offset);
631db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        break;
632db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
633db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      default:
634db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        CHECK(false) << "Should not happen.";
635db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
636db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
637db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
638db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Handle missing sparse features.
639db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.sparse.size(); ++d) {
640353c9352d53e1147b437eed13cfabd1e01dc2ec9A. Unique TensorFlower    if (sparse_feature_last_example[d] == example_index) continue;
641db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    SparseBuffer& out = (*output_sparse)[d];
642db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t prev_example_end_index =
643db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        out.example_end_indices.empty() ? 0 : out.example_end_indices.back();
644db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    out.example_end_indices.push_back(prev_example_end_index);
645db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
646db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
647db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return Status::OK();
648db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
649db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
650db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus CheckConfigDataType(DataType dtype) {
651db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  switch (dtype) {
652db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_INT64:
653db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_FLOAT:
654db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    case DT_STRING:
655db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return Status::OK();
656db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    default:
657db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      return errors::InvalidArgument("Invalid config dtype: ",
658db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                     DataTypeString(dtype));
659db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
660db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
661db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
662db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace
663db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
664db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlowerStatus FastParseExample(const Config& config,
665db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        gtl::ArraySlice<string> serialized,
666db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        gtl::ArraySlice<string> example_names,
667db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                        thread::ThreadPool* thread_pool, Result* result) {
668db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  DCHECK(result != nullptr);
669db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Check config so we can safely CHECK(false) in switches on config.*.dtype
670db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (auto& c : config.sparse) {
671db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
672db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
673db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (auto& c : config.dense) {
674db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
675db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
676db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
677db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  size_t config_size = config.dense.size() + config.sparse.size();
678db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  SeededHasher hasher;
679db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Build config index.
680db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size);
681db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  bool ok = true;
682db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t i = 0; i < 1000; ++i) {
683db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t d = 0; d < config.dense.size(); ++d) {
684db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      ok &= config_index.InsertUnique(hasher(config.dense[d].feature_name),
685db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                      {d, Type::Dense});
686db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
687db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t d = 0; d < config.sparse.size(); ++d) {
688db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name),
689db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                                      {d, Type::Sparse});
690db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
691db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    if (ok) break;
692db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    LOG(WARNING) << "Collision found. This should happen only if you have "
693db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    "around 2^32 entries in your config.";
694db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    hasher.seed++;
695db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    config_index.Clear(config_size);
696db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
697db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  if (!ok) {
698db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    return errors::Internal(
699db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        "Could not avoid collision. This should not happen.");
700db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
701db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
702db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Allocate dense output (sparse have to be buffered).
703db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.dense.size(); ++d) {
704db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape out_shape;
705db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    out_shape.AddDim(serialized.size());
706db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (const int64 dim : config.dense[d].shape.dim_sizes()) {
707db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      out_shape.AddDim(dim);
708db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
709db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->dense_values.emplace_back(config.dense[d].dtype, out_shape);
710db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
711db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
712db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // This parameter affects performance in a big and data-dependent way.
7137705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  const size_t kMiniBatchSizeBytes = 50000;
714db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
7157705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  // Calculate number of minibatches.
7167705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  // In main regime make each minibatch around kMiniBatchSizeBytes bytes.
7177705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  // Apply 'special logic' below for small and big regimes.
7187705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  const size_t num_minibatches = [&] {
7197705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    size_t result = 0;
720db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t minibatch_bytes = 0;
721db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t i = 0; i < serialized.size(); i++) {
722db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (minibatch_bytes == 0) {  // start minibatch
7237705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower        result++;
724db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
725db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      minibatch_bytes += serialized[i].size() + 1;
726db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (minibatch_bytes > kMiniBatchSizeBytes) {
727db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        minibatch_bytes = 0;
728db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
729db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
7307705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    // 'special logic'
7317705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    const size_t min_minibatches = std::min<size_t>(8, serialized.size());
7327705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    const size_t max_minibatches = 64;
7337705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    return std::max<size_t>(min_minibatches,
7347705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower                            std::min<size_t>(max_minibatches, result));
735db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }();
736db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
7377705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  auto first_example_of_minibatch = [&](size_t minibatch) -> size_t {
7387705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    return (serialized.size() * minibatch) / num_minibatches;
7397705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  };
7407705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower
7417705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  // TODO(lew): A big performance low-hanging fruit here is to improve
7427705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   num_minibatches calculation to take into account actual amount of work
7437705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   needed, as the size in bytes is not perfect. Linear combination of
7447705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   size in bytes and average number of features per example is promising.
7457705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   Even better: measure time instead of estimating, but this is too costly
7467705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   in small batches.
7477705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower  //   Maybe accept outside parameter #num_minibatches?
748db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
749db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Do minibatches in parallel.
750db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches);
751db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  std::vector<Status> status_of_minibatch(num_minibatches);
752db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto ProcessMiniBatch = [&](size_t minibatch) {
753db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    sparse_buffers[minibatch].resize(config.sparse.size());
7547705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    size_t start = first_example_of_minibatch(minibatch);
7557705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower    size_t end = first_example_of_minibatch(minibatch + 1);
756db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t e = start; e < end; ++e) {
757db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      status_of_minibatch[minibatch] = FastParseSerializedExample(
758db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          serialized[e],
759db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          (example_names.size() > 0 ? example_names[e] : "<unknown>"), e,
760db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          config, config_index, hasher, &result->dense_values,
761db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          &sparse_buffers[minibatch]);
762db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      if (!status_of_minibatch[minibatch].ok()) break;
763db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
764db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  };
765db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
766db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  ParallelFor(ProcessMiniBatch, num_minibatches, thread_pool);
767db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
768db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (Status& status : status_of_minibatch) {
769db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TF_RETURN_IF_ERROR(status);
770db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
771db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
772db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  // Merge SparseBuffers from all minibatches for every config.sparse.
773db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  auto MergeMinibatches = [&](size_t d) {
774db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    // Loop over minibatches
775db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t total_num_features = 0;
776db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t max_num_features = 0;
777db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (auto& sparse_values_tmp : sparse_buffers) {
778db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      std::vector<size_t>& end_indices =
779db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          sparse_values_tmp[d].example_end_indices;
780db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      total_num_features += end_indices.back();
781db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      max_num_features = std::max(max_num_features, end_indices[0]);
782db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      for (size_t i = 1; i < end_indices.size(); ++i) {
783db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        size_t example_size = end_indices[i] - end_indices[i - 1];
784db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        max_num_features = std::max(max_num_features, example_size);
785db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
786db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
787db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
788db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape indices_shape;
789db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    indices_shape.AddDim(total_num_features);
790db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    indices_shape.AddDim(2);
791db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_indices.emplace_back(DT_INT64, indices_shape);
792db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor* indices = &result->sparse_indices.back();
793db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
794db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    TensorShape values_shape;
795db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    values_shape.AddDim(total_num_features);
796db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_values.emplace_back(config.sparse[d].dtype, values_shape);
797db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    Tensor* values = &result->sparse_values.back();
798db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
799db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    result->sparse_shapes.emplace_back(DT_INT64, TensorShape({2}));
800db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    auto shapes_shape_t = result->sparse_shapes.back().vec<int64>();
801db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    shapes_shape_t(0) = serialized.size();
802db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    shapes_shape_t(1) = max_num_features;
803db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
804db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    size_t offset = 0;
805db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    for (size_t i = 0; i < sparse_buffers.size(); ++i) {
806db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      const SparseBuffer& buffer = sparse_buffers[i][d];
807db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
808db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Update indices.
809db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      int64* ix_p = &indices->matrix<int64>()(offset, 0);
810db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      size_t delta = 0;
8117705791619f5e851687e9a63b4315087e189f8beA. Unique TensorFlower      size_t example_index = first_example_of_minibatch(i);
812db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      for (size_t example_end_index : buffer.example_end_indices) {
813db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        size_t feature_index = 0;
814db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        for (; delta < example_end_index; ++delta) {
815db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          // Column 0: example index
816db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          *ix_p = example_index;
817db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          // Column 1: the feature index buffer example
818db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          *(ix_p + 1) = feature_index;
819db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          ix_p += 2;
820db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          ++feature_index;
821db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
822db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        ++example_index;
823db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
824db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
825db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      // Copy values over.
826db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      switch (config.sparse[d].dtype) {
827db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_INT64: {
828db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy(buffer.int64_list.begin(), buffer.int64_list.end(),
829db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<int64>().data() + offset);
830db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
831db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
832db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_FLOAT: {
833db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::copy(buffer.float_list.begin(), buffer.float_list.end(),
834db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<float>().data() + offset);
835db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
836db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
837db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        case DT_STRING: {
838db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(),
839db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower                    values->flat<string>().data() + offset);
840db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          break;
841db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        }
842db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower        default:
843db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower          CHECK(false) << "Should not happen.";
844db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      }
845db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
846db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower      offset += delta;
847db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    }
848db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  };
849db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
850db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  for (size_t d = 0; d < config.sparse.size(); ++d) {
851db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower    MergeMinibatches(d);
852db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  }
853db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
854db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower  return Status::OK();
855db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}
856db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower
857db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace example
858db7bdab6e586e02051556d9f36a7887500378cf9A. Unique TensorFlower}  // namespace tensorflow
859