1c8b59c046895fa5b6d79f73e0b5817330fcfbfc1A. Unique TensorFlower/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur
3854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurLicensed under the Apache License, Version 2.0 (the "License");
4854f49bd43588c062b046384f239f64a3d819702Manjunath Kudluryou may not use this file except in compliance with the License.
5854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurYou may obtain a copy of the License at
6854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur
7854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur    http://www.apache.org/licenses/LICENSE-2.0
8854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur
9854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurUnless required by applicable law or agreed to in writing, software
10854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlurdistributed under the License is distributed on an "AS IS" BASIS,
11854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurSee the License for the specific language governing permissions and
13854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlurlimitations under the License.
14854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur==============================================================================*/
15f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
16f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/table.h"
17f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
18f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/core/coding.h"
19f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/core/errors.h"
20f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/block.h"
21f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/format.h"
22f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/table_options.h"
23f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/two_level_iterator.h"
243a7d603feaf6434ffd6d9fd27cac65c1ef565e07Josh Levenberg#include "tensorflow/core/platform/env.h"
25f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
26f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace tensorflow {
27f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace table {
28f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
29f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstruct Table::Rep {
30f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  ~Rep() { delete index_block; }
31f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
32f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Options options;
33f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Status status;
34f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  RandomAccessFile* file;
35f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // XXX  uint64 cache_id;
36f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
37f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
38f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Block* index_block;
39f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur};
40f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
4156313def004795f75ef8281a0294c958d28f1e06Vijay VasudevanStatus Table::Open(const Options& options, RandomAccessFile* file, uint64 size,
4256313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan                   Table** table) {
43d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower  *table = nullptr;
44f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (size < Footer::kEncodedLength) {
45f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    return errors::DataLoss("file is too short to be an sstable");
46f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
47f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
48f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  char footer_space[Footer::kEncodedLength];
49f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  StringPiece footer_input;
5056313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan  Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
5156313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan                        &footer_input, footer_space);
52f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (!s.ok()) return s;
53f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
54f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Footer footer;
55f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  s = footer.DecodeFrom(&footer_input);
56f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (!s.ok()) return s;
57f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
58f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // Read the index block
59f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  BlockContents contents;
60d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower  Block* index_block = nullptr;
61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (s.ok()) {
62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    s = ReadBlock(file, footer.index_handle(), &contents);
63f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    if (s.ok()) {
64f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      index_block = new Block(contents);
65f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    }
66f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (s.ok()) {
69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // We've successfully read the footer and the index block: we're
70f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // ready to serve requests.
71f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    Rep* rep = new Table::Rep;
72f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    rep->options = options;
73f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    rep->file = file;
74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    rep->metaindex_handle = footer.metaindex_handle();
75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    rep->index_block = index_block;
76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // XXX    rep->cache_id = (options.block_cache ?
77f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // options.block_cache->NewId() : 0);
78f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    *table = new Table(rep);
79f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  } else {
80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    if (index_block) delete index_block;
81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
83f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  return s;
84f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
85f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
86f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurTable::~Table() { delete rep_; }
87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstatic void DeleteBlock(void* arg, void* ignored) {
89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  delete reinterpret_cast<Block*>(arg);
90f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
91f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
92f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Convert an index iterator value (i.e., an encoded BlockHandle)
93f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// into an iterator over the contents of the corresponding block.
94f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurIterator* Table::BlockReader(void* arg, const StringPiece& index_value) {
95f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Table* table = reinterpret_cast<Table*>(arg);
96f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //  Cache* block_cache = table->rep_->options.block_cache;
97d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower  Block* block = nullptr;
98f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  //  Cache::Handle* cache_handle = NULL;
99f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  BlockHandle handle;
101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  StringPiece input = index_value;
102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Status s = handle.DecodeFrom(&input);
103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // We intentionally allow extra stuff in index_value so that we
104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  // can add more features in the future.
105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (s.ok()) {
107f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    BlockContents contents;
108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    s = ReadBlock(table->rep_->file, handle, &contents);
109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    if (s.ok()) {
110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      block = new Block(contents);
111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    }
112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Iterator* iter;
115d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower  if (block != nullptr) {
116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    iter = block->NewIterator();
117d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower    iter->RegisterCleanup(&DeleteBlock, block, nullptr);
118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  } else {
119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    iter = NewErrorIterator(s);
120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  return iter;
122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurIterator* Table::NewIterator() const {
125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  return NewTwoLevelIterator(rep_->index_block->NewIterator(),
126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur                             &Table::BlockReader, const_cast<Table*>(this));
127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurStatus Table::InternalGet(const StringPiece& k, void* arg,
13056313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan                          void (*saver)(void*, const StringPiece&,
13156313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan                                        const StringPiece&)) {
132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Status s;
133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Iterator* iiter = rep_->index_block->NewIterator();
134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  iiter->Seek(k);
135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (iiter->Valid()) {
136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    BlockHandle handle;
137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    Iterator* block_iter = BlockReader(this, iiter->value());
138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    block_iter->Seek(k);
139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    if (block_iter->Valid()) {
140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      (*saver)(arg, block_iter->key(), block_iter->value());
141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    }
142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    s = block_iter->status();
143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    delete block_iter;
144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (s.ok()) {
146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    s = iiter->status();
147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  delete iiter;
149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  return s;
150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudluruint64 Table::ApproximateOffsetOf(const StringPiece& key) const {
153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  Iterator* index_iter = rep_->index_block->NewIterator();
154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  index_iter->Seek(key);
155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  uint64 result;
156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  if (index_iter->Valid()) {
157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    BlockHandle handle;
158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    StringPiece input = index_iter->value();
159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    Status s = handle.DecodeFrom(&input);
160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    if (s.ok()) {
161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      result = handle.offset();
162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    } else {
163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      // Strange: we can't decode the block handle in the index block.
164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      // We'll just return the offset of the metaindex block, which is
165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      // close to the whole file size for this case.
166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur      result = rep_->metaindex_handle.offset();
167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    }
168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  } else {
169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // key is past the last key in the file.  Approximate the offset
170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // by returning the offset of the metaindex block (which is
171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    // right near the end of the file).
172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur    result = rep_->metaindex_handle.offset();
173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  }
174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  delete index_iter;
175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur  return result;
176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}
177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur
178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace table
179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}  // namespace tensorflow
180