1c8b59c046895fa5b6d79f73e0b5817330fcfbfc1A. Unique TensorFlower/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur 3854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurLicensed under the Apache License, Version 2.0 (the "License"); 4854f49bd43588c062b046384f239f64a3d819702Manjunath Kudluryou may not use this file except in compliance with the License. 5854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurYou may obtain a copy of the License at 6854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur 7854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur http://www.apache.org/licenses/LICENSE-2.0 8854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur 9854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurUnless required by applicable law or agreed to in writing, software 10854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlurdistributed under the License is distributed on an "AS IS" BASIS, 11854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12854f49bd43588c062b046384f239f64a3d819702Manjunath KudlurSee the License for the specific language governing permissions and 13854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlurlimitations under the License. 14854f49bd43588c062b046384f239f64a3d819702Manjunath Kudlur==============================================================================*/ 15f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 16f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/table.h" 17f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 18f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/core/coding.h" 19f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/core/errors.h" 20f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/block.h" 21f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/format.h" 22f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/table_options.h" 23f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur#include "tensorflow/core/lib/io/two_level_iterator.h" 243a7d603feaf6434ffd6d9fd27cac65c1ef565e07Josh Levenberg#include "tensorflow/core/platform/env.h" 25f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 26f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace tensorflow { 27f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurnamespace table { 28f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 29f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstruct Table::Rep { 30f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur ~Rep() { delete index_block; } 31f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 32f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Options options; 33f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Status status; 34f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur RandomAccessFile* file; 35f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // XXX uint64 cache_id; 36f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 37f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer 38f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Block* index_block; 39f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur}; 40f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 4156313def004795f75ef8281a0294c958d28f1e06Vijay VasudevanStatus Table::Open(const Options& options, RandomAccessFile* file, uint64 size, 4256313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan Table** table) { 43d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower *table = nullptr; 44f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (size < Footer::kEncodedLength) { 45f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return errors::DataLoss("file is too short to be an sstable"); 46f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 47f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 48f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur char footer_space[Footer::kEncodedLength]; 49f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur StringPiece footer_input; 5056313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, 5156313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan &footer_input, footer_space); 52f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (!s.ok()) return s; 53f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 54f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Footer footer; 55f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur s = footer.DecodeFrom(&footer_input); 56f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (!s.ok()) return s; 57f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 58f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Read the index block 59f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockContents contents; 60d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower Block* index_block = nullptr; 61f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 62f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur s = ReadBlock(file, footer.index_handle(), &contents); 63f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 64f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur index_block = new Block(contents); 65f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 66f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 67f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 68f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 69f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // We've successfully read the footer and the index block: we're 70f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // ready to serve requests. 71f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Rep* rep = new Table::Rep; 72f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur rep->options = options; 73f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur rep->file = file; 74f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur rep->metaindex_handle = footer.metaindex_handle(); 75f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur rep->index_block = index_block; 76f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // XXX rep->cache_id = (options.block_cache ? 77f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // options.block_cache->NewId() : 0); 78f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur *table = new Table(rep); 79f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } else { 80f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (index_block) delete index_block; 81f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 82f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 83f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return s; 84f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 85f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 86f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurTable::~Table() { delete rep_; } 87f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 88f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlurstatic void DeleteBlock(void* arg, void* ignored) { 89f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur delete reinterpret_cast<Block*>(arg); 90f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 91f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 92f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// Convert an index iterator value (i.e., an encoded BlockHandle) 93f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur// into an iterator over the contents of the corresponding block. 94f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurIterator* Table::BlockReader(void* arg, const StringPiece& index_value) { 95f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Table* table = reinterpret_cast<Table*>(arg); 96f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Cache* block_cache = table->rep_->options.block_cache; 97d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower Block* block = nullptr; 98f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Cache::Handle* cache_handle = NULL; 99f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 100f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockHandle handle; 101f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur StringPiece input = index_value; 102f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Status s = handle.DecodeFrom(&input); 103f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // We intentionally allow extra stuff in index_value so that we 104f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // can add more features in the future. 105f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 106f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 107f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockContents contents; 108f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur s = ReadBlock(table->rep_->file, handle, &contents); 109f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 110f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur block = new Block(contents); 111f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 112f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 113f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 114f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Iterator* iter; 115d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower if (block != nullptr) { 116f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur iter = block->NewIterator(); 117d83074847ebfe8871188f1f9f1e84ab0451f59e6A. Unique TensorFlower iter->RegisterCleanup(&DeleteBlock, block, nullptr); 118f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } else { 119f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur iter = NewErrorIterator(s); 120f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 121f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return iter; 122f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 123f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 124f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurIterator* Table::NewIterator() const { 125f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return NewTwoLevelIterator(rep_->index_block->NewIterator(), 126f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur &Table::BlockReader, const_cast<Table*>(this)); 127f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 128f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 129f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath KudlurStatus Table::InternalGet(const StringPiece& k, void* arg, 13056313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan void (*saver)(void*, const StringPiece&, 13156313def004795f75ef8281a0294c958d28f1e06Vijay Vasudevan const StringPiece&)) { 132f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Status s; 133f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Iterator* iiter = rep_->index_block->NewIterator(); 134f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur iiter->Seek(k); 135f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (iiter->Valid()) { 136f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockHandle handle; 137f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Iterator* block_iter = BlockReader(this, iiter->value()); 138f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur block_iter->Seek(k); 139f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (block_iter->Valid()) { 140f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur (*saver)(arg, block_iter->key(), block_iter->value()); 141f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 142f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur s = block_iter->status(); 143f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur delete block_iter; 144f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 145f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 146f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur s = iiter->status(); 147f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 148f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur delete iiter; 149f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return s; 150f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 151f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 152f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudluruint64 Table::ApproximateOffsetOf(const StringPiece& key) const { 153f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Iterator* index_iter = rep_->index_block->NewIterator(); 154f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur index_iter->Seek(key); 155f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur uint64 result; 156f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (index_iter->Valid()) { 157f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur BlockHandle handle; 158f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur StringPiece input = index_iter->value(); 159f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur Status s = handle.DecodeFrom(&input); 160f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur if (s.ok()) { 161f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur result = handle.offset(); 162f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } else { 163f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // Strange: we can't decode the block handle in the index block. 164f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // We'll just return the offset of the metaindex block, which is 165f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // close to the whole file size for this case. 166f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur result = rep_->metaindex_handle.offset(); 167f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 168f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } else { 169f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // key is past the last key in the file. Approximate the offset 170f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // by returning the offset of the metaindex block (which is 171f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur // right near the end of the file). 172f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur result = rep_->metaindex_handle.offset(); 173f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur } 174f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur delete index_iter; 175f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur return result; 176f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} 177f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur 178f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace table 179f41959ccb2d9d4c722fe8fc3351401d53bcf490Manjunath Kudlur} // namespace tensorflow 180