1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4//
5// Decodes the blocks generated by block_builder.cc.
6
7#include "table/block.h"
8
9#include <vector>
10#include <algorithm>
11#include "leveldb/comparator.h"
12#include "table/format.h"
13#include "util/coding.h"
14#include "util/logging.h"
15
16namespace leveldb {
17
18inline uint32_t Block::NumRestarts() const {
19  assert(size_ >= sizeof(uint32_t));
20  return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
21}
22
23Block::Block(const BlockContents& contents)
24    : data_(contents.data.data()),
25      size_(contents.data.size()),
26      owned_(contents.heap_allocated) {
27  if (size_ < sizeof(uint32_t)) {
28    size_ = 0;  // Error marker
29  } else {
30    size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t);
31    if (NumRestarts() > max_restarts_allowed) {
32      // The size is too small for NumRestarts()
33      size_ = 0;
34    } else {
35      restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);
36    }
37  }
38}
39
40Block::~Block() {
41  if (owned_) {
42    delete[] data_;
43  }
44}
45
46// Helper routine: decode the next block entry starting at "p",
47// storing the number of shared key bytes, non_shared key bytes,
48// and the length of the value in "*shared", "*non_shared", and
49// "*value_length", respectively.  Will not derefence past "limit".
50//
51// If any errors are detected, returns NULL.  Otherwise, returns a
52// pointer to the key delta (just past the three decoded values).
53static inline const char* DecodeEntry(const char* p, const char* limit,
54                                      uint32_t* shared,
55                                      uint32_t* non_shared,
56                                      uint32_t* value_length) {
57  if (limit - p < 3) return NULL;
58  *shared = reinterpret_cast<const unsigned char*>(p)[0];
59  *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
60  *value_length = reinterpret_cast<const unsigned char*>(p)[2];
61  if ((*shared | *non_shared | *value_length) < 128) {
62    // Fast path: all three values are encoded in one byte each
63    p += 3;
64  } else {
65    if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL;
66    if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL;
67    if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
68  }
69
70  if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
71    return NULL;
72  }
73  return p;
74}
75
76class Block::Iter : public Iterator {
77 private:
78  const Comparator* const comparator_;
79  const char* const data_;      // underlying block contents
80  uint32_t const restarts_;     // Offset of restart array (list of fixed32)
81  uint32_t const num_restarts_; // Number of uint32_t entries in restart array
82
83  // current_ is offset in data_ of current entry.  >= restarts_ if !Valid
84  uint32_t current_;
85  uint32_t restart_index_;  // Index of restart block in which current_ falls
86  std::string key_;
87  Slice value_;
88  Status status_;
89
90  inline int Compare(const Slice& a, const Slice& b) const {
91    return comparator_->Compare(a, b);
92  }
93
94  // Return the offset in data_ just past the end of the current entry.
95  inline uint32_t NextEntryOffset() const {
96    return (value_.data() + value_.size()) - data_;
97  }
98
99  uint32_t GetRestartPoint(uint32_t index) {
100    assert(index < num_restarts_);
101    return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
102  }
103
104  void SeekToRestartPoint(uint32_t index) {
105    key_.clear();
106    restart_index_ = index;
107    // current_ will be fixed by ParseNextKey();
108
109    // ParseNextKey() starts at the end of value_, so set value_ accordingly
110    uint32_t offset = GetRestartPoint(index);
111    value_ = Slice(data_ + offset, 0);
112  }
113
114 public:
115  Iter(const Comparator* comparator,
116       const char* data,
117       uint32_t restarts,
118       uint32_t num_restarts)
119      : comparator_(comparator),
120        data_(data),
121        restarts_(restarts),
122        num_restarts_(num_restarts),
123        current_(restarts_),
124        restart_index_(num_restarts_) {
125    assert(num_restarts_ > 0);
126  }
127
128  virtual bool Valid() const { return current_ < restarts_; }
129  virtual Status status() const { return status_; }
130  virtual Slice key() const {
131    assert(Valid());
132    return key_;
133  }
134  virtual Slice value() const {
135    assert(Valid());
136    return value_;
137  }
138
139  virtual void Next() {
140    assert(Valid());
141    ParseNextKey();
142  }
143
144  virtual void Prev() {
145    assert(Valid());
146
147    // Scan backwards to a restart point before current_
148    const uint32_t original = current_;
149    while (GetRestartPoint(restart_index_) >= original) {
150      if (restart_index_ == 0) {
151        // No more entries
152        current_ = restarts_;
153        restart_index_ = num_restarts_;
154        return;
155      }
156      restart_index_--;
157    }
158
159    SeekToRestartPoint(restart_index_);
160    do {
161      // Loop until end of current entry hits the start of original entry
162    } while (ParseNextKey() && NextEntryOffset() < original);
163  }
164
165  virtual void Seek(const Slice& target) {
166    // Binary search in restart array to find the last restart point
167    // with a key < target
168    uint32_t left = 0;
169    uint32_t right = num_restarts_ - 1;
170    while (left < right) {
171      uint32_t mid = (left + right + 1) / 2;
172      uint32_t region_offset = GetRestartPoint(mid);
173      uint32_t shared, non_shared, value_length;
174      const char* key_ptr = DecodeEntry(data_ + region_offset,
175                                        data_ + restarts_,
176                                        &shared, &non_shared, &value_length);
177      if (key_ptr == NULL || (shared != 0)) {
178        CorruptionError();
179        return;
180      }
181      Slice mid_key(key_ptr, non_shared);
182      if (Compare(mid_key, target) < 0) {
183        // Key at "mid" is smaller than "target".  Therefore all
184        // blocks before "mid" are uninteresting.
185        left = mid;
186      } else {
187        // Key at "mid" is >= "target".  Therefore all blocks at or
188        // after "mid" are uninteresting.
189        right = mid - 1;
190      }
191    }
192
193    // Linear search (within restart block) for first key >= target
194    SeekToRestartPoint(left);
195    while (true) {
196      if (!ParseNextKey()) {
197        return;
198      }
199      if (Compare(key_, target) >= 0) {
200        return;
201      }
202    }
203  }
204
205  virtual void SeekToFirst() {
206    SeekToRestartPoint(0);
207    ParseNextKey();
208  }
209
210  virtual void SeekToLast() {
211    SeekToRestartPoint(num_restarts_ - 1);
212    while (ParseNextKey() && NextEntryOffset() < restarts_) {
213      // Keep skipping
214    }
215  }
216
217 private:
218  void CorruptionError() {
219    current_ = restarts_;
220    restart_index_ = num_restarts_;
221    status_ = Status::Corruption("bad entry in block");
222    key_.clear();
223    value_.clear();
224  }
225
226  bool ParseNextKey() {
227    current_ = NextEntryOffset();
228    const char* p = data_ + current_;
229    const char* limit = data_ + restarts_;  // Restarts come right after data
230    if (p >= limit) {
231      // No more entries to return.  Mark as invalid.
232      current_ = restarts_;
233      restart_index_ = num_restarts_;
234      return false;
235    }
236
237    // Decode next entry
238    uint32_t shared, non_shared, value_length;
239    p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
240    if (p == NULL || key_.size() < shared) {
241      CorruptionError();
242      return false;
243    } else {
244      key_.resize(shared);
245      key_.append(p, non_shared);
246      value_ = Slice(p + non_shared, value_length);
247      while (restart_index_ + 1 < num_restarts_ &&
248             GetRestartPoint(restart_index_ + 1) < current_) {
249        ++restart_index_;
250      }
251      return true;
252    }
253  }
254};
255
256Iterator* Block::NewIterator(const Comparator* cmp) {
257  if (size_ < sizeof(uint32_t)) {
258    return NewErrorIterator(Status::Corruption("bad block contents"));
259  }
260  const uint32_t num_restarts = NumRestarts();
261  if (num_restarts == 0) {
262    return NewEmptyIterator();
263  } else {
264    return new Iter(cmp, data_, restart_offset_, num_restarts);
265  }
266}
267
268}  // namespace leveldb
269