1179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// Use of this source code is governed by a BSD-style license that can be
3179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// found in the LICENSE file. See the AUTHORS file for names of contributors.
4179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
5179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/db_impl.h"
6179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
7179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <algorithm>
8179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <set>
9179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <string>
10179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <stdint.h>
11179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <stdio.h>
12179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include <vector>
13179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/builder.h"
14179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/db_iter.h"
15179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/dbformat.h"
16179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/filename.h"
17179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/log_reader.h"
18179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/log_writer.h"
19179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/memtable.h"
20179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/table_cache.h"
21179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/version_set.h"
22179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "db/write_batch_internal.h"
23fbd97aa4c5325eace57d24b89845b9581bac9324jorlow@chromium.org#include "leveldb/db.h"
24fbd97aa4c5325eace57d24b89845b9581bac9324jorlow@chromium.org#include "leveldb/env.h"
25fbd97aa4c5325eace57d24b89845b9581bac9324jorlow@chromium.org#include "leveldb/status.h"
26fbd97aa4c5325eace57d24b89845b9581bac9324jorlow@chromium.org#include "leveldb/table.h"
27fbd97aa4c5325eace57d24b89845b9581bac9324jorlow@chromium.org#include "leveldb/table_builder.h"
28179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "port/port.h"
29179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "table/block.h"
30179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "table/merger.h"
31179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "table/two_level_iterator.h"
32179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "util/coding.h"
33179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "util/logging.h"
34179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#include "util/mutexlock.h"
35179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
36179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgnamespace leveldb {
37179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
// Number of open-file slots held back from the table cache: the minimum
// allowed max_open_files is raised by this amount, and DBImpl subtracts it
// from max_open_files when sizing its TableCache.
const int kNumNonTableCacheFiles = 10;
3911042098fe3e5a16ad70c388bb5914a907ae3faedgrogan@chromium.org
4013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com// Information kept for every waiting writer
4113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.comstruct DBImpl::Writer {
4213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Status status;
4313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  WriteBatch* batch;
4413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  bool sync;
4513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  bool done;
4613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  port::CondVar cv;
4713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
4813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  explicit Writer(port::Mutex* mu) : cv(mu) { }
4913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com};
5013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
51179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgstruct DBImpl::CompactionState {
52179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Compaction* const compaction;
53179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
54179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Sequence numbers < smallest_snapshot are not significant since we
55179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // will never have to service a snapshot below smallest_snapshot.
56179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Therefore if we have seen a sequence number S <= smallest_snapshot,
57179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // we can drop all entries for the same key with sequence numbers < S.
58179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  SequenceNumber smallest_snapshot;
59179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
60179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Files produced by compaction
61179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  struct Output {
62179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t number;
63179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t file_size;
64179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    InternalKey smallest, largest;
65179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  };
66179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::vector<Output> outputs;
67179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
68179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // State kept for output being generated
69179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  WritableFile* outfile;
70179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  TableBuilder* builder;
71179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
72179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  uint64_t total_bytes;
73179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
74179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Output* current_output() { return &outputs[outputs.size()-1]; }
75179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
76179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  explicit CompactionState(Compaction* c)
77179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      : compaction(c),
78179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        outfile(NULL),
79179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        builder(NULL),
80179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        total_bytes(0) {
81179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
82179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org};
83179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
// Helper used when fixing user-supplied options to be reasonable.
//
// Forces *ptr into [minvalue, maxvalue].  Comparisons are performed after
// converting *ptr to V.  The upper bound is applied first and the lower
// bound second, so the lower bound wins if the two bounds are inverted.
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  T& value = *ptr;
  if (maxvalue < static_cast<V>(value)) {
    value = maxvalue;
  }
  if (minvalue > static_cast<V>(value)) {
    value = minvalue;
  }
}
90179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgOptions SanitizeOptions(const std::string& dbname,
91179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                        const InternalKeyComparator* icmp,
9299a7585544fc162a5f8dd39a6add00776a981efesanjay@google.com                        const InternalFilterPolicy* ipolicy,
93179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                        const Options& src) {
94179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Options result = src;
95179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  result.comparator = icmp;
9699a7585544fc162a5f8dd39a6add00776a981efesanjay@google.com  result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
9711042098fe3e5a16ad70c388bb5914a907ae3faedgrogan@chromium.org  ClipToRange(&result.max_open_files,    64 + kNumNonTableCacheFiles, 50000);
9811042098fe3e5a16ad70c388bb5914a907ae3faedgrogan@chromium.org  ClipToRange(&result.write_buffer_size, 64<<10,                      1<<30);
9911042098fe3e5a16ad70c388bb5914a907ae3faedgrogan@chromium.org  ClipToRange(&result.block_size,        1<<10,                       4<<20);
100179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (result.info_log == NULL) {
101179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Open a log file in the same directory as the db
102179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    src.env->CreateDir(dbname);  // In case it does not exist
103179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
104f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log);
105179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (!s.ok()) {
106179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      // No place suitable for logging
107f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com      result.info_log = NULL;
108179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
109179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
11095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  if (result.block_cache == NULL) {
11195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    result.block_cache = NewLRUCache(8 << 20);
11295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
113179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return result;
114179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
115179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
11608595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.orgDBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
11708595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org    : env_(raw_options.env),
11808595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      internal_comparator_(raw_options.comparator),
11908595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      internal_filter_policy_(raw_options.filter_policy),
12008595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      options_(SanitizeOptions(dbname, &internal_comparator_,
12108595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org                               &internal_filter_policy_, raw_options)),
12208595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      owns_info_log_(options_.info_log != raw_options.info_log),
12308595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      owns_cache_(options_.block_cache != raw_options.block_cache),
124179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      dbname_(dbname),
125179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      db_lock_(NULL),
126179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      shutting_down_(NULL),
127179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      bg_cv_(&mutex_),
128179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      mem_(new MemTable(internal_comparator_)),
12995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      imm_(NULL),
130179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      logfile_(NULL),
1318cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      logfile_number_(0),
132179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      log_(NULL),
13308595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      seed_(0),
13413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      tmp_batch_(new WriteBatch),
135179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      bg_compaction_scheduled_(false),
1364935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      manual_compaction_(NULL) {
137a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  mem_->Ref();
13895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  has_imm_.Release_Store(NULL);
13995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
140179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Reserve ten files or so for other uses and give the rest to TableCache.
14108595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  const int table_cache_size = options_.max_open_files - kNumNonTableCacheFiles;
142179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
143179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
144179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  versions_ = new VersionSet(dbname_, &options_, table_cache_,
145179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                             &internal_comparator_);
146179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
147179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
148179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgDBImpl::~DBImpl() {
149179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Wait for background work to finish
150179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Lock();
151179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  shutting_down_.Release_Store(this);  // Any non-NULL value is ok
1526635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  while (bg_compaction_scheduled_) {
1536635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org    bg_cv_.Wait();
154179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
155179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Unlock();
156179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
157179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (db_lock_ != NULL) {
158179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    env_->UnlockFile(db_lock_);
159179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
160179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
161179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete versions_;
162a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  if (mem_ != NULL) mem_->Unref();
163a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  if (imm_ != NULL) imm_->Unref();
16413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  delete tmp_batch_;
165179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete log_;
166179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete logfile_;
167179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete table_cache_;
168179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
169179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (owns_info_log_) {
170179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    delete options_.info_log;
171179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
17295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  if (owns_cache_) {
17395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    delete options_.block_cache;
17495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
175179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
176179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
177179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::NewDB() {
178179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  VersionEdit new_db;
179179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  new_db.SetComparatorName(user_comparator()->Name());
18095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  new_db.SetLogNumber(0);
181179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  new_db.SetNextFile(2);
182179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  new_db.SetLastSequence(0);
183179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
184179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  const std::string manifest = DescriptorFileName(dbname_, 1);
185179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  WritableFile* file;
186179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status s = env_->NewWritableFile(manifest, &file);
187179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (!s.ok()) {
188179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    return s;
189179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
190179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  {
191179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    log::Writer log(file);
192179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    std::string record;
193179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    new_db.EncodeTo(&record);
194179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = log.AddRecord(record);
195179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (s.ok()) {
196179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      s = file->Close();
197179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
198179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
199179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete file;
200179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
201179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Make "CURRENT" file that points to the new manifest file.
202179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = SetCurrentFile(env_, dbname_, 1);
203179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
204179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    env_->DeleteFile(manifest);
205179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
206179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
207179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
208179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
209179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::MaybeIgnoreError(Status* s) const {
210179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s->ok() || options_.paranoid_checks) {
211179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // No change needed
212179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
213f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Log(options_.info_log, "Ignoring error %s", s->ToString().c_str());
214179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    *s = Status::OK();
215179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
216179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
217179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
218179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::DeleteObsoleteFiles() {
2194935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  if (!bg_error_.ok()) {
2204935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // After a background error, we don't know whether a new version may
2214935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // or may not have been committed, so we cannot safely garbage collect.
2224935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    return;
2234935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  }
2244935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org
225179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Make a set of all of the live files
226179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::set<uint64_t> live = pending_outputs_;
227179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  versions_->AddLiveFiles(&live);
228179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
229179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::vector<std::string> filenames;
230179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
231179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  uint64_t number;
232179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  FileType type;
2331511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  for (size_t i = 0; i < filenames.size(); i++) {
2341511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org    if (ParseFileName(filenames[i], &number, &type)) {
235179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      bool keep = true;
236179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      switch (type) {
237179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kLogFile:
2388cd4ab8303620197cf24282ae8639060efbb326egabor@google.com          keep = ((number >= versions_->LogNumber()) ||
23995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org                  (number == versions_->PrevLogNumber()));
240179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
241179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kDescriptorFile:
242179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          // Keep my manifest file, and any newer incarnations'
243179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          // (in case there is a race that allows other incarnations)
244179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          keep = (number >= versions_->ManifestFileNumber());
245179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
246179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kTableFile:
247179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          keep = (live.find(number) != live.end());
248179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
249179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kTempFile:
250179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          // Any temp files that are currently being written to must
251179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          // be recorded in pending_outputs_, which is inserted into "live"
252179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          keep = (live.find(number) != live.end());
253179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
254179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kCurrentFile:
255179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kDBLockFile:
256179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        case kInfoLogFile:
257179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          keep = true;
258179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
259179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
260179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
261179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (!keep) {
262179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        if (type == kTableFile) {
263179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          table_cache_->Evict(number);
264179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        }
265f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com        Log(options_.info_log, "Delete type=%d #%lld\n",
266179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org            int(type),
267179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org            static_cast<unsigned long long>(number));
268179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        env_->DeleteFile(dbname_ + "/" + filenames[i]);
269179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
270179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
271179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
272179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
273179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
274179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::Recover(VersionEdit* edit) {
275179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
276179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
277179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Ignore error from CreateDir since the creation of the DB is
278179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // committed only when the descriptor is created, and this directory
279179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // may already exist from a previous failed creation attempt.
280179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  env_->CreateDir(dbname_);
281179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(db_lock_ == NULL);
282179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
283179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (!s.ok()) {
284179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    return s;
285179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
286179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
287179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (!env_->FileExists(CurrentFileName(dbname_))) {
288179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (options_.create_if_missing) {
289179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      s = NewDB();
290179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (!s.ok()) {
291179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        return s;
292179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
293179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    } else {
294179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      return Status::InvalidArgument(
295179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          dbname_, "does not exist (create_if_missing is false)");
296179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
297179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
298179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (options_.error_if_exists) {
299179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      return Status::InvalidArgument(
300179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          dbname_, "exists (error_if_exists is true)");
301179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
302179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
303179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
30495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  s = versions_->Recover();
305179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
306179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    SequenceNumber max_sequence(0);
3078cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
3088cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // Recover from all newer log files than the ones named in the
3098cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // descriptor (new log files may have been added by the previous
3108cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // incarnation without registering them in the descriptor).
3118cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    //
3128cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // Note that PrevLogNumber() is no longer used, but we pay
3138cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // attention to it in case we are recovering a database
3148cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // produced by an older version of leveldb.
3158cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    const uint64_t min_log = versions_->LogNumber();
3168cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    const uint64_t prev_log = versions_->PrevLogNumber();
3178cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    std::vector<std::string> filenames;
3188cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    s = env_->GetChildren(dbname_, &filenames);
3198cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    if (!s.ok()) {
3208cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      return s;
32195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
322bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org    std::set<uint64_t> expected;
323bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org    versions_->AddLiveFiles(&expected);
3248cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    uint64_t number;
3258cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    FileType type;
3268cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    std::vector<uint64_t> logs;
3278cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    for (size_t i = 0; i < filenames.size(); i++) {
328bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org      if (ParseFileName(filenames[i], &number, &type)) {
329bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org        expected.erase(number);
330bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org        if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
331bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org          logs.push_back(number);
3328cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      }
333179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
334bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org    if (!expected.empty()) {
335bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org      char buf[50];
336bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org      snprintf(buf, sizeof(buf), "%d missing files; e.g.",
337bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org               static_cast<int>(expected.size()));
338bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org      return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
339bbb0263070defe02ffee97b35d0dc31d3f6297a3dgrogan@chromium.org    }
3408cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
3418cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    // Recover in the order in which the logs were generated
3428cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    std::sort(logs.begin(), logs.end());
3438cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    for (size_t i = 0; i < logs.size(); i++) {
3448cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      s = RecoverLogFile(logs[i], edit, &max_sequence);
345394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com
346394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      // The previous incarnation may not have written any MANIFEST
347394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      // records after allocating this log number.  So we manually
348394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      // update the file number allocation counter in VersionSet.
349394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      versions_->MarkFileNumberUsed(logs[i]);
3508cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    }
3518cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
352179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (s.ok()) {
35395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      if (versions_->LastSequence() < max_sequence) {
35495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org        versions_->SetLastSequence(max_sequence);
35595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      }
356179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
357179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
358179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
359179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
360179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
361179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
362179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::RecoverLogFile(uint64_t log_number,
363179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                              VersionEdit* edit,
364179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                              SequenceNumber* max_sequence) {
365179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  struct LogReporter : public log::Reader::Reporter {
366179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    Env* env;
367f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Logger* info_log;
368179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    const char* fname;
369179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    Status* status;  // NULL if options_.paranoid_checks==false
370179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    virtual void Corruption(size_t bytes, const Status& s) {
371f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com      Log(info_log, "%s%s: dropping %d bytes; %s",
372179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          (this->status == NULL ? "(ignoring error) " : ""),
373179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          fname, static_cast<int>(bytes), s.ToString().c_str());
374179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (this->status != NULL && this->status->ok()) *this->status = s;
375179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
376179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  };
377179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
378179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
379179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
380179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Open the log file
381179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::string fname = LogFileName(dbname_, log_number);
382179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  SequentialFile* file;
383179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status status = env_->NewSequentialFile(fname, &file);
384179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (!status.ok()) {
385179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    MaybeIgnoreError(&status);
386179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    return status;
387179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
388179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
389179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Create the log reader.
390179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  LogReporter reporter;
391179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  reporter.env = env_;
392179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  reporter.info_log = options_.info_log;
393179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  reporter.fname = fname.c_str();
394179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  reporter.status = (options_.paranoid_checks ? &status : NULL);
395179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // We intentially make log::Reader do checksumming even if
396179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // paranoid_checks==false so that corruptions cause entire commits
397179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // to be skipped instead of propagating bad information (like overly
398179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // large sequence numbers).
399a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  log::Reader reader(file, &reporter, true/*checksum*/,
400a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org                     0/*initial_offset*/);
401f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log, "Recovering log #%llu",
402179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      (unsigned long long) log_number);
403179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
404179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Read all the records and add to a memtable
405179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::string scratch;
406179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Slice record;
407179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  WriteBatch batch;
408179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MemTable* mem = NULL;
409179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  while (reader.ReadRecord(&record, &scratch) &&
410179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org         status.ok()) {
411179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (record.size() < 12) {
412179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      reporter.Corruption(
413179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          record.size(), Status::Corruption("log record too small"));
414179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      continue;
415179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
416179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    WriteBatchInternal::SetContents(&batch, record);
417179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
418179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (mem == NULL) {
419179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      mem = new MemTable(internal_comparator_);
420a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      mem->Ref();
421179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
422179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = WriteBatchInternal::InsertInto(&batch, mem);
423179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    MaybeIgnoreError(&status);
424179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (!status.ok()) {
425179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      break;
426179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
427179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    const SequenceNumber last_seq =
428179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        WriteBatchInternal::Sequence(&batch) +
429179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        WriteBatchInternal::Count(&batch) - 1;
430179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (last_seq > *max_sequence) {
431179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      *max_sequence = last_seq;
432179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
433179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
434179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
4358cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      status = WriteLevel0Table(mem, edit, NULL);
436179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (!status.ok()) {
437179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // Reflect errors immediately so that conditions like full
438179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // file-systems cause the DB::Open() to fail.
439179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        break;
440179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
441a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      mem->Unref();
442179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      mem = NULL;
443179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
444179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
445179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
446179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok() && mem != NULL) {
4478cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    status = WriteLevel0Table(mem, edit, NULL);
448179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Reflect errors immediately so that conditions like full
449179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // file-systems cause the DB::Open() to fail.
450179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
451179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
452a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  if (mem != NULL) mem->Unref();
453179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete file;
454179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return status;
455179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
456179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
4578cd4ab8303620197cf24282ae8639060efbb326egabor@google.comStatus DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
4588cd4ab8303620197cf24282ae8639060efbb326egabor@google.com                                Version* base) {
459179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
46095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  const uint64_t start_micros = env_->NowMicros();
461179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  FileMetaData meta;
462179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  meta.number = versions_->NewFileNumber();
463179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  pending_outputs_.insert(meta.number);
464179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Iterator* iter = mem->NewIterator();
465f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log, "Level-0 table #%llu: started",
466179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      (unsigned long long) meta.number);
46795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
46895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  Status s;
46995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  {
47095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    mutex_.Unlock();
4718cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
47295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    mutex_.Lock();
47395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
47495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
475f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s",
476179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      (unsigned long long) meta.number,
477179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      (unsigned long long) meta.file_size,
478179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      s.ToString().c_str());
479179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete iter;
480179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  pending_outputs_.erase(meta.number);
48195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
4828cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
4838cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  // Note that if file_size is zero, the file has been deleted and
4848cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  // should not be added to the manifest.
4858cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  int level = 0;
4868cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  if (s.ok() && meta.file_size > 0) {
487917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com    const Slice min_user_key = meta.smallest.user_key();
488917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com    const Slice max_user_key = meta.largest.user_key();
4895fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    if (base != NULL) {
4905fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
4918cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    }
4928cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    edit->AddFile(level, meta.number, meta.file_size,
4938cd4ab8303620197cf24282ae8639060efbb326egabor@google.com                  meta.smallest, meta.largest);
4948cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  }
4958cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
49695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  CompactionStats stats;
49795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  stats.micros = env_->NowMicros() - start_micros;
49895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  stats.bytes_written = meta.file_size;
4998cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  stats_[level].Add(stats);
500179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
501179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
502179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
5034935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.orgvoid DBImpl::CompactMemTable() {
504179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
50595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  assert(imm_ != NULL);
506179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
507179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Save the contents of the memtable as a new Table
50895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  VersionEdit edit;
5098cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  Version* base = versions_->current();
5108cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  base->Ref();
5118cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  Status s = WriteLevel0Table(imm_, &edit, base);
5128cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  base->Unref();
5138cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
5148cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  if (s.ok() && shutting_down_.Acquire_Load()) {
5158cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    s = Status::IOError("Deleting DB during memtable compaction");
5168cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  }
517179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
51895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  // Replace immutable memtable with the generated Table
519179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
52095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    edit.SetPrevLogNumber(0);
5218cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    edit.SetLogNumber(logfile_number_);  // Earlier logs no longer needed
522394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    s = versions_->LogAndApply(&edit, &mutex_);
523179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
524179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
525179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
526179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Commit to the new state
527a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org    imm_->Unref();
52895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    imm_ = NULL;
52995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    has_imm_.Release_Store(NULL);
530179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    DeleteObsoleteFiles();
5314935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  } else {
5324935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    RecordBackgroundError(s);
533179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
534179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
535179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
5365fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.comvoid DBImpl::CompactRange(const Slice* begin, const Slice* end) {
5375fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  int max_level_with_files = 1;
5385fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  {
5395fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    MutexLock l(&mutex_);
5405fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    Version* base = versions_->current();
5415fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    for (int level = 1; level < config::kNumLevels; level++) {
5425fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      if (base->OverlapInLevel(level, begin, end)) {
5435fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com        max_level_with_files = level;
5445fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      }
5455fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    }
5465fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  }
5475fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
5485fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  for (int level = 0; level < max_level_with_files; level++) {
5495fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    TEST_CompactRange(level, begin, end);
5505fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  }
5515fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com}
5525fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com
5535fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.comvoid DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
5548cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  assert(level >= 0);
5558cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  assert(level + 1 < config::kNumLevels);
5568cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
5575fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  InternalKey begin_storage, end_storage;
5585fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com
5596635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  ManualCompaction manual;
5606635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  manual.level = level;
5615fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  manual.done = false;
5625fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  if (begin == NULL) {
5635fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    manual.begin = NULL;
5645fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  } else {
5655fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
5665fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    manual.begin = &begin_storage;
5675fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  }
5685fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  if (end == NULL) {
5695fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    manual.end = NULL;
5705fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  } else {
5715fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
5725fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    manual.end = &end_storage;
5735fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  }
5745fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com
5755fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  MutexLock l(&mutex_);
5764935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) {
5774935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    if (manual_compaction_ == NULL) {  // Idle
5784935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      manual_compaction_ = &manual;
5794935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      MaybeScheduleCompaction();
5804935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    } else {  // Running either my compaction or another compaction.
5815fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      bg_cv_.Wait();
5825fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    }
5836635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  }
5844935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  if (manual_compaction_ == &manual) {
5854935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // Cancel my manual compaction since we aborted early for some reason.
5864935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    manual_compaction_ = NULL;
5874935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  }
588179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
589179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
590179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::TEST_CompactMemTable() {
59113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // NULL batch means just wait for earlier writes to be done
59213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Status s = Write(WriteOptions(), NULL);
59395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  if (s.ok()) {
59495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    // Wait until the compaction completes
59513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    MutexLock l(&mutex_);
59695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    while (imm_ != NULL && bg_error_.ok()) {
5976635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org      bg_cv_.Wait();
59895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
59995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    if (imm_ != NULL) {
60095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      s = bg_error_;
60195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
60295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
60395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  return s;
604179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
605179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
6064935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.orgvoid DBImpl::RecordBackgroundError(const Status& s) {
6074935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  mutex_.AssertHeld();
6084935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  if (bg_error_.ok()) {
6094935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    bg_error_ = s;
6104935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    bg_cv_.SignalAll();
6114935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  }
6124935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org}
6134935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org
614179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::MaybeScheduleCompaction() {
615179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
616179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (bg_compaction_scheduled_) {
617179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Already scheduled
618179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else if (shutting_down_.Acquire_Load()) {
619179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // DB is being deleted; no more background compactions
6204935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  } else if (!bg_error_.ok()) {
6214935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // Already got an error; no more changes
6226635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  } else if (imm_ == NULL &&
6236635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org             manual_compaction_ == NULL &&
6246635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org             !versions_->NeedsCompaction()) {
625179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // No work to be done
626179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
627179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    bg_compaction_scheduled_ = true;
628179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    env_->Schedule(&DBImpl::BGWork, this);
629179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
630179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
631179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
632179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::BGWork(void* db) {
633179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  reinterpret_cast<DBImpl*>(db)->BackgroundCall();
634179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
635179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
636179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::BackgroundCall() {
637179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MutexLock l(&mutex_);
638179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(bg_compaction_scheduled_);
6394935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  if (shutting_down_.Acquire_Load()) {
6404935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // No more background work when shutting down.
6414935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  } else if (!bg_error_.ok()) {
6424935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    // No more background work after a background error.
6434935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  } else {
6444935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    BackgroundCompaction();
645179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
646158f767acaed4c39cbb3ee8128fe896e155ec40csanjay@google.com
647179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  bg_compaction_scheduled_ = false;
648179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
649179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Previous compaction may have produced too many files in a level,
650179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // so reschedule another compaction if needed.
651179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MaybeScheduleCompaction();
6526635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  bg_cv_.SignalAll();
653179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
654179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
6554935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.orgvoid DBImpl::BackgroundCompaction() {
656179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
65795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
65895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  if (imm_ != NULL) {
6594935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    CompactMemTable();
6604935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    return;
66195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
66295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
6636635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  Compaction* c;
6646635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  bool is_manual = (manual_compaction_ != NULL);
6655fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  InternalKey manual_end;
6666635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  if (is_manual) {
6675fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    ManualCompaction* m = manual_compaction_;
6685fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    c = versions_->CompactRange(m->level, m->begin, m->end);
6695fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    m->done = (c == NULL);
6705fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    if (c != NULL) {
6715fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
6725fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    }
6735fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    Log(options_.info_log,
6745fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com        "Manual compaction at level-%d from %s .. %s; will stop at %s\n",
6756635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org        m->level,
6765fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com        (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
6775fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com        (m->end ? m->end->DebugString().c_str() : "(end)"),
6785fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com        (m->done ? "(end)" : manual_end.DebugString().c_str()));
6796635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  } else {
6806635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org    c = versions_->PickCompaction();
681179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
682179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
683179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status status;
6846635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  if (c == NULL) {
6856635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org    // Nothing to do
6866635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  } else if (!is_manual && c->IsTrivialMove()) {
687179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Move file to next level
688b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org    assert(c->num_input_files(0) == 1);
689179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    FileMetaData* f = c->input(0, 0);
690179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    c->edit()->DeleteFile(c->level(), f->number);
691179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
692179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                       f->smallest, f->largest);
693394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    status = versions_->LogAndApply(c->edit(), &mutex_);
6944935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    if (!status.ok()) {
6954935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      RecordBackgroundError(status);
6964935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    }
6976635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org    VersionSet::LevelSummaryStorage tmp;
698f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
699179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        static_cast<unsigned long long>(f->number),
700179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        c->level() + 1,
701179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        static_cast<unsigned long long>(f->file_size),
7026635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org        status.ToString().c_str(),
7036635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org        versions_->LevelSummary(&tmp));
704179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
705179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    CompactionState* compact = new CompactionState(c);
706179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = DoCompactionWork(compact);
7074935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    if (!status.ok()) {
7084935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      RecordBackgroundError(status);
7094935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    }
710179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    CleanupCompaction(compact);
711e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com    c->ReleaseInputs();
712e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com    DeleteObsoleteFiles();
713179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
714179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete c;
715179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
716179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok()) {
717179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Done
718179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else if (shutting_down_.Acquire_Load()) {
719179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Ignore compaction errors found during shutting down
720179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
721f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Log(options_.info_log,
722179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        "Compaction error: %s", status.ToString().c_str());
723179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
7246635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org
7256635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  if (is_manual) {
7265fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    ManualCompaction* m = manual_compaction_;
727e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com    if (!status.ok()) {
728e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com      m->done = true;
729e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com    }
7305fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    if (!m->done) {
7315fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      // We only compacted part of the requested range.  Update *m
7325fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      // to the range that is left to be compacted.
7335fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      m->tmp_storage = manual_end;
7345fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com      m->begin = &m->tmp_storage;
7355fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    }
7366635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org    manual_compaction_ = NULL;
7376635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org  }
738179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
739179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
740179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::CleanupCompaction(CompactionState* compact) {
741179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
742179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (compact->builder != NULL) {
743179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // May happen if we get a shutdown call in the middle of compaction
744179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->builder->Abandon();
745179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    delete compact->builder;
746179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
747179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    assert(compact->outfile == NULL);
748179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
749179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete compact->outfile;
7501511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  for (size_t i = 0; i < compact->outputs.size(); i++) {
751179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    const CompactionState::Output& out = compact->outputs[i];
752179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    pending_outputs_.erase(out.number);
753179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
754179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete compact;
755179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
756179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
757179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
758179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact != NULL);
759179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact->builder == NULL);
760179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  uint64_t file_number;
761179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  {
762179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    mutex_.Lock();
763179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    file_number = versions_->NewFileNumber();
764179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    pending_outputs_.insert(file_number);
765179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    CompactionState::Output out;
766179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    out.number = file_number;
767179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    out.smallest.Clear();
768179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    out.largest.Clear();
769179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->outputs.push_back(out);
770179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    mutex_.Unlock();
771179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
772179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
773179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Make the output file
774179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::string fname = TableFileName(dbname_, file_number);
775179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status s = env_->NewWritableFile(fname, &compact->outfile);
776179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
777179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->builder = new TableBuilder(options_, compact->outfile);
778179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
779179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
780179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
781179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
782179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::FinishCompactionOutputFile(CompactionState* compact,
783179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                                          Iterator* input) {
784179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact != NULL);
785179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact->outfile != NULL);
786179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact->builder != NULL);
787179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
788179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  const uint64_t output_number = compact->current_output()->number;
789179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(output_number != 0);
790179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
791179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Check for iterator errors
792179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status s = input->status();
793179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  const uint64_t current_entries = compact->builder->NumEntries();
794179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
795179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = compact->builder->Finish();
796179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
797179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->builder->Abandon();
798179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
799179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  const uint64_t current_bytes = compact->builder->FileSize();
800179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  compact->current_output()->file_size = current_bytes;
801179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  compact->total_bytes += current_bytes;
802179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete compact->builder;
803179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  compact->builder = NULL;
804179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
805179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Finish and check for file errors
806179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
807179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = compact->outfile->Sync();
808179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
809179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
810179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = compact->outfile->Close();
811179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
812179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete compact->outfile;
813179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  compact->outfile = NULL;
814179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
815179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok() && current_entries > 0) {
816179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Verify that the table is usable
817f85ede82f8c27a00c3120f67fbab89b2a89fe987jorlow@chromium.org    Iterator* iter = table_cache_->NewIterator(ReadOptions(),
818f85ede82f8c27a00c3120f67fbab89b2a89fe987jorlow@chromium.org                                               output_number,
819f85ede82f8c27a00c3120f67fbab89b2a89fe987jorlow@chromium.org                                               current_bytes);
820179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    s = iter->status();
821179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    delete iter;
822179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (s.ok()) {
823f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com      Log(options_.info_log,
824179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          "Generated table #%llu: %lld keys, %lld bytes",
825179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          (unsigned long long) output_number,
826179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          (unsigned long long) current_entries,
827179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          (unsigned long long) current_bytes);
828179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
829179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
830179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
831179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
832179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
833179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
834179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::InstallCompactionResults(CompactionState* compact) {
835179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.AssertHeld();
836f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log,  "Compacted %d@%d + %d@%d files => %lld bytes",
837179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->num_input_files(0),
838179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->level(),
839179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->num_input_files(1),
840179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->level() + 1,
841179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      static_cast<long long>(compact->total_bytes));
842179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
843179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Add compaction outputs
844179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  compact->compaction->AddInputDeletions(compact->compaction->edit());
845179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  const int level = compact->compaction->level();
8461511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  for (size_t i = 0; i < compact->outputs.size(); i++) {
847179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    const CompactionState::Output& out = compact->outputs[i];
848179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->compaction->edit()->AddFile(
849179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        level + 1,
850179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        out.number, out.file_size, out.smallest, out.largest);
851179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
852e05bd5cade19e5de0f763f4f122eef9f35de3d9csanjay@google.com  return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
853179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
854179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
855179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::DoCompactionWork(CompactionState* compact) {
85695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  const uint64_t start_micros = env_->NowMicros();
85795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  int64_t imm_micros = 0;  // Micros spent doing imm_ compactions
85895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
859f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log,  "Compacting %d@%d + %d@%d files",
860179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->num_input_files(0),
861179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->level(),
862179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->num_input_files(1),
863179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->compaction->level() + 1);
864179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
865179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
866179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact->builder == NULL);
867179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  assert(compact->outfile == NULL);
868179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (snapshots_.empty()) {
86995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    compact->smallest_snapshot = versions_->LastSequence();
870179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
871179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    compact->smallest_snapshot = snapshots_.oldest()->number_;
872179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
873179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
874179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Release mutex while we're actually doing the compaction work
875179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Unlock();
876179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
877179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Iterator* input = versions_->MakeInputIterator(compact->compaction);
878179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  input->SeekToFirst();
879179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status status;
880179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  ParsedInternalKey ikey;
881179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::string current_user_key;
882179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  bool has_current_user_key = false;
883179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  SequenceNumber last_sequence_for_key = kMaxSequenceNumber;
884179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
88595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    // Prioritize immutable compaction work
88695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    if (has_imm_.NoBarrier_Load() != NULL) {
88795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      const uint64_t imm_start = env_->NowMicros();
88895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      mutex_.Lock();
88995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      if (imm_ != NULL) {
89095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org        CompactMemTable();
8916635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org        bg_cv_.SignalAll();  // Wakeup MakeRoomForWrite() if necessary
89295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      }
89395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      mutex_.Unlock();
89495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      imm_micros += (env_->NowMicros() - imm_start);
89595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
89695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
897179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    Slice key = input->key();
898a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org    if (compact->compaction->ShouldStopBefore(key) &&
899b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org        compact->builder != NULL) {
900b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org      status = FinishCompactionOutputFile(compact, input);
901b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org      if (!status.ok()) {
902b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org        break;
903b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org      }
904b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org    }
905b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org
906b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org    // Handle key/value, add to state, etc.
907179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    bool drop = false;
908179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (!ParseInternalKey(key, &ikey)) {
909179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      // Do not hide error keys
910179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      current_user_key.clear();
911179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      has_current_user_key = false;
912179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      last_sequence_for_key = kMaxSequenceNumber;
913179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    } else {
914179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (!has_current_user_key ||
915179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          user_comparator()->Compare(ikey.user_key,
916179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                                     Slice(current_user_key)) != 0) {
917179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // First occurrence of this user key
918179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
919179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        has_current_user_key = true;
920179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        last_sequence_for_key = kMaxSequenceNumber;
921179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
922179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
923179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (last_sequence_for_key <= compact->smallest_snapshot) {
924179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // Hidden by an newer entry for same user key
925179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        drop = true;    // (A)
926179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      } else if (ikey.type == kTypeDeletion &&
927179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                 ikey.sequence <= compact->smallest_snapshot &&
928179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                 compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
929179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // For this user key:
930179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // (1) there is no data in higher levels
931179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // (2) data in lower levels will have larger sequence numbers
932179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // (3) data in layers that are being compacted here and have
933179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        //     smaller sequence numbers will be dropped in the next
934179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        //     few iterations of this loop (by rule (A) above).
935179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        // Therefore this deletion marker is obsolete and can be dropped.
936179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        drop = true;
937179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
938179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
939179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      last_sequence_for_key = ikey.sequence;
940179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
941179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#if 0
942f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com    Log(options_.info_log,
943179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        "  Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
944179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        "%d smallest_snapshot: %d",
945179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        ikey.user_key.ToString().c_str(),
9461511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org        (int)ikey.sequence, ikey.type, kTypeValue, drop,
947179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        compact->compaction->IsBaseLevelForKey(ikey.user_key),
948179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        (int)last_sequence_for_key, (int)compact->smallest_snapshot);
949179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org#endif
950179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
951179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (!drop) {
952179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      // Open output file if necessary
953179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (compact->builder == NULL) {
954179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        status = OpenCompactionOutputFile(compact);
955179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        if (!status.ok()) {
956179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
957179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        }
958179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
959179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (compact->builder->NumEntries() == 0) {
960179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        compact->current_output()->smallest.DecodeFrom(key);
961179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
962179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      compact->current_output()->largest.DecodeFrom(key);
9631511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org      compact->builder->Add(key, input->value());
964179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
965179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      // Close output file if it is big enough
966179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      if (compact->builder->FileSize() >=
967179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          compact->compaction->MaxOutputFileSize()) {
968179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        status = FinishCompactionOutputFile(compact, input);
969179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        if (!status.ok()) {
970179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          break;
971179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        }
972179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
973179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
974179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
975179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    input->Next();
976179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
977179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
978179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok() && shutting_down_.Acquire_Load()) {
979179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = Status::IOError("Deleting DB during compaction");
980179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
981179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok() && compact->builder != NULL) {
982179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = FinishCompactionOutputFile(compact, input);
983179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
984179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok()) {
985179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = input->status();
986179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
987179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  delete input;
988179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  input = NULL;
989179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
99095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  CompactionStats stats;
99195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  stats.micros = env_->NowMicros() - start_micros - imm_micros;
99295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  for (int which = 0; which < 2; which++) {
99395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
99495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      stats.bytes_read += compact->compaction->input(which, i)->file_size;
99595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
99695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
9971511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  for (size_t i = 0; i < compact->outputs.size(); i++) {
99895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    stats.bytes_written += compact->outputs[i].file_size;
99995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
100095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
1001179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Lock();
100295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  stats_[compact->compaction->level() + 1].Add(stats);
1003179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1004179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (status.ok()) {
1005179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    status = InstallCompactionResults(compact);
1006179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
10074935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  if (!status.ok()) {
10084935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org    RecordBackgroundError(status);
10094935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org  }
1010a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  VersionSet::LevelSummaryStorage tmp;
1011f65a55c8d0744b95be29a65d06b59b22b012f37bgabor@google.com  Log(options_.info_log,
1012a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      "compacted to: %s", versions_->LevelSummary(&tmp));
1013179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return status;
1014179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1015179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1016c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.orgnamespace {
1017c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.orgstruct IterState {
1018c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  port::Mutex* mu;
1019c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  Version* version;
1020c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  MemTable* mem;
1021c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  MemTable* imm;
1022c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org};
1023c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org
1024c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.orgstatic void CleanupIteratorState(void* arg1, void* arg2) {
1025c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  IterState* state = reinterpret_cast<IterState*>(arg1);
1026c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  state->mu->Lock();
1027c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  state->mem->Unref();
1028c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  if (state->imm != NULL) state->imm->Unref();
1029c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  state->version->Unref();
1030c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  state->mu->Unlock();
1031c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  delete state;
1032c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org}
103345b9940be332834440bd5299419f396e38085ebehans@chromium.org}  // namespace
1034c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org
1035179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgIterator* DBImpl::NewInternalIterator(const ReadOptions& options,
103608595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org                                      SequenceNumber* latest_snapshot,
103708595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org                                      uint32_t* seed) {
1038c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  IterState* cleanup = new IterState;
1039179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Lock();
104095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  *latest_snapshot = versions_->LastSequence();
1041179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1042179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Collect together all needed child iterators
1043179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::vector<Iterator*> list;
1044179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  list.push_back(mem_->NewIterator());
1045c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  mem_->Ref();
104695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  if (imm_ != NULL) {
104795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    list.push_back(imm_->NewIterator());
1048c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org    imm_->Ref();
104995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
1050179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  versions_->current()->AddIterators(options, &list);
1051179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Iterator* internal_iter =
1052179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      NewMergingIterator(&internal_comparator_, &list[0], list.size());
1053179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  versions_->current()->Ref();
1054c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org
1055c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  cleanup->mu = &mutex_;
1056c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  cleanup->mem = mem_;
1057c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  cleanup->imm = imm_;
1058c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  cleanup->version = versions_->current();
1059c6ac22e779e5135e494ddeb1d8e2b6008e9b619edgrogan@chromium.org  internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);
1060179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
106108595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  *seed = ++seed_;
1062179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  mutex_.Unlock();
1063179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return internal_iter;
1064179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1065179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1066179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgIterator* DBImpl::TEST_NewInternalIterator() {
1067179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  SequenceNumber ignored;
106808595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  uint32_t ignored_seed;
106908595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  return NewInternalIterator(ReadOptions(), &ignored, &ignored_seed);
1070179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1071179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
107207f3bcfb9764be2a339cc02cf0a0d6edb151defbjorlow@chromium.orgint64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
1073b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org  MutexLock l(&mutex_);
1074b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org  return versions_->MaxNextLevelOverlappingBytes();
1075b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org}
1076b887f640bae906abfb77fdf418be63350b4c5e1fjorlow@chromium.org
1077179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::Get(const ReadOptions& options,
1078179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                   const Slice& key,
1079179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                   std::string* value) {
10808cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  Status s;
10818cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  MutexLock l(&mutex_);
10828cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  SequenceNumber snapshot;
10838cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  if (options.snapshot != NULL) {
10848cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
10858cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  } else {
10868cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    snapshot = versions_->LastSequence();
1087179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
10888cd4ab8303620197cf24282ae8639060efbb326egabor@google.com
1089d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  MemTable* mem = mem_;
1090d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  MemTable* imm = imm_;
10918cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  Version* current = versions_->current();
1092d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  mem->Ref();
1093d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  if (imm != NULL) imm->Ref();
10948cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  current->Ref();
1095d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com
1096d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  bool have_stat_update = false;
10978cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  Version::GetStats stats;
1098d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com
1099d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  // Unlock while reading from files and memtables
1100d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  {
11018cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    mutex_.Unlock();
1102d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com    // First look in the memtable, then in the immutable memtable (if any).
1103d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com    LookupKey lkey(key, snapshot);
1104394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    if (mem->Get(lkey, value, &s)) {
1105d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com      // Done
1106394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    } else if (imm != NULL && imm->Get(lkey, value, &s)) {
1107d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com      // Done
1108d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com    } else {
1109d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com      s = current->Get(options, lkey, value, &stats);
1110d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com      have_stat_update = true;
1111d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com    }
11128cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    mutex_.Lock();
11138cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  }
1114d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com
1115d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  if (have_stat_update && current->UpdateStats(stats)) {
11168cd4ab8303620197cf24282ae8639060efbb326egabor@google.com    MaybeScheduleCompaction();
11178cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  }
1118d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  mem->Unref();
1119d36ce84e66c7d3cee978fbeb52721c30dfb842a5gabor@google.com  if (imm != NULL) imm->Unref();
11208cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  current->Unref();
11218cd4ab8303620197cf24282ae8639060efbb326egabor@google.com  return s;
1122179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1123179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1124179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgIterator* DBImpl::NewIterator(const ReadOptions& options) {
1125179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  SequenceNumber latest_snapshot;
112608595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  uint32_t seed;
112708595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  Iterator* iter = NewInternalIterator(options, &latest_snapshot, &seed);
1128a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  return NewDBIterator(
112908595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      this, user_comparator(), iter,
1130a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      (options.snapshot != NULL
1131a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org       ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
113208595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org       : latest_snapshot),
113308595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org      seed);
113408595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org}
113508595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org
113608595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.orgvoid DBImpl::RecordReadSample(Slice key) {
113708595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  MutexLock l(&mutex_);
113808595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  if (versions_->current()->RecordReadSample(key)) {
113908595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org    MaybeScheduleCompaction();
114008595b9e51ded54851b7664bd38affad63a67838dgrogan@chromium.org  }
1141179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1142179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1143179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgconst Snapshot* DBImpl::GetSnapshot() {
1144179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MutexLock l(&mutex_);
114595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  return snapshots_.New(versions_->LastSequence());
1146179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1147179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1148179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::ReleaseSnapshot(const Snapshot* s) {
1149179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MutexLock l(&mutex_);
1150a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  snapshots_.Delete(reinterpret_cast<const SnapshotImpl*>(s));
1151179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1152179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1153179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// Convenience methods
1154179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
1155179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return DB::Put(o, key, val);
1156179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1157179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1158179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DBImpl::Delete(const WriteOptions& options, const Slice& key) {
1159179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return DB::Delete(options, key);
1160179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1161179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
116213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.comStatus DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
116313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Writer w(&mutex_);
116413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  w.batch = my_batch;
116513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  w.sync = options.sync;
116613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  w.done = false;
1167394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com
11681511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  MutexLock l(&mutex_);
116913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  writers_.push_back(&w);
117013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  while (!w.done && &w != writers_.front()) {
117113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    w.cv.Wait();
117213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
117313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  if (w.done) {
117413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    return w.status;
117513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
117613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
117713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // May temporarily unlock and wait.
117813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Status status = MakeRoomForWrite(my_batch == NULL);
11791511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org  uint64_t last_sequence = versions_->LastSequence();
118013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Writer* last_writer = &w;
118113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  if (status.ok() && my_batch != NULL) {  // NULL batch is for compactions
118213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    WriteBatch* updates = BuildBatchGroup(&last_writer);
11831511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org    WriteBatchInternal::SetSequence(updates, last_sequence + 1);
11841511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org    last_sequence += WriteBatchInternal::Count(updates);
11851511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org
118613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    // Add to log and apply to memtable.  We can release the lock
118713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    // during this phase since &w is currently responsible for logging
118813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    // and protects against concurrent loggers and concurrent writes
118913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    // into mem_.
1190394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    {
1191394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      mutex_.Unlock();
1192394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      status = log_->AddRecord(WriteBatchInternal::Contents(updates));
11934935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      bool sync_error = false;
1194394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      if (status.ok() && options.sync) {
1195394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com        status = logfile_->Sync();
11964935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        if (!status.ok()) {
11974935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org          sync_error = true;
11984935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        }
1199394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      }
1200394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      if (status.ok()) {
1201394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com        status = WriteBatchInternal::InsertInto(updates, mem_);
1202394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      }
1203394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      mutex_.Lock();
12044935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      if (sync_error) {
12054935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        // The state of the log file is indeterminate: the log record we
12064935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        // just added may or may not show up when the DB is re-opened.
12074935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        // So we force the DB into a mode where all future writes fail.
12084935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org        RecordBackgroundError(status);
12094935bf087b28aa308c0a820720b85ef695e236aedgrogan@chromium.org      }
1210179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
121113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    if (updates == tmp_batch_) tmp_batch_->Clear();
1212394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com
1213394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com    versions_->SetLastSequence(last_sequence);
1214179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
121513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
121613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  while (true) {
121713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    Writer* ready = writers_.front();
121813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    writers_.pop_front();
121913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    if (ready != &w) {
122013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      ready->status = status;
122113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      ready->done = true;
122213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      ready->cv.Signal();
122313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    }
122413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    if (ready == last_writer) break;
122513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
122613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
122713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // Notify new head of write queue
122813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  if (!writers_.empty()) {
122913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    writers_.front()->cv.Signal();
123013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
123113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
1232179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return status;
1233179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1234179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
123513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com// REQUIRES: Writer list must be non-empty
123613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com// REQUIRES: First writer must have a non-NULL batch
123713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.comWriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
123813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  assert(!writers_.empty());
123913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  Writer* first = writers_.front();
124013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  WriteBatch* result = first->batch;
124113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  assert(result != NULL);
124213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
124313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  size_t size = WriteBatchInternal::ByteSize(first->batch);
124413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
124513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // Allow the group to grow up to a maximum size, but if the
124613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // original write is small, limit the growth so we do not slow
124713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  // down the small write too much.
124813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  size_t max_size = 1 << 20;
124913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  if (size <= (128<<10)) {
125013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    max_size = size + (128<<10);
125113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
125213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
125313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  *last_writer = first;
125413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  std::deque<Writer*>::iterator iter = writers_.begin();
125513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  ++iter;  // Advance past "first"
125613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  for (; iter != writers_.end(); ++iter) {
125713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    Writer* w = *iter;
125813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    if (w->sync && !first->sync) {
125913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      // Do not include a sync write into a batch handled by a non-sync write.
126013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      break;
126113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    }
126213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
126313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    if (w->batch != NULL) {
126413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      size += WriteBatchInternal::ByteSize(w->batch);
126513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      if (size > max_size) {
126613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        // Do not make batch too big
126713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        break;
126813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      }
126913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
127013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      // Append to *reuslt
127113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      if (result == first->batch) {
127213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        // Switch to temporary batch instead of disturbing caller's batch
127313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        result = tmp_batch_;
127413daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        assert(WriteBatchInternal::Count(result) == 0);
127513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com        WriteBatchInternal::Append(result, first->batch);
127613daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      }
127713daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com      WriteBatchInternal::Append(result, w->batch);
127813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    }
127913daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com    *last_writer = w;
128013daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  }
128113daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  return result;
128213daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com}
128313daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com
1284394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com// REQUIRES: mutex_ is held
128513daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com// REQUIRES: this thread is currently at the front of the writer queue
128695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.orgStatus DBImpl::MakeRoomForWrite(bool force) {
128795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  mutex_.AssertHeld();
128813daa9f29c999ee40a257ee0775abee2c78a0ad9sanjay@google.com  assert(!writers_.empty());
1289a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org  bool allow_delay = !force;
129095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  Status s;
129195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  while (true) {
129295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    if (!bg_error_.ok()) {
129395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      // Yield previous error
129495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      s = bg_error_;
129595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      break;
1296a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org    } else if (
1297a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org        allow_delay &&
1298a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org        versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) {
1299a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // We are getting close to hitting a hard limit on the number of
1300a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // L0 files.  Rather than delaying a single write by several
1301a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // seconds when we hit the hard limit, start delaying each
1302a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // individual write by 1ms to reduce latency variance.  Also,
1303a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // this delay hands over some CPU to the compaction thread in
1304a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // case it is sharing the same core as the writer.
1305a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      mutex_.Unlock();
1306a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      env_->SleepForMicroseconds(1000);
1307a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      allow_delay = false;  // Do not delay a single write more than once
1308a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      mutex_.Lock();
130995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    } else if (!force &&
131095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org               (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) {
131195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      // There is room in current memtable
131295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      break;
131395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    } else if (imm_ != NULL) {
131495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      // We have filled up the current memtable, but the previous
131595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      // one is still being compacted, so we wait.
1316bd534e2d9ba35e6ada9afe854ad0dbcef3f27c4fdgrogan@chromium.org      Log(options_.info_log, "Current memtable full; waiting...\n");
13176635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org      bg_cv_.Wait();
1318a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org    } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
1319a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      // There are too many level-0 files.
1320bd534e2d9ba35e6ada9afe854ad0dbcef3f27c4fdgrogan@chromium.org      Log(options_.info_log, "Too many L0 files; waiting...\n");
13216635e49a8999ab5e411d5227146a3db17fac2944hans@chromium.org      bg_cv_.Wait();
132295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    } else {
132395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      // Attempt to switch to a new memtable and trigger compaction of old
132495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      assert(versions_->PrevLogNumber() == 0);
132595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      uint64_t new_log_number = versions_->NewFileNumber();
132695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      WritableFile* lfile = NULL;
132795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile);
132895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      if (!s.ok()) {
1329158f767acaed4c39cbb3ee8128fe896e155ec40csanjay@google.com        // Avoid chewing through file number space in a tight loop.
1330158f767acaed4c39cbb3ee8128fe896e155ec40csanjay@google.com        versions_->ReuseFileNumber(new_log_number);
133195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org        break;
133295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      }
133395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      delete log_;
133495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      delete logfile_;
133595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      logfile_ = lfile;
13368cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      logfile_number_ = new_log_number;
133795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      log_ = new log::Writer(lfile);
133895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      imm_ = mem_;
133995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      has_imm_.Release_Store(imm_);
134095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      mem_ = new MemTable(internal_comparator_);
1341a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      mem_->Ref();
134295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      force = false;   // Do not force another compaction if have room
134395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      MaybeScheduleCompaction();
134495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
134595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  }
134695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  return s;
134795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org}
134895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
134995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.orgbool DBImpl::GetProperty(const Slice& property, std::string* value) {
135095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  value->clear();
135195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
1352179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  MutexLock l(&mutex_);
1353179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Slice in = property;
1354179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Slice prefix("leveldb.");
1355179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (!in.starts_with(prefix)) return false;
1356179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  in.remove_prefix(prefix.size());
1357179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1358179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (in.starts_with("num-files-at-level")) {
1359179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    in.remove_prefix(strlen("num-files-at-level"));
1360179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t level;
1361179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
1362fbe4e3af3f4e368e0779b6d75cd6005d67469aa2dgrogan@chromium.org    if (!ok || level >= config::kNumLevels) {
1363179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      return false;
1364179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    } else {
136595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      char buf[100];
13661511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org      snprintf(buf, sizeof(buf), "%d",
13671511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org               versions_->NumLevelFiles(static_cast<int>(level)));
136895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      *value = buf;
1369179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      return true;
1370179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
137195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org  } else if (in == "stats") {
137295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    char buf[200];
137395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    snprintf(buf, sizeof(buf),
137495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org             "                               Compactions\n"
137595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org             "Level  Files Size(MB) Time(sec) Read(MB) Write(MB)\n"
137695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org             "--------------------------------------------------\n"
137795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org             );
137895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    value->append(buf);
137995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    for (int level = 0; level < config::kNumLevels; level++) {
138095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      int files = versions_->NumLevelFiles(level);
138195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      if (stats_[level].micros > 0 || files > 0) {
138295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org        snprintf(
138395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            buf, sizeof(buf),
138495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            "%3d %8d %8.0f %9.0f %8.0f %9.0f\n",
138595e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            level,
138695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            files,
138795e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            versions_->NumLevelBytes(level) / 1048576.0,
138895e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            stats_[level].micros / 1e6,
138995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            stats_[level].bytes_read / 1048576.0,
139095e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org            stats_[level].bytes_written / 1048576.0);
139195e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org        value->append(buf);
139295e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      }
139395e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    }
139495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    return true;
13955fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com  } else if (in == "sstables") {
13965fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    *value = versions_->current()->DebugString();
13975fb21ed7ac9e91010d473ac77e132ae68f348d6agabor@google.com    return true;
1398179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
139995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org
1400179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return false;
1401179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1402179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1403179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgvoid DBImpl::GetApproximateSizes(
1404179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    const Range* range, int n,
1405179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t* sizes) {
1406179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // TODO(opt): better implementation
1407179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Version* v;
1408179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  {
1409179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    MutexLock l(&mutex_);
1410179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    versions_->current()->Ref();
1411179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    v = versions_->current();
1412179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1413179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1414179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  for (int i = 0; i < n; i++) {
1415179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    // Convert user_key into a corresponding internal key.
1416179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
1417179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
1418179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t start = versions_->ApproximateOffsetOf(v, k1);
1419179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
1420179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    sizes[i] = (limit >= start ? limit - start : 0);
1421179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1422179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1423179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  {
1424179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    MutexLock l(&mutex_);
1425179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    v->Unref();
1426179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1427179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1428179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1429179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// Default implementations of convenience methods that subclasses of DB
1430179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org// can call if they wish
1431179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
1432179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  WriteBatch batch;
1433179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  batch.Put(key, value);
1434179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return Write(opt, &batch);
1435179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1436179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1437179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DB::Delete(const WriteOptions& opt, const Slice& key) {
1438179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  WriteBatch batch;
1439179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  batch.Delete(key);
1440179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return Write(opt, &batch);
1441179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1442179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1443179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgDB::~DB() { }
1444179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1445179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DB::Open(const Options& options, const std::string& dbname,
1446179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                DB** dbptr) {
1447179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  *dbptr = NULL;
1448179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1449179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  DBImpl* impl = new DBImpl(options, dbname);
1450179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  impl->mutex_.Lock();
1451179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  VersionEdit edit;
1452179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
1453179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
145495e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    uint64_t new_log_number = impl->versions_->NewFileNumber();
1455179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    WritableFile* lfile;
145695e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org    s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
1457179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org                                     &lfile);
1458179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (s.ok()) {
145995e21f32367748825123e382172ecbfd492ddb23dgrogan@chromium.org      edit.SetLogNumber(new_log_number);
1460179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      impl->logfile_ = lfile;
14618cd4ab8303620197cf24282ae8639060efbb326egabor@google.com      impl->logfile_number_ = new_log_number;
1462179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      impl->log_ = new log::Writer(lfile);
1463394a4b425a6a8aca3244fc26ec77c101a11a632cgabor@google.com      s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
1464179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
1465179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    if (s.ok()) {
1466179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      impl->DeleteObsoleteFiles();
1467a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org      impl->MaybeScheduleCompaction();
1468179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
1469179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1470179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  impl->mutex_.Unlock();
1471179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (s.ok()) {
1472179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    *dbptr = impl;
1473179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  } else {
1474179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    delete impl;
1475179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1476179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return s;
1477179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1478179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1479a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.orgSnapshot::~Snapshot() {
1480a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org}
1481a5b4129c0a8c01158cde2244a5811f15b9d45ec0dgrogan@chromium.org
1482179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.orgStatus DestroyDB(const std::string& dbname, const Options& options) {
1483179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  Env* env = options.env;
1484179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  std::vector<std::string> filenames;
1485179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  // Ignore error in case directory does not exist
1486179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  env->GetChildren(dbname, &filenames);
1487179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (filenames.empty()) {
1488179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    return Status::OK();
1489179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1490179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
1491179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  FileLock* lock;
1492917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com  const std::string lockname = LockFileName(dbname);
1493917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com  Status result = env->LockFile(lockname, &lock);
1494179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  if (result.ok()) {
1495179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    uint64_t number;
1496179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    FileType type;
14971511be6edb54b6ade2bfad94256f76bc191e92ecdgrogan@chromium.org    for (size_t i = 0; i < filenames.size(); i++) {
1498917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com      if (ParseFileName(filenames[i], &number, &type) &&
1499f168d0177b095ac7a608f6aafb9efc96976b6b3csanjay@google.com          type != kDBLockFile) {  // Lock file will be deleted at end
1500179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        Status del = env->DeleteFile(dbname + "/" + filenames[i]);
1501179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        if (result.ok() && !del.ok()) {
1502179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org          result = del;
1503179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org        }
1504179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org      }
1505179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    }
1506179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    env->UnlockFile(lock);  // Ignore error since state is already gone
1507917b88dd720b6e658c1fd7812bc61c605f315124gabor@google.com    env->DeleteFile(lockname);
1508179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org    env->DeleteDir(dbname);  // Ignore error in case dir contains other files
1509179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  }
1510179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org  return result;
1511179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org}
1512179be588c25dccaa963df9c9c104fc6229435483jorlow@chromium.org
151345b9940be332834440bd5299419f396e38085ebehans@chromium.org}  // namespace leveldb
1514