history_database.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/history_database.h"
6
7#include <algorithm>
8#include <set>
9#include <string>
10
11#include "base/command_line.h"
12#include "base/file_util.h"
13#include "base/metrics/histogram.h"
14#include "base/rand_util.h"
15#include "base/strings/string_util.h"
16#include "base/time/time.h"
17#include "sql/transaction.h"
18
19#if defined(OS_MACOSX)
20#include "base/mac/mac_util.h"
21#endif
22
23namespace history {
24
namespace {

// Schema version numbers. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
const int kCurrentVersionNumber = 26;
const int kCompatibleVersionNumber = 16;

// Key in the meta table under which the early expiration threshold is stored.
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

// Key in the meta table used to determine if we need to migrate thumbnails out
// of history.
const char kNeedsThumbnailMigrationKey[] = "needs_thumbnail_migration";

}  // namespace
39
40HistoryDatabase::HistoryDatabase()
41    : needs_version_17_migration_(false) {
42}
43
44HistoryDatabase::~HistoryDatabase() {
45}
46
47sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
48  db_.set_histogram_tag("History");
49
50  // Set the exceptional sqlite error handler.
51  db_.set_error_callback(error_callback_);
52
53  // Set the database page size to something a little larger to give us
54  // better performance (we're typically seek rather than bandwidth limited).
55  // This only has an effect before any tables have been created, otherwise
56  // this is a NOP. Must be a power of 2 and a max of 8192.
57  db_.set_page_size(4096);
58
59  // Set the cache size. The page size, plus a little extra, times this
60  // value, tells us how much memory the cache will use maximum.
61  // 1000 * 4kB = 4MB
62  // TODO(brettw) scale this value to the amount of available memory.
63  db_.set_cache_size(1000);
64
65  // Note that we don't set exclusive locking here. That's done by
66  // BeginExclusiveMode below which is called later (we have to be in shared
67  // mode to start out for the in-memory backend to read the data).
68
69  if (!db_.Open(history_name))
70    return sql::INIT_FAILURE;
71
72  // Wrap the rest of init in a tranaction. This will prevent the database from
73  // getting corrupted if we crash in the middle of initialization or migration.
74  sql::Transaction committer(&db_);
75  if (!committer.Begin())
76    return sql::INIT_FAILURE;
77
78#if defined(OS_MACOSX)
79  // Exclude the history file from backups.
80  base::mac::SetFileBackupExclusion(history_name);
81#endif
82
83  // Prime the cache.
84  db_.Preload();
85
86  // Create the tables and indices.
87  // NOTE: If you add something here, also add it to
88  //       RecreateAllButStarAndURLTables.
89  if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
90    return sql::INIT_FAILURE;
91  if (!CreateURLTable(false) || !InitVisitTable() ||
92      !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
93      !InitSegmentTables())
94    return sql::INIT_FAILURE;
95  CreateMainURLIndex();
96  CreateKeywordSearchTermsIndices();
97
98  // Version check.
99  sql::InitStatus version_status = EnsureCurrentVersion();
100  if (version_status != sql::INIT_OK)
101    return version_status;
102
103  return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
104}
105
106void HistoryDatabase::ComputeDatabaseMetrics(
107    const base::FilePath& history_name) {
108    base::TimeTicks start_time = base::TimeTicks::Now();
109  int64 file_size = 0;
110  if (!file_util::GetFileSize(history_name, &file_size))
111    return;
112  int file_mb = static_cast<int>(file_size / (1024 * 1024));
113  UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
114
115  sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
116  if (!url_count.Step())
117    return;
118  UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
119
120  sql::Statement visit_count(db_.GetUniqueStatement(
121      "SELECT count(*) FROM visits"));
122  if (!visit_count.Step())
123    return;
124  UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
125
126  base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
127  sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
128      "SELECT count(*) FROM visits WHERE visit_time > ?"));
129  weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
130  int weekly_visit_count = 0;
131  if (weekly_visit_sql.Step())
132    weekly_visit_count = weekly_visit_sql.ColumnInt(0);
133  UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
134
135  base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
136  sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
137      "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
138  monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
139  monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
140  int older_visit_count = 0;
141  if (monthly_visit_sql.Step())
142    older_visit_count = monthly_visit_sql.ColumnInt(0);
143  UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
144                       older_visit_count + weekly_visit_count);
145
146  UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
147                      base::TimeTicks::Now() - start_time);
148
149  // Compute the advanced metrics even less often, pending timing data showing
150  // that's not necessary.
151  if (base::RandInt(1, 3) == 3) {
152    start_time = base::TimeTicks::Now();
153
154    // Collect all URLs visited within the last month.
155    sql::Statement url_sql(db_.GetUniqueStatement(
156        "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
157    url_sql.BindInt64(0, one_month_ago.ToInternalValue());
158
159    // Count URLs (which will always be unique) and unique hosts within the last
160    // week and last month.
161    int week_url_count = 0;
162    int month_url_count = 0;
163    std::set<std::string> week_hosts;
164    std::set<std::string> month_hosts;
165    while (url_sql.Step()) {
166      GURL url(url_sql.ColumnString(0));
167      base::Time visit_time =
168          base::Time::FromInternalValue(url_sql.ColumnInt64(1));
169      ++month_url_count;
170      month_hosts.insert(url.host());
171      if (visit_time > one_week_ago) {
172        ++week_url_count;
173        week_hosts.insert(url.host());
174      }
175    }
176    UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
177    UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
178    UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
179    UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
180    UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
181                        base::TimeTicks::Now() - start_time);
182  }
183}
184
185void HistoryDatabase::BeginExclusiveMode() {
186  // We can't use set_exclusive_locking() since that only has an effect before
187  // the DB is opened.
188  ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
189}
190
// static
// Returns the schema version number this code writes (kCurrentVersionNumber).
int HistoryDatabase::GetCurrentVersion() {
  return kCurrentVersionNumber;
}
195
// Starts a transaction on the underlying connection. Any result reported by
// the connection is not propagated to the caller.
void HistoryDatabase::BeginTransaction() {
  db_.BeginTransaction();
}
199
// Commits the transaction on the underlying connection. Any result reported
// by the connection is not propagated to the caller.
void HistoryDatabase::CommitTransaction() {
  db_.CommitTransaction();
}
203
// Rolls back the transaction on the underlying connection.
void HistoryDatabase::RollbackTransaction() {
  db_.RollbackTransaction();
}
207
208bool HistoryDatabase::RecreateAllTablesButURL() {
209  if (!DropVisitTable())
210    return false;
211  if (!InitVisitTable())
212    return false;
213
214  if (!DropKeywordSearchTermsTable())
215    return false;
216  if (!InitKeywordSearchTermsTable())
217    return false;
218
219  if (!DropSegmentTables())
220    return false;
221  if (!InitSegmentTables())
222    return false;
223
224  // We also add the supplementary URL indices at this point. This index is
225  // over parts of the URL table that weren't automatically created when the
226  // temporary URL table was
227  CreateKeywordSearchTermsIndices();
228  return true;
229}
230
// Runs VACUUM on the database. Must be called with no transaction open (this
// is asserted in debug builds). The result of the statement is ignored.
void HistoryDatabase::Vacuum() {
  DCHECK_EQ(0, db_.transaction_nesting()) <<
      "Can not have a transaction when vacuuming.";
  ignore_result(db_.Execute("VACUUM"));
}
236
// Forwards to Raze() on the underlying connection and returns its result.
bool HistoryDatabase::Raze() {
  return db_.Raze();
}
240
// Marks thumbnail migration as finished by writing 0 under the
// needs-thumbnail-migration key in the meta table, so that
// GetNeedsThumbnailMigration() subsequently reports false.
void HistoryDatabase::ThumbnailMigrationDone() {
  meta_table_.SetValue(kNeedsThumbnailMigrationKey, 0);
}
244
245bool HistoryDatabase::GetNeedsThumbnailMigration() {
246  int value = 0;
247  return (meta_table_.GetValue(kNeedsThumbnailMigrationKey, &value) &&
248          value != 0);
249}
250
251bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
252  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
253      "UPDATE visits SET segment_id = ? WHERE id = ?"));
254  s.BindInt64(0, segment_id);
255  s.BindInt64(1, visit_id);
256  DCHECK(db_.GetLastChangeCount() == 1);
257
258  return s.Run();
259}
260
261SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
262  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
263      "SELECT segment_id FROM visits WHERE id = ?"));
264  s.BindInt64(0, visit_id);
265
266  if (s.Step()) {
267    if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
268      return 0;
269    else
270      return s.ColumnInt64(0);
271  }
272  return 0;
273}
274
275base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
276  if (!cached_early_expiration_threshold_.is_null())
277    return cached_early_expiration_threshold_;
278
279  int64 threshold;
280  if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
281    // Set to a very early non-zero time, so it's before all history, but not
282    // zero to avoid re-retrieval.
283    threshold = 1L;
284  }
285
286  cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
287  return cached_early_expiration_threshold_;
288}
289
290void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
291  meta_table_.SetValue(kEarlyExpirationThresholdKey,
292                       threshold.ToInternalValue());
293  cached_early_expiration_threshold_ = threshold;
294}
295
// Returns a reference to the connection owned by this object.
sql::Connection& HistoryDatabase::GetDB() {
  return db_;
}
299
// Returns a reference to the meta table owned by this object.
sql::MetaTable& HistoryDatabase::GetMetaTable() {
  return meta_table_;
}
303
304// Migration -------------------------------------------------------------------
305
// Brings the database schema up to the current version by applying each
// pending migration step in sequence, starting from the version number
// recorded in the meta table. After each step the new version number is
// written back to the meta table.
//
// Returns:
//   sql::INIT_TOO_NEW  if the stored compatible version is newer than this
//                      code's current version.
//   sql::INIT_FAILURE  if a mandatory migration step fails.
//   sql::INIT_OK       otherwise, including when the stored version is older
//                      than any version handled here (a warning is logged).
sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
  // We can't read databases newer than we were designed for.
  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    LOG(WARNING) << "History database is too new.";
    return sql::INIT_TOO_NEW;
  }

  // NOTICE: If you are changing structures for things shared with the archived
  // history file like URLs, visits, or downloads, that will need migration as
  // well. Instead of putting such migration code in this class, it should be
  // in the corresponding file (url_database.cc, etc.) and called from here and
  // from the archived_database.cc.

  int cur_version = meta_table_.GetVersionNumber();

  // Put migration code here. Each block below handles exactly one version and
  // falls through to the next, so a database several versions behind is
  // upgraded step by step in a single call.

  // 15 -> 16: drop the starred table and the starred id from URLs. This is
  // the only step that also rewrites the compatible version number.
  if (cur_version == 15) {
    if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
      LOG(WARNING) << "Unable to update history database to version 16.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
    meta_table_.SetCompatibleVersionNumber(
        std::min(cur_version, kCompatibleVersionNumber));
  }

  // 16 -> 17: time epoch change on non-Windows platforms.
  if (cur_version == 16) {
#if !defined(OS_WIN)
    // In this version we bring the time format on Mac & Linux in sync with the
    // Windows version so that profiles can be moved between computers.
    MigrateTimeEpoch();
#endif
    // On all platforms we bump the version number, so on Windows this
    // migration is a NOP. We keep the compatible version at 16 since things
    // will basically still work, just history will be in the future if an
    // old version reads it.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 17) {
    // Version 17 was for thumbnails to top sites migration. We ended up
    // disabling it though, so 17->18 does nothing.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 18) {
    // This is the version prior to adding url_source column. No data is
    // rewritten in this step; only the version number is bumped.
    cur_version = 19;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 19) {
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
    // Set a key indicating we need to migrate thumbnails. When successful the
    // key is reset to 0 (see ThumbnailMigrationDone).
    meta_table_.SetValue(kNeedsThumbnailMigrationKey, 1);
  }

  if (cur_version == 20) {
    // This is the version prior to adding the visit_duration field in visits
    // database. We need to migrate the database.
    if (!MigrateVisitsWithoutDuration()) {
      LOG(WARNING) << "Unable to update history database to version 21.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 21) {
    // The android_urls table's data schema was changed in version 21.
    // A failure here is only logged; the version is bumped regardless.
#if defined(OS_ANDROID)
    if (!MigrateToVersion22()) {
      LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
    }
#endif
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 22 -> 23: repair invalid download state values.
  if (cur_version == 22) {
    if (!MigrateDownloadsState()) {
      LOG(WARNING) << "Unable to fix invalid downloads state values";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 23 -> 24: upgrade download interrupt reason, paths and danger type.
  if (cur_version == 23) {
    if (!MigrateDownloadsReasonPathsAndDangerType()) {
      LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 24 -> 25: presentation index migration.
  if (cur_version == 24) {
    if (!MigratePresentationIndex()) {
      LOG(WARNING) << "Unable to migrate history to version 25";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 25 -> 26: referrer migration.
  if (cur_version == 25) {
    if (!MigrateReferrer()) {
      LOG(WARNING) << "Unable to migrate history to version 26";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // When the version is too old, we just try to continue anyway, there should
  // not be a released product that makes a database too old for us to handle.
  LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
         "History database version " << cur_version << " is too old to handle.";

  return sql::INIT_OK;
}
437
438#if !defined(OS_WIN)
439void HistoryDatabase::MigrateTimeEpoch() {
440  // Update all the times in the URLs and visits table in the main database.
441  // For visits, clear the indexed flag since we'll delete the FTS databases in
442  // the next step.
443  ignore_result(db_.Execute(
444      "UPDATE urls "
445      "SET last_visit_time = last_visit_time + 11644473600000000 "
446      "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
447  ignore_result(db_.Execute(
448      "UPDATE visits "
449      "SET visit_time = visit_time + 11644473600000000, is_indexed = 0 "
450      "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
451  ignore_result(db_.Execute(
452      "UPDATE segment_usage "
453      "SET time_slot = time_slot + 11644473600000000 "
454      "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
455
456  // Erase all the full text index files. These will take a while to update and
457  // are less important, so we just blow them away. Same with the archived
458  // database.
459  needs_version_17_migration_ = true;
460}
461#endif
462
463}  // namespace history
464