history_database.cc revision a36e5920737c6adbddd3e43b760e5de8431db6e0
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/history_database.h"
6
7#include <algorithm>
8#include <set>
9#include <string>
10
11#include "base/command_line.h"
12#include "base/file_util.h"
13#include "base/metrics/histogram.h"
14#include "base/rand_util.h"
15#include "base/strings/string_util.h"
16#include "base/time/time.h"
17#include "sql/transaction.h"
18
19#if defined(OS_MACOSX)
20#include "base/mac/mac_util.h"
21#endif
22
23namespace history {
24
namespace {

// Schema versioning. Databases are always written at the "current" version;
// any older build that is able to read the "compatible" version can still
// make do with our database without *too* many bad effects.
const int kCurrentVersionNumber = 27;
const int kCompatibleVersionNumber = 16;

// Meta table key under which the early expiration threshold is stored.
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

// Meta table key recording whether thumbnails still have to be migrated out
// of history.
const char kNeedsThumbnailMigrationKey[] = "needs_thumbnail_migration";

}  // namespace
39
40HistoryDatabase::HistoryDatabase()
41    : needs_version_17_migration_(false) {
42}
43
44HistoryDatabase::~HistoryDatabase() {
45}
46
47sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
48  db_.set_histogram_tag("History");
49
50  // Set the exceptional sqlite error handler.
51  db_.set_error_callback(error_callback_);
52
53  // Set the database page size to something a little larger to give us
54  // better performance (we're typically seek rather than bandwidth limited).
55  // This only has an effect before any tables have been created, otherwise
56  // this is a NOP. Must be a power of 2 and a max of 8192.
57  db_.set_page_size(4096);
58
59  // Set the cache size. The page size, plus a little extra, times this
60  // value, tells us how much memory the cache will use maximum.
61  // 1000 * 4kB = 4MB
62  // TODO(brettw) scale this value to the amount of available memory.
63  db_.set_cache_size(1000);
64
65  // Note that we don't set exclusive locking here. That's done by
66  // BeginExclusiveMode below which is called later (we have to be in shared
67  // mode to start out for the in-memory backend to read the data).
68
69  if (!db_.Open(history_name))
70    return sql::INIT_FAILURE;
71
72  // Wrap the rest of init in a tranaction. This will prevent the database from
73  // getting corrupted if we crash in the middle of initialization or migration.
74  sql::Transaction committer(&db_);
75  if (!committer.Begin())
76    return sql::INIT_FAILURE;
77
78#if defined(OS_MACOSX)
79  // Exclude the history file from backups.
80  base::mac::SetFileBackupExclusion(history_name);
81#endif
82
83  // Prime the cache.
84  db_.Preload();
85
86  // Create the tables and indices.
87  // NOTE: If you add something here, also add it to
88  //       RecreateAllButStarAndURLTables.
89  if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
90    return sql::INIT_FAILURE;
91  if (!CreateURLTable(false) || !InitVisitTable() ||
92      !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
93      !InitSegmentTables())
94    return sql::INIT_FAILURE;
95  CreateMainURLIndex();
96  CreateKeywordSearchTermsIndices();
97
98  // TODO(benjhayden) Remove at some point.
99  meta_table_.DeleteKey("next_download_id");
100
101  // Version check.
102  sql::InitStatus version_status = EnsureCurrentVersion();
103  if (version_status != sql::INIT_OK)
104    return version_status;
105
106  return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
107}
108
109void HistoryDatabase::ComputeDatabaseMetrics(
110    const base::FilePath& history_name) {
111    base::TimeTicks start_time = base::TimeTicks::Now();
112  int64 file_size = 0;
113  if (!file_util::GetFileSize(history_name, &file_size))
114    return;
115  int file_mb = static_cast<int>(file_size / (1024 * 1024));
116  UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
117
118  sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
119  if (!url_count.Step())
120    return;
121  UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
122
123  sql::Statement visit_count(db_.GetUniqueStatement(
124      "SELECT count(*) FROM visits"));
125  if (!visit_count.Step())
126    return;
127  UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
128
129  base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
130  sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
131      "SELECT count(*) FROM visits WHERE visit_time > ?"));
132  weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
133  int weekly_visit_count = 0;
134  if (weekly_visit_sql.Step())
135    weekly_visit_count = weekly_visit_sql.ColumnInt(0);
136  UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
137
138  base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
139  sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
140      "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
141  monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
142  monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
143  int older_visit_count = 0;
144  if (monthly_visit_sql.Step())
145    older_visit_count = monthly_visit_sql.ColumnInt(0);
146  UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
147                       older_visit_count + weekly_visit_count);
148
149  UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
150                      base::TimeTicks::Now() - start_time);
151
152  // Compute the advanced metrics even less often, pending timing data showing
153  // that's not necessary.
154  if (base::RandInt(1, 3) == 3) {
155    start_time = base::TimeTicks::Now();
156
157    // Collect all URLs visited within the last month.
158    sql::Statement url_sql(db_.GetUniqueStatement(
159        "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
160    url_sql.BindInt64(0, one_month_ago.ToInternalValue());
161
162    // Count URLs (which will always be unique) and unique hosts within the last
163    // week and last month.
164    int week_url_count = 0;
165    int month_url_count = 0;
166    std::set<std::string> week_hosts;
167    std::set<std::string> month_hosts;
168    while (url_sql.Step()) {
169      GURL url(url_sql.ColumnString(0));
170      base::Time visit_time =
171          base::Time::FromInternalValue(url_sql.ColumnInt64(1));
172      ++month_url_count;
173      month_hosts.insert(url.host());
174      if (visit_time > one_week_ago) {
175        ++week_url_count;
176        week_hosts.insert(url.host());
177      }
178    }
179    UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
180    UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
181    UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
182    UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
183    UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
184                        base::TimeTicks::Now() - start_time);
185  }
186}
187
188void HistoryDatabase::BeginExclusiveMode() {
189  // We can't use set_exclusive_locking() since that only has an effect before
190  // the DB is opened.
191  ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
192}
193
194// static
195int HistoryDatabase::GetCurrentVersion() {
196  return kCurrentVersionNumber;
197}
198
199void HistoryDatabase::BeginTransaction() {
200  db_.BeginTransaction();
201}
202
203void HistoryDatabase::CommitTransaction() {
204  db_.CommitTransaction();
205}
206
207void HistoryDatabase::RollbackTransaction() {
208  db_.RollbackTransaction();
209}
210
211bool HistoryDatabase::RecreateAllTablesButURL() {
212  if (!DropVisitTable())
213    return false;
214  if (!InitVisitTable())
215    return false;
216
217  if (!DropKeywordSearchTermsTable())
218    return false;
219  if (!InitKeywordSearchTermsTable())
220    return false;
221
222  if (!DropSegmentTables())
223    return false;
224  if (!InitSegmentTables())
225    return false;
226
227  // We also add the supplementary URL indices at this point. This index is
228  // over parts of the URL table that weren't automatically created when the
229  // temporary URL table was
230  CreateKeywordSearchTermsIndices();
231  return true;
232}
233
234void HistoryDatabase::Vacuum() {
235  DCHECK_EQ(0, db_.transaction_nesting()) <<
236      "Can not have a transaction when vacuuming.";
237  ignore_result(db_.Execute("VACUUM"));
238}
239
240void HistoryDatabase::TrimMemory(bool aggressively) {
241  db_.TrimMemory(aggressively);
242}
243
244bool HistoryDatabase::Raze() {
245  return db_.Raze();
246}
247
248void HistoryDatabase::ThumbnailMigrationDone() {
249  meta_table_.SetValue(kNeedsThumbnailMigrationKey, 0);
250}
251
252bool HistoryDatabase::GetNeedsThumbnailMigration() {
253  int value = 0;
254  return (meta_table_.GetValue(kNeedsThumbnailMigrationKey, &value) &&
255          value != 0);
256}
257
258bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
259  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
260      "UPDATE visits SET segment_id = ? WHERE id = ?"));
261  s.BindInt64(0, segment_id);
262  s.BindInt64(1, visit_id);
263  DCHECK(db_.GetLastChangeCount() == 1);
264
265  return s.Run();
266}
267
268SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
269  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
270      "SELECT segment_id FROM visits WHERE id = ?"));
271  s.BindInt64(0, visit_id);
272
273  if (s.Step()) {
274    if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
275      return 0;
276    else
277      return s.ColumnInt64(0);
278  }
279  return 0;
280}
281
282base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
283  if (!cached_early_expiration_threshold_.is_null())
284    return cached_early_expiration_threshold_;
285
286  int64 threshold;
287  if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
288    // Set to a very early non-zero time, so it's before all history, but not
289    // zero to avoid re-retrieval.
290    threshold = 1L;
291  }
292
293  cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
294  return cached_early_expiration_threshold_;
295}
296
297void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
298  meta_table_.SetValue(kEarlyExpirationThresholdKey,
299                       threshold.ToInternalValue());
300  cached_early_expiration_threshold_ = threshold;
301}
302
303sql::Connection& HistoryDatabase::GetDB() {
304  return db_;
305}
306
307// Migration -------------------------------------------------------------------
308
309sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
310  // We can't read databases newer than we were designed for.
311  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
312    LOG(WARNING) << "History database is too new.";
313    return sql::INIT_TOO_NEW;
314  }
315
316  // NOTICE: If you are changing structures for things shared with the archived
317  // history file like URLs, visits, or downloads, that will need migration as
318  // well. Instead of putting such migration code in this class, it should be
319  // in the corresponding file (url_database.cc, etc.) and called from here and
320  // from the archived_database.cc.
321
322  int cur_version = meta_table_.GetVersionNumber();
323
324  // Put migration code here
325
326  if (cur_version == 15) {
327    if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
328      LOG(WARNING) << "Unable to update history database to version 16.";
329      return sql::INIT_FAILURE;
330    }
331    ++cur_version;
332    meta_table_.SetVersionNumber(cur_version);
333    meta_table_.SetCompatibleVersionNumber(
334        std::min(cur_version, kCompatibleVersionNumber));
335  }
336
337  if (cur_version == 16) {
338#if !defined(OS_WIN)
339    // In this version we bring the time format on Mac & Linux in sync with the
340    // Windows version so that profiles can be moved between computers.
341    MigrateTimeEpoch();
342#endif
343    // On all platforms we bump the version number, so on Windows this
344    // migration is a NOP. We keep the compatible version at 16 since things
345    // will basically still work, just history will be in the future if an
346    // old version reads it.
347    ++cur_version;
348    meta_table_.SetVersionNumber(cur_version);
349  }
350
351  if (cur_version == 17) {
352    // Version 17 was for thumbnails to top sites migration. We ended up
353    // disabling it though, so 17->18 does nothing.
354    ++cur_version;
355    meta_table_.SetVersionNumber(cur_version);
356  }
357
358  if (cur_version == 18) {
359    // This is the version prior to adding url_source column. We need to
360    // migrate the database.
361    cur_version = 19;
362    meta_table_.SetVersionNumber(cur_version);
363  }
364
365  if (cur_version == 19) {
366    cur_version++;
367    meta_table_.SetVersionNumber(cur_version);
368    // Set a key indicating we need to migrate thumbnails. When successfull the
369    // key is removed (ThumbnailMigrationDone).
370    meta_table_.SetValue(kNeedsThumbnailMigrationKey, 1);
371  }
372
373  if (cur_version == 20) {
374    // This is the version prior to adding the visit_duration field in visits
375    // database. We need to migrate the database.
376    if (!MigrateVisitsWithoutDuration()) {
377      LOG(WARNING) << "Unable to update history database to version 21.";
378      return sql::INIT_FAILURE;
379    }
380    ++cur_version;
381    meta_table_.SetVersionNumber(cur_version);
382  }
383
384  if (cur_version == 21) {
385    // The android_urls table's data schemal was changed in version 21.
386#if defined(OS_ANDROID)
387    if (!MigrateToVersion22()) {
388      LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
389    }
390#endif
391    ++cur_version;
392    meta_table_.SetVersionNumber(cur_version);
393  }
394
395  if (cur_version == 22) {
396    if (!MigrateDownloadsState()) {
397      LOG(WARNING) << "Unable to fix invalid downloads state values";
398      // Invalid state values may cause crashes.
399      return sql::INIT_FAILURE;
400    }
401    cur_version++;
402    meta_table_.SetVersionNumber(cur_version);
403  }
404
405  if (cur_version == 23) {
406    if (!MigrateDownloadsReasonPathsAndDangerType()) {
407      LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
408      // Invalid state values may cause crashes.
409      return sql::INIT_FAILURE;
410    }
411    cur_version++;
412    meta_table_.SetVersionNumber(cur_version);
413  }
414
415  if (cur_version == 24) {
416    if (!MigratePresentationIndex()) {
417      LOG(WARNING) << "Unable to migrate history to version 25";
418      return sql::INIT_FAILURE;
419    }
420    cur_version++;
421    meta_table_.SetVersionNumber(cur_version);
422  }
423
424  if (cur_version == 25) {
425    if (!MigrateReferrer()) {
426      LOG(WARNING) << "Unable to migrate history to version 26";
427      return sql::INIT_FAILURE;
428    }
429    cur_version++;
430    meta_table_.SetVersionNumber(cur_version);
431  }
432
433  if (cur_version == 26) {
434    if (!MigrateDownloadedByExtension()) {
435      LOG(WARNING) << "Unable to migrate history to version 27";
436      return sql::INIT_FAILURE;
437    }
438    cur_version++;
439    meta_table_.SetVersionNumber(cur_version);
440  }
441
442  // When the version is too old, we just try to continue anyway, there should
443  // not be a released product that makes a database too old for us to handle.
444  LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
445         "History database version " << cur_version << " is too old to handle.";
446
447  return sql::INIT_OK;
448}
449
450#if !defined(OS_WIN)
451void HistoryDatabase::MigrateTimeEpoch() {
452  // Update all the times in the URLs and visits table in the main database.
453  ignore_result(db_.Execute(
454      "UPDATE urls "
455      "SET last_visit_time = last_visit_time + 11644473600000000 "
456      "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
457  ignore_result(db_.Execute(
458      "UPDATE visits "
459      "SET visit_time = visit_time + 11644473600000000 "
460      "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
461  ignore_result(db_.Execute(
462      "UPDATE segment_usage "
463      "SET time_slot = time_slot + 11644473600000000 "
464      "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
465
466  // Erase all the full text index files. These will take a while to update and
467  // are less important, so we just blow them away. Same with the archived
468  // database.
469  needs_version_17_migration_ = true;
470}
471#endif
472
473}  // namespace history
474