1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/history_database.h"
6
7#include <algorithm>
8#include <set>
9#include <string>
10
11#include "base/command_line.h"
12#include "base/files/file_util.h"
13#include "base/metrics/histogram.h"
14#include "base/rand_util.h"
15#include "base/strings/string_util.h"
16#include "base/time/time.h"
17#include "sql/transaction.h"
18
19#if defined(OS_MACOSX)
20#include "base/mac/mac_util.h"
21#endif
22
23namespace history {
24
namespace {

// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
// kCurrentVersionNumber is what GetCurrentVersion() returns and the version
// the migration steps in EnsureCurrentVersion() work towards.
const int kCurrentVersionNumber = 29;
// Compatible version handed to the meta table when it is initialized.
const int kCompatibleVersionNumber = 16;
// Meta-table key under which the early expiration threshold is stored; see
// GetEarlyExpirationThreshold() and UpdateEarlyExpirationThreshold().
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

}  // namespace
35
// Does no work of its own; the database is opened and configured by Init().
HistoryDatabase::HistoryDatabase() {
}
38
// Has no explicit teardown logic; the body is empty.
HistoryDatabase::~HistoryDatabase() {
}
41
42sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
43  db_.set_histogram_tag("History");
44
45  // Set the exceptional sqlite error handler.
46  db_.set_error_callback(error_callback_);
47
48  // Set the database page size to something a little larger to give us
49  // better performance (we're typically seek rather than bandwidth limited).
50  // This only has an effect before any tables have been created, otherwise
51  // this is a NOP. Must be a power of 2 and a max of 8192.
52  db_.set_page_size(4096);
53
54  // Set the cache size. The page size, plus a little extra, times this
55  // value, tells us how much memory the cache will use maximum.
56  // 1000 * 4kB = 4MB
57  // TODO(brettw) scale this value to the amount of available memory.
58  db_.set_cache_size(1000);
59
60  // Note that we don't set exclusive locking here. That's done by
61  // BeginExclusiveMode below which is called later (we have to be in shared
62  // mode to start out for the in-memory backend to read the data).
63
64  if (!db_.Open(history_name))
65    return sql::INIT_FAILURE;
66
67  // Wrap the rest of init in a tranaction. This will prevent the database from
68  // getting corrupted if we crash in the middle of initialization or migration.
69  sql::Transaction committer(&db_);
70  if (!committer.Begin())
71    return sql::INIT_FAILURE;
72
73#if defined(OS_MACOSX)
74  // Exclude the history file from backups.
75  base::mac::SetFileBackupExclusion(history_name);
76#endif
77
78  // Prime the cache.
79  db_.Preload();
80
81  // Create the tables and indices.
82  // NOTE: If you add something here, also add it to
83  //       RecreateAllButStarAndURLTables.
84  if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
85    return sql::INIT_FAILURE;
86  if (!CreateURLTable(false) || !InitVisitTable() ||
87      !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
88      !InitSegmentTables())
89    return sql::INIT_FAILURE;
90  CreateMainURLIndex();
91  CreateKeywordSearchTermsIndices();
92
93  // TODO(benjhayden) Remove at some point.
94  meta_table_.DeleteKey("next_download_id");
95
96  // Version check.
97  sql::InitStatus version_status = EnsureCurrentVersion();
98  if (version_status != sql::INIT_OK)
99    return version_status;
100
101  return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
102}
103
104void HistoryDatabase::ComputeDatabaseMetrics(
105    const base::FilePath& history_name) {
106    base::TimeTicks start_time = base::TimeTicks::Now();
107  int64 file_size = 0;
108  if (!base::GetFileSize(history_name, &file_size))
109    return;
110  int file_mb = static_cast<int>(file_size / (1024 * 1024));
111  UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
112
113  sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
114  if (!url_count.Step())
115    return;
116  UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
117
118  sql::Statement visit_count(db_.GetUniqueStatement(
119      "SELECT count(*) FROM visits"));
120  if (!visit_count.Step())
121    return;
122  UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
123
124  base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
125  sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
126      "SELECT count(*) FROM visits WHERE visit_time > ?"));
127  weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
128  int weekly_visit_count = 0;
129  if (weekly_visit_sql.Step())
130    weekly_visit_count = weekly_visit_sql.ColumnInt(0);
131  UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
132
133  base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
134  sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
135      "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
136  monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
137  monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
138  int older_visit_count = 0;
139  if (monthly_visit_sql.Step())
140    older_visit_count = monthly_visit_sql.ColumnInt(0);
141  UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
142                       older_visit_count + weekly_visit_count);
143
144  UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
145                      base::TimeTicks::Now() - start_time);
146
147  // Compute the advanced metrics even less often, pending timing data showing
148  // that's not necessary.
149  if (base::RandInt(1, 3) == 3) {
150    start_time = base::TimeTicks::Now();
151
152    // Collect all URLs visited within the last month.
153    sql::Statement url_sql(db_.GetUniqueStatement(
154        "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
155    url_sql.BindInt64(0, one_month_ago.ToInternalValue());
156
157    // Count URLs (which will always be unique) and unique hosts within the last
158    // week and last month.
159    int week_url_count = 0;
160    int month_url_count = 0;
161    std::set<std::string> week_hosts;
162    std::set<std::string> month_hosts;
163    while (url_sql.Step()) {
164      GURL url(url_sql.ColumnString(0));
165      base::Time visit_time =
166          base::Time::FromInternalValue(url_sql.ColumnInt64(1));
167      ++month_url_count;
168      month_hosts.insert(url.host());
169      if (visit_time > one_week_ago) {
170        ++week_url_count;
171        week_hosts.insert(url.host());
172      }
173    }
174    UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
175    UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
176    UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
177    UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
178    UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
179                        base::TimeTicks::Now() - start_time);
180  }
181}
182
// Switches the connection to exclusive locking by executing
// "PRAGMA locking_mode=EXCLUSIVE". The result of the statement is ignored.
void HistoryDatabase::BeginExclusiveMode() {
  // We can't use set_exclusive_locking() since that only has an effect before
  // the DB is opened.
  ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
}
188
// Returns kCurrentVersionNumber, the schema version this code writes.
// static
int HistoryDatabase::GetCurrentVersion() {
  return kCurrentVersionNumber;
}
193
// Forwards to db_.BeginTransaction(). Any result of that call is not
// propagated to the caller.
void HistoryDatabase::BeginTransaction() {
  db_.BeginTransaction();
}
197
// Forwards to db_.CommitTransaction(). Any result of that call is not
// propagated to the caller.
void HistoryDatabase::CommitTransaction() {
  db_.CommitTransaction();
}
201
// Forwards to db_.RollbackTransaction().
void HistoryDatabase::RollbackTransaction() {
  db_.RollbackTransaction();
}
205
206bool HistoryDatabase::RecreateAllTablesButURL() {
207  if (!DropVisitTable())
208    return false;
209  if (!InitVisitTable())
210    return false;
211
212  if (!DropKeywordSearchTermsTable())
213    return false;
214  if (!InitKeywordSearchTermsTable())
215    return false;
216
217  if (!DropSegmentTables())
218    return false;
219  if (!InitSegmentTables())
220    return false;
221
222  CreateKeywordSearchTermsIndices();
223  return true;
224}
225
// Executes "VACUUM" on the database. Must not be called while a transaction
// is open (checked with a DCHECK on the transaction nesting level). The result
// of the statement is ignored.
void HistoryDatabase::Vacuum() {
  DCHECK_EQ(0, db_.transaction_nesting()) <<
      "Can not have a transaction when vacuuming.";
  ignore_result(db_.Execute("VACUUM"));
}
231
// Forwards the memory-trimming request, including the |aggressively| flag, to
// db_.TrimMemory().
void HistoryDatabase::TrimMemory(bool aggressively) {
  db_.TrimMemory(aggressively);
}
235
// Forwards to db_.Raze() and returns its result.
bool HistoryDatabase::Raze() {
  return db_.Raze();
}
239
240bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
241  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
242      "UPDATE visits SET segment_id = ? WHERE id = ?"));
243  s.BindInt64(0, segment_id);
244  s.BindInt64(1, visit_id);
245  DCHECK(db_.GetLastChangeCount() == 1);
246
247  return s.Run();
248}
249
250SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
251  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
252      "SELECT segment_id FROM visits WHERE id = ?"));
253  s.BindInt64(0, visit_id);
254
255  if (s.Step()) {
256    if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
257      return 0;
258    else
259      return s.ColumnInt64(0);
260  }
261  return 0;
262}
263
264base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
265  if (!cached_early_expiration_threshold_.is_null())
266    return cached_early_expiration_threshold_;
267
268  int64 threshold;
269  if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
270    // Set to a very early non-zero time, so it's before all history, but not
271    // zero to avoid re-retrieval.
272    threshold = 1L;
273  }
274
275  cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
276  return cached_early_expiration_threshold_;
277}
278
279void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
280  meta_table_.SetValue(kEarlyExpirationThresholdKey,
281                       threshold.ToInternalValue());
282  cached_early_expiration_threshold_ = threshold;
283}
284
// Returns a reference to the |db_| connection member.
sql::Connection& HistoryDatabase::GetDB() {
  return db_;
}
288
289// Migration -------------------------------------------------------------------
290
// Checks the schema version recorded in the meta table and applies the
// migration steps below, one version at a time, until no further step matches.
//
// Each step only runs when |cur_version| equals the version it migrates from,
// and each successful step writes the new version number to the meta table
// before the next step is considered, so the order of the blocks matters.
//
// Returns sql::INIT_TOO_NEW if the stored compatible version is newer than
// kCurrentVersionNumber, sql::INIT_FAILURE if one of the steps that checks its
// result fails, and sql::INIT_OK otherwise. INIT_OK is also returned, with a
// warning logged, if the version is still below the current one after all
// steps were considered.
sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
  // We can't read databases newer than we were designed for.
  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    LOG(WARNING) << "History database is too new.";
    return sql::INIT_TOO_NEW;
  }

  int cur_version = meta_table_.GetVersionNumber();

  // Put migration code here

  // 15 -> 16: drop the starred table and the starred id from the URLs table.
  // This is the only step that also rewrites the compatible version number.
  if (cur_version == 15) {
    if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
      LOG(WARNING) << "Unable to update history database to version 16.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
    meta_table_.SetCompatibleVersionNumber(
        std::min(cur_version, kCompatibleVersionNumber));
  }

  // 16 -> 17: time epoch change on non-Windows platforms.
  if (cur_version == 16) {
#if !defined(OS_WIN)
    // In this version we bring the time format on Mac & Linux in sync with the
    // Windows version so that profiles can be moved between computers.
    MigrateTimeEpoch();
#endif
    // On all platforms we bump the version number, so on Windows this
    // migration is a NOP. We keep the compatible version at 16 since things
    // will basically still work, just history will be in the future if an
    // old version reads it.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 17) {
    // Version 17 was for thumbnails to top sites migration. We ended up
    // disabling it though, so 17->18 does nothing.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 18) {
    // This is the version prior to adding url_source column. We need to
    // migrate the database.
    // Only the version number is bumped in this step; no schema statement is
    // executed here.
    cur_version = 19;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 19) {
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
    // This was the thumbnail migration.  Obsolete.
  }

  if (cur_version == 20) {
    // This is the version prior to adding the visit_duration field in visits
    // database. We need to migrate the database.
    if (!MigrateVisitsWithoutDuration()) {
      LOG(WARNING) << "Unable to update history database to version 21.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 21) {
    // The android_urls table's data schema was changed in version 21.
    // A failure of this Android-only migration is logged but does not abort
    // initialization; the version number is bumped either way.
#if defined(OS_ANDROID)
    if (!MigrateToVersion22()) {
      LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
    }
#endif
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 22 -> 23: fix up download state values.
  if (cur_version == 22) {
    if (!MigrateDownloadsState()) {
      LOG(WARNING) << "Unable to fix invalid downloads state values";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 23 -> 24: download interrupt reason, paths and danger type.
  if (cur_version == 23) {
    if (!MigrateDownloadsReasonPathsAndDangerType()) {
      LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 24 -> 25: presentation index migration.
  if (cur_version == 24) {
    if (!MigratePresentationIndex()) {
      LOG(WARNING) << "Unable to migrate history to version 25";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 25 -> 26: referrer migration.
  if (cur_version == 25) {
    if (!MigrateReferrer()) {
      LOG(WARNING) << "Unable to migrate history to version 26";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 26 -> 27: downloaded-by-extension migration.
  if (cur_version == 26) {
    if (!MigrateDownloadedByExtension()) {
      LOG(WARNING) << "Unable to migrate history to version 27";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 27 -> 28: download validators migration.
  if (cur_version == 27) {
    if (!MigrateDownloadValidators()) {
      LOG(WARNING) << "Unable to migrate history to version 28";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 28 -> 29: MIME type migration.
  if (cur_version == 28) {
    if (!MigrateMimeType()) {
      LOG(WARNING) << "Unable to migrate history to version 29";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // When the version is too old, we just try to continue anyway, there should
  // not be a released product that makes a database too old for us to handle.
  LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
         "History database version " << cur_version << " is too old to handle.";

  return sql::INIT_OK;
}
441
442#if !defined(OS_WIN)
443void HistoryDatabase::MigrateTimeEpoch() {
444  // Update all the times in the URLs and visits table in the main database.
445  ignore_result(db_.Execute(
446      "UPDATE urls "
447      "SET last_visit_time = last_visit_time + 11644473600000000 "
448      "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
449  ignore_result(db_.Execute(
450      "UPDATE visits "
451      "SET visit_time = visit_time + 11644473600000000 "
452      "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
453  ignore_result(db_.Execute(
454      "UPDATE segment_usage "
455      "SET time_slot = time_slot + 11644473600000000 "
456      "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
457}
458#endif
459
460}  // namespace history
461