url_database.cc revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/url_database.h"
6
7#include <algorithm>
8#include <limits>
9#include <string>
10#include <vector>
11
12#include "base/i18n/case_conversion.h"
13#include "base/strings/utf_string_conversions.h"
14#include "chrome/common/url_constants.h"
15#include "net/base/net_util.h"
16#include "sql/statement.h"
17#include "ui/base/l10n/l10n_util.h"
18#include "url/gurl.h"
19
20namespace history {
21
22const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS;
23const int URLDatabase::kNumURLRowFields = 9;
24
25URLDatabase::URLEnumeratorBase::URLEnumeratorBase()
26    : initialized_(false) {
27}
28
29URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() {
30}
31
32URLDatabase::URLEnumerator::URLEnumerator() {
33}
34
35bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) {
36  if (statement_.Step()) {
37    FillURLRow(statement_, r);
38    return true;
39  }
40  return false;
41}
42
43URLDatabase::URLDatabase()
44    : has_keyword_search_terms_(false) {
45}
46
47URLDatabase::~URLDatabase() {
48}
49
50// static
51std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) {
52  // TODO(brettw): do something fancy here with encoding, etc.
53
54  // Strip username and password from URL before sending to DB.
55  GURL::Replacements replacements;
56  replacements.ClearUsername();
57  replacements.ClearPassword();
58
59  return (gurl.ReplaceComponents(replacements)).spec();
60}
61
62// Convenience to fill a history::URLRow. Must be in sync with the fields in
63// kURLRowFields.
64void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) {
65  DCHECK(i);
66  i->id_ = s.ColumnInt64(0);
67  i->url_ = GURL(s.ColumnString(1));
68  i->title_ = s.ColumnString16(2);
69  i->visit_count_ = s.ColumnInt(3);
70  i->typed_count_ = s.ColumnInt(4);
71  i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5));
72  i->hidden_ = s.ColumnInt(6) != 0;
73}
74
75bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) {
76  // TODO(brettw) We need check for empty URLs to handle the case where
77  // there are old URLs in the database that are empty that got in before
78  // we added any checks. We should eventually be able to remove it
79  // when all inputs are using GURL (which prohibit empty input).
80  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
81      "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?"));
82  statement.BindInt64(0, url_id);
83
84  if (statement.Step()) {
85    FillURLRow(statement, info);
86    return true;
87  }
88  return false;
89}
90
91bool URLDatabase::GetAllTypedUrls(URLRows* urls) {
92  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
93      "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0"));
94
95  while (statement.Step()) {
96    URLRow info;
97    FillURLRow(statement, &info);
98    urls->push_back(info);
99  }
100  return true;
101}
102
103URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) {
104  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
105      "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?"));
106  std::string url_string = GURLToDatabaseURL(url);
107  statement.BindString(0, url_string);
108
109  if (!statement.Step())
110    return 0;  // no data
111
112  if (info)
113    FillURLRow(statement, info);
114  return statement.ColumnInt64(0);
115}
116
117bool URLDatabase::UpdateURLRow(URLID url_id,
118                               const history::URLRow& info) {
119  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
120      "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?,"
121        "hidden=?"
122      "WHERE id=?"));
123  statement.BindString16(0, info.title());
124  statement.BindInt(1, info.visit_count());
125  statement.BindInt(2, info.typed_count());
126  statement.BindInt64(3, info.last_visit().ToInternalValue());
127  statement.BindInt(4, info.hidden() ? 1 : 0);
128  statement.BindInt64(5, url_id);
129
130  return statement.Run();
131}
132
133URLID URLDatabase::AddURLInternal(const history::URLRow& info,
134                                  bool is_temporary) {
135  // This function is used to insert into two different tables, so we have to
136  // do some shuffling. Unfortinately, we can't use the macro
137  // HISTORY_URL_ROW_FIELDS because that specifies the table name which is
138  // invalid in the insert syntax.
139  #define ADDURL_COMMON_SUFFIX \
140      " (url, title, visit_count, typed_count, "\
141      "last_visit_time, hidden) "\
142      "VALUES (?,?,?,?,?,?)"
143  const char* statement_name;
144  const char* statement_sql;
145  if (is_temporary) {
146    statement_name = "AddURLTemporary";
147    statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX;
148  } else {
149    statement_name = "AddURL";
150    statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX;
151  }
152  #undef ADDURL_COMMON_SUFFIX
153
154  sql::Statement statement(GetDB().GetCachedStatement(
155      sql::StatementID(statement_name), statement_sql));
156  statement.BindString(0, GURLToDatabaseURL(info.url()));
157  statement.BindString16(1, info.title());
158  statement.BindInt(2, info.visit_count());
159  statement.BindInt(3, info.typed_count());
160  statement.BindInt64(4, info.last_visit().ToInternalValue());
161  statement.BindInt(5, info.hidden() ? 1 : 0);
162
163  if (!statement.Run()) {
164    VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec()
165            << " to table history.urls.";
166    return 0;
167  }
168  return GetDB().GetLastInsertRowId();
169}
170
171bool URLDatabase::DeleteURLRow(URLID id) {
172  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
173      "DELETE FROM urls WHERE id = ?"));
174  statement.BindInt64(0, id);
175
176  if (!statement.Run())
177    return false;
178
179  // And delete any keyword visits.
180  if (!has_keyword_search_terms_)
181    return true;
182
183  sql::Statement del_keyword_visit(GetDB().GetCachedStatement(SQL_FROM_HERE,
184                          "DELETE FROM keyword_search_terms WHERE url_id=?"));
185  del_keyword_visit.BindInt64(0, id);
186
187  return del_keyword_visit.Run();
188}
189
190bool URLDatabase::CreateTemporaryURLTable() {
191  return CreateURLTable(true);
192}
193
194bool URLDatabase::CommitTemporaryURLTable() {
195  // See the comments in the header file as well as
196  // HistoryBackend::DeleteAllHistory() for more information on how this works
197  // and why it does what it does.
198  //
199  // Note that the main database overrides this to additionally create the
200  // supplimentary indices that the archived database doesn't need.
201
202  // Swap the url table out and replace it with the temporary one.
203  if (!GetDB().Execute("DROP TABLE urls")) {
204    NOTREACHED() << GetDB().GetErrorMessage();
205    return false;
206  }
207  if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) {
208    NOTREACHED() << GetDB().GetErrorMessage();
209    return false;
210  }
211
212  // Create the index over URLs. This is needed for the main, in-memory, and
213  // archived databases, so we always do it. The supplimentary indices used by
214  // the main database are not created here. When deleting all history, they
215  // are created by HistoryDatabase::RecreateAllButStarAndURLTables().
216  CreateMainURLIndex();
217
218  return true;
219}
220
221bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) {
222  DCHECK(!enumerator->initialized_);
223  std::string sql("SELECT ");
224  sql.append(kURLRowFields);
225  sql.append(" FROM urls");
226  enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
227  enumerator->initialized_ = enumerator->statement_.is_valid();
228  return enumerator->statement_.is_valid();
229}
230
231bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) {
232  DCHECK(!enumerator->initialized_);
233  std::string sql("SELECT ");
234  sql.append(kURLRowFields);
235  sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR "
236             "typed_count >= ?");
237  enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
238  enumerator->statement_.BindInt64(
239      0, AutocompleteAgeThreshold().ToInternalValue());
240  enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit);
241  enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit);
242  enumerator->initialized_ = enumerator->statement_.is_valid();
243  return enumerator->statement_.is_valid();
244}
245
246bool URLDatabase::AutocompleteForPrefix(const std::string& prefix,
247                                        size_t max_results,
248                                        bool typed_only,
249                                        URLRows* results) {
250  // NOTE: this query originally sorted by starred as the second parameter. But
251  // as bookmarks is no longer part of the db we no longer include the order
252  // by clause.
253  results->clear();
254  const char* sql;
255  int line;
256  if (typed_only) {
257    sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls "
258        "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 "
259        "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC "
260        "LIMIT ?";
261    line = __LINE__;
262  } else {
263    sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls "
264        "WHERE url >= ? AND url < ? AND hidden = 0 "
265        "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC "
266        "LIMIT ?";
267    line = __LINE__;
268  }
269  sql::Statement statement(
270      GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql));
271
272  // We will find all strings between "prefix" and this string, which is prefix
273  // followed by the maximum character size. Use 8-bit strings for everything
274  // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise,
275  // it will have to convert strings either to UTF-8 or UTF-16 for comparison.
276  std::string end_query(prefix);
277  end_query.push_back(std::numeric_limits<unsigned char>::max());
278
279  statement.BindString(0, prefix);
280  statement.BindString(1, end_query);
281  statement.BindInt(2, static_cast<int>(max_results));
282
283  while (statement.Step()) {
284    history::URLRow info;
285    FillURLRow(statement, &info);
286    if (info.url().is_valid())
287      results->push_back(info);
288  }
289  return !results->empty();
290}
291
292bool URLDatabase::IsTypedHost(const std::string& host) {
293  const char* schemes[] = {
294    content::kHttpScheme,
295    content::kHttpsScheme,
296    chrome::kFtpScheme
297  };
298  URLRows dummy;
299  for (size_t i = 0; i < arraysize(schemes); ++i) {
300    std::string scheme_and_host(schemes[i]);
301    scheme_and_host += content::kStandardSchemeSeparator + host;
302    if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) ||
303        AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy))
304      return true;
305  }
306  return false;
307}
308
309bool URLDatabase::FindShortestURLFromBase(const std::string& base,
310                                          const std::string& url,
311                                          int min_visits,
312                                          int min_typed,
313                                          bool allow_base,
314                                          history::URLRow* info) {
315  // Select URLs that start with |base| and are prefixes of |url|.  All parts
316  // of this query except the substr() call can be done using the index.  We
317  // could do this query with a couple of LIKE or GLOB statements as well, but
318  // those wouldn't use the index, and would run into problems with "wildcard"
319  // characters that appear in URLs (% for LIKE, or *, ? for GLOB).
320  std::string sql("SELECT ");
321  sql.append(kURLRowFields);
322  sql.append(" FROM urls WHERE url ");
323  sql.append(allow_base ? ">=" : ">");
324  sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) "
325             "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? "
326             "ORDER BY url LIMIT 1");
327  sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
328  statement.BindString(0, base);
329  statement.BindString(1, url);   // :end
330  statement.BindInt(2, min_visits);
331  statement.BindInt(3, min_typed);
332
333  if (!statement.Step())
334    return false;
335
336  DCHECK(info);
337  FillURLRow(statement, info);
338  return true;
339}
340
341bool URLDatabase::GetTextMatches(const string16& query,
342                                 URLRows* results) {
343  ScopedVector<QueryNode> query_nodes;
344  query_parser_.ParseQueryNodes(query, &query_nodes.get());
345
346  results->clear();
347  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
348      "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0"));
349
350  while (statement.Step()) {
351    std::vector<QueryWord> query_words;
352    string16 url = base::i18n::ToLower(statement.ColumnString16(1));
353    query_parser_.ExtractQueryWords(url, &query_words);
354    GURL gurl(url);
355    if (gurl.is_valid()) {
356      // Decode punycode to match IDN.
357      // |query_words| won't be shown to user - therefore we can use empty
358      // |languages| to reduce dependency (no need to call PrefService).
359      string16 ascii = base::ASCIIToUTF16(gurl.host());
360      string16 utf = net::IDNToUnicode(gurl.host(), std::string());
361      if (ascii != utf)
362        query_parser_.ExtractQueryWords(utf, &query_words);
363    }
364    string16 title = base::i18n::ToLower(statement.ColumnString16(2));
365    query_parser_.ExtractQueryWords(title, &query_words);
366
367    if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) {
368      history::URLResult info;
369      FillURLRow(statement, &info);
370      if (info.url().is_valid())
371        results->push_back(info);
372    }
373  }
374  return !results->empty();
375}
376
377bool URLDatabase::InitKeywordSearchTermsTable() {
378  has_keyword_search_terms_ = true;
379  if (!GetDB().DoesTableExist("keyword_search_terms")) {
380    if (!GetDB().Execute("CREATE TABLE keyword_search_terms ("
381        "keyword_id INTEGER NOT NULL,"      // ID of the TemplateURL.
382        "url_id INTEGER NOT NULL,"          // ID of the url.
383        "lower_term LONGVARCHAR NOT NULL,"  // The search term, in lower case.
384        "term LONGVARCHAR NOT NULL)"))      // The actual search term.
385      return false;
386  }
387  return true;
388}
389
390bool URLDatabase::CreateKeywordSearchTermsIndices() {
391  // For searching.
392  if (!GetDB().Execute(
393          "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON "
394          "keyword_search_terms (keyword_id, lower_term)")) {
395    return false;
396  }
397
398  // For deletion.
399  if (!GetDB().Execute(
400          "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON "
401          "keyword_search_terms (url_id)")) {
402    return false;
403  }
404
405  // For query or deletion by term.
406  if (!GetDB().Execute(
407          "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON "
408          "keyword_search_terms (term)")) {
409    return false;
410  }
411  return true;
412}
413
414bool URLDatabase::DropKeywordSearchTermsTable() {
415  // This will implicitly delete the indices over the table.
416  return GetDB().Execute("DROP TABLE keyword_search_terms");
417}
418
419bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id,
420                                              TemplateURLID keyword_id,
421                                              const string16& term) {
422  DCHECK(url_id && keyword_id && !term.empty());
423
424  sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
425      "SELECT term FROM keyword_search_terms "
426      "WHERE keyword_id = ? AND url_id = ?"));
427  exist_statement.BindInt64(0, keyword_id);
428  exist_statement.BindInt64(1, url_id);
429
430  if (exist_statement.Step())
431    return true;  // Term already exists, no need to add it.
432
433  if (!exist_statement.Succeeded())
434    return false;
435
436  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
437      "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) "
438      "VALUES (?,?,?,?)"));
439  statement.BindInt64(0, keyword_id);
440  statement.BindInt64(1, url_id);
441  statement.BindString16(2, base::i18n::ToLower(term));
442  statement.BindString16(3, term);
443  return statement.Run();
444}
445
446bool URLDatabase::GetKeywordSearchTermRow(URLID url_id,
447                                          KeywordSearchTermRow* row) {
448  DCHECK(url_id);
449  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
450      "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?"));
451  statement.BindInt64(0, url_id);
452
453  if (!statement.Step())
454    return false;
455
456  if (row) {
457    row->url_id = url_id;
458    row->keyword_id = statement.ColumnInt64(0);
459    row->term = statement.ColumnString16(1);
460  }
461  return true;
462}
463
464bool URLDatabase::GetKeywordSearchTermRows(
465    const string16& term,
466    std::vector<KeywordSearchTermRow>* rows) {
467  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
468      "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?"));
469  statement.BindString16(0, term);
470
471  if (!statement.is_valid())
472    return false;
473
474  while (statement.Step()) {
475    KeywordSearchTermRow row;
476    row.url_id = statement.ColumnInt64(1);
477    row.keyword_id = statement.ColumnInt64(0);
478    row.term = term;
479    rows->push_back(row);
480  }
481  return true;
482}
483
484void URLDatabase::DeleteAllSearchTermsForKeyword(
485    TemplateURLID keyword_id) {
486  DCHECK(keyword_id);
487  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
488      "DELETE FROM keyword_search_terms WHERE keyword_id=?"));
489  statement.BindInt64(0, keyword_id);
490
491  statement.Run();
492}
493
494void URLDatabase::GetMostRecentKeywordSearchTerms(
495    TemplateURLID keyword_id,
496    const string16& prefix,
497    int max_count,
498    std::vector<KeywordSearchTermVisit>* matches) {
499  // NOTE: the keyword_id can be zero if on first run the user does a query
500  // before the TemplateURLService has finished loading. As the chances of this
501  // occurring are small, we ignore it.
502  if (!keyword_id)
503    return;
504
505  DCHECK(!prefix.empty());
506  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
507      "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time "
508      "FROM keyword_search_terms kv "
509      "JOIN urls u ON kv.url_id = u.id "
510      "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? "
511      "ORDER BY u.last_visit_time DESC LIMIT ?"));
512
513  // NOTE: Keep this ToLower() call in sync with search_provider.cc.
514  string16 lower_prefix = base::i18n::ToLower(prefix);
515  // This magic gives us a prefix search.
516  string16 next_prefix = lower_prefix;
517  next_prefix[next_prefix.size() - 1] =
518      next_prefix[next_prefix.size() - 1] + 1;
519  statement.BindInt64(0, keyword_id);
520  statement.BindString16(1, lower_prefix);
521  statement.BindString16(2, next_prefix);
522  statement.BindInt(3, max_count);
523
524  KeywordSearchTermVisit visit;
525  while (statement.Step()) {
526    visit.term = statement.ColumnString16(0);
527    visit.visits = statement.ColumnInt(1);
528    visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2));
529    matches->push_back(visit);
530  }
531}
532
533bool URLDatabase::DeleteKeywordSearchTerm(const string16& term) {
534  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
535      "DELETE FROM keyword_search_terms WHERE term=?"));
536  statement.BindString16(0, term);
537
538  return statement.Run();
539}
540
541bool URLDatabase::DropStarredIDFromURLs() {
542  if (!GetDB().DoesColumnExist("urls", "starred_id"))
543    return true;  // urls is already updated, no need to continue.
544
545  // Create a temporary table to contain the new URLs table.
546  if (!CreateTemporaryURLTable()) {
547    NOTREACHED();
548    return false;
549  }
550
551  // Copy the contents.
552  if (!GetDB().Execute(
553      "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, "
554      "last_visit_time, hidden, favicon_id) "
555      "SELECT id, url, title, visit_count, typed_count, last_visit_time, "
556      "hidden, favicon_id FROM urls")) {
557    NOTREACHED() << GetDB().GetErrorMessage();
558    return false;
559  }
560
561  // Rename/commit the tmp table.
562  CommitTemporaryURLTable();
563
564  return true;
565}
566
567bool URLDatabase::CreateURLTable(bool is_temporary) {
568  const char* name = is_temporary ? "temp_urls" : "urls";
569  if (GetDB().DoesTableExist(name))
570    return true;
571
572  std::string sql;
573  sql.append("CREATE TABLE ");
574  sql.append(name);
575  sql.append("("
576      "id INTEGER PRIMARY KEY,"
577      "url LONGVARCHAR,"
578      "title LONGVARCHAR,"
579      "visit_count INTEGER DEFAULT 0 NOT NULL,"
580      "typed_count INTEGER DEFAULT 0 NOT NULL,"
581      "last_visit_time INTEGER NOT NULL,"
582      "hidden INTEGER DEFAULT 0 NOT NULL,"
583      "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now.
584
585  return GetDB().Execute(sql.c_str());
586}
587
588bool URLDatabase::CreateMainURLIndex() {
589  // Index over URLs so we can quickly look up based on URL.
590  return GetDB().Execute(
591      "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)");
592}
593
594}  // namespace history
595