url_database.cc revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/history/url_database.h" 6 7#include <algorithm> 8#include <limits> 9#include <string> 10#include <vector> 11 12#include "base/i18n/case_conversion.h" 13#include "base/strings/utf_string_conversions.h" 14#include "chrome/common/url_constants.h" 15#include "net/base/net_util.h" 16#include "sql/statement.h" 17#include "ui/base/l10n/l10n_util.h" 18#include "url/gurl.h" 19 20namespace history { 21 22const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS; 23const int URLDatabase::kNumURLRowFields = 9; 24 25URLDatabase::URLEnumeratorBase::URLEnumeratorBase() 26 : initialized_(false) { 27} 28 29URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() { 30} 31 32URLDatabase::URLEnumerator::URLEnumerator() { 33} 34 35bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) { 36 if (statement_.Step()) { 37 FillURLRow(statement_, r); 38 return true; 39 } 40 return false; 41} 42 43URLDatabase::URLDatabase() 44 : has_keyword_search_terms_(false) { 45} 46 47URLDatabase::~URLDatabase() { 48} 49 50// static 51std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) { 52 // TODO(brettw): do something fancy here with encoding, etc. 53 54 // Strip username and password from URL before sending to DB. 55 GURL::Replacements replacements; 56 replacements.ClearUsername(); 57 replacements.ClearPassword(); 58 59 return (gurl.ReplaceComponents(replacements)).spec(); 60} 61 62// Convenience to fill a history::URLRow. Must be in sync with the fields in 63// kURLRowFields. 64void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) { 65 DCHECK(i); 66 i->id_ = s.ColumnInt64(0); 67 i->url_ = GURL(s.ColumnString(1)); 68 i->title_ = s.ColumnString16(2); 69 i->visit_count_ = s.ColumnInt(3); 70 i->typed_count_ = s.ColumnInt(4); 71 i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5)); 72 i->hidden_ = s.ColumnInt(6) != 0; 73} 74 75bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) { 76 // TODO(brettw) We need check for empty URLs to handle the case where 77 // there are old URLs in the database that are empty that got in before 78 // we added any checks. We should eventually be able to remove it 79 // when all inputs are using GURL (which prohibit empty input). 80 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 81 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?")); 82 statement.BindInt64(0, url_id); 83 84 if (statement.Step()) { 85 FillURLRow(statement, info); 86 return true; 87 } 88 return false; 89} 90 91bool URLDatabase::GetAllTypedUrls(URLRows* urls) { 92 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 93 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0")); 94 95 while (statement.Step()) { 96 URLRow info; 97 FillURLRow(statement, &info); 98 urls->push_back(info); 99 } 100 return true; 101} 102 103URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) { 104 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 105 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?")); 106 std::string url_string = GURLToDatabaseURL(url); 107 statement.BindString(0, url_string); 108 109 if (!statement.Step()) 110 return 0; // no data 111 112 if (info) 113 FillURLRow(statement, info); 114 return statement.ColumnInt64(0); 115} 116 117bool URLDatabase::UpdateURLRow(URLID url_id, 118 const history::URLRow& info) { 119 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 120 "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?," 121 "hidden=?" 122 "WHERE id=?")); 123 statement.BindString16(0, info.title()); 124 statement.BindInt(1, info.visit_count()); 125 statement.BindInt(2, info.typed_count()); 126 statement.BindInt64(3, info.last_visit().ToInternalValue()); 127 statement.BindInt(4, info.hidden() ? 1 : 0); 128 statement.BindInt64(5, url_id); 129 130 return statement.Run(); 131} 132 133URLID URLDatabase::AddURLInternal(const history::URLRow& info, 134 bool is_temporary) { 135 // This function is used to insert into two different tables, so we have to 136 // do some shuffling. Unfortinately, we can't use the macro 137 // HISTORY_URL_ROW_FIELDS because that specifies the table name which is 138 // invalid in the insert syntax. 139 #define ADDURL_COMMON_SUFFIX \ 140 " (url, title, visit_count, typed_count, "\ 141 "last_visit_time, hidden) "\ 142 "VALUES (?,?,?,?,?,?)" 143 const char* statement_name; 144 const char* statement_sql; 145 if (is_temporary) { 146 statement_name = "AddURLTemporary"; 147 statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX; 148 } else { 149 statement_name = "AddURL"; 150 statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX; 151 } 152 #undef ADDURL_COMMON_SUFFIX 153 154 sql::Statement statement(GetDB().GetCachedStatement( 155 sql::StatementID(statement_name), statement_sql)); 156 statement.BindString(0, GURLToDatabaseURL(info.url())); 157 statement.BindString16(1, info.title()); 158 statement.BindInt(2, info.visit_count()); 159 statement.BindInt(3, info.typed_count()); 160 statement.BindInt64(4, info.last_visit().ToInternalValue()); 161 statement.BindInt(5, info.hidden() ? 1 : 0); 162 163 if (!statement.Run()) { 164 VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec() 165 << " to table history.urls."; 166 return 0; 167 } 168 return GetDB().GetLastInsertRowId(); 169} 170 171bool URLDatabase::DeleteURLRow(URLID id) { 172 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 173 "DELETE FROM urls WHERE id = ?")); 174 statement.BindInt64(0, id); 175 176 if (!statement.Run()) 177 return false; 178 179 // And delete any keyword visits. 180 if (!has_keyword_search_terms_) 181 return true; 182 183 sql::Statement del_keyword_visit(GetDB().GetCachedStatement(SQL_FROM_HERE, 184 "DELETE FROM keyword_search_terms WHERE url_id=?")); 185 del_keyword_visit.BindInt64(0, id); 186 187 return del_keyword_visit.Run(); 188} 189 190bool URLDatabase::CreateTemporaryURLTable() { 191 return CreateURLTable(true); 192} 193 194bool URLDatabase::CommitTemporaryURLTable() { 195 // See the comments in the header file as well as 196 // HistoryBackend::DeleteAllHistory() for more information on how this works 197 // and why it does what it does. 198 // 199 // Note that the main database overrides this to additionally create the 200 // supplimentary indices that the archived database doesn't need. 201 202 // Swap the url table out and replace it with the temporary one. 203 if (!GetDB().Execute("DROP TABLE urls")) { 204 NOTREACHED() << GetDB().GetErrorMessage(); 205 return false; 206 } 207 if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) { 208 NOTREACHED() << GetDB().GetErrorMessage(); 209 return false; 210 } 211 212 // Create the index over URLs. This is needed for the main, in-memory, and 213 // archived databases, so we always do it. The supplimentary indices used by 214 // the main database are not created here. When deleting all history, they 215 // are created by HistoryDatabase::RecreateAllButStarAndURLTables(). 216 CreateMainURLIndex(); 217 218 return true; 219} 220 221bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { 222 DCHECK(!enumerator->initialized_); 223 std::string sql("SELECT "); 224 sql.append(kURLRowFields); 225 sql.append(" FROM urls"); 226 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 227 enumerator->initialized_ = enumerator->statement_.is_valid(); 228 return enumerator->statement_.is_valid(); 229} 230 231bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { 232 DCHECK(!enumerator->initialized_); 233 std::string sql("SELECT "); 234 sql.append(kURLRowFields); 235 sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR " 236 "typed_count >= ?"); 237 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 238 enumerator->statement_.BindInt64( 239 0, AutocompleteAgeThreshold().ToInternalValue()); 240 enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); 241 enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); 242 enumerator->initialized_ = enumerator->statement_.is_valid(); 243 return enumerator->statement_.is_valid(); 244} 245 246bool URLDatabase::AutocompleteForPrefix(const std::string& prefix, 247 size_t max_results, 248 bool typed_only, 249 URLRows* results) { 250 // NOTE: this query originally sorted by starred as the second parameter. But 251 // as bookmarks is no longer part of the db we no longer include the order 252 // by clause. 253 results->clear(); 254 const char* sql; 255 int line; 256 if (typed_only) { 257 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 258 "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 " 259 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 260 "LIMIT ?"; 261 line = __LINE__; 262 } else { 263 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 264 "WHERE url >= ? AND url < ? AND hidden = 0 " 265 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 266 "LIMIT ?"; 267 line = __LINE__; 268 } 269 sql::Statement statement( 270 GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql)); 271 272 // We will find all strings between "prefix" and this string, which is prefix 273 // followed by the maximum character size. Use 8-bit strings for everything 274 // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise, 275 // it will have to convert strings either to UTF-8 or UTF-16 for comparison. 276 std::string end_query(prefix); 277 end_query.push_back(std::numeric_limits<unsigned char>::max()); 278 279 statement.BindString(0, prefix); 280 statement.BindString(1, end_query); 281 statement.BindInt(2, static_cast<int>(max_results)); 282 283 while (statement.Step()) { 284 history::URLRow info; 285 FillURLRow(statement, &info); 286 if (info.url().is_valid()) 287 results->push_back(info); 288 } 289 return !results->empty(); 290} 291 292bool URLDatabase::IsTypedHost(const std::string& host) { 293 const char* schemes[] = { 294 content::kHttpScheme, 295 content::kHttpsScheme, 296 chrome::kFtpScheme 297 }; 298 URLRows dummy; 299 for (size_t i = 0; i < arraysize(schemes); ++i) { 300 std::string scheme_and_host(schemes[i]); 301 scheme_and_host += content::kStandardSchemeSeparator + host; 302 if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) || 303 AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy)) 304 return true; 305 } 306 return false; 307} 308 309bool URLDatabase::FindShortestURLFromBase(const std::string& base, 310 const std::string& url, 311 int min_visits, 312 int min_typed, 313 bool allow_base, 314 history::URLRow* info) { 315 // Select URLs that start with |base| and are prefixes of |url|. All parts 316 // of this query except the substr() call can be done using the index. We 317 // could do this query with a couple of LIKE or GLOB statements as well, but 318 // those wouldn't use the index, and would run into problems with "wildcard" 319 // characters that appear in URLs (% for LIKE, or *, ? for GLOB). 320 std::string sql("SELECT "); 321 sql.append(kURLRowFields); 322 sql.append(" FROM urls WHERE url "); 323 sql.append(allow_base ? ">=" : ">"); 324 sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) " 325 "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? " 326 "ORDER BY url LIMIT 1"); 327 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); 328 statement.BindString(0, base); 329 statement.BindString(1, url); // :end 330 statement.BindInt(2, min_visits); 331 statement.BindInt(3, min_typed); 332 333 if (!statement.Step()) 334 return false; 335 336 DCHECK(info); 337 FillURLRow(statement, info); 338 return true; 339} 340 341bool URLDatabase::GetTextMatches(const string16& query, 342 URLRows* results) { 343 ScopedVector<QueryNode> query_nodes; 344 query_parser_.ParseQueryNodes(query, &query_nodes.get()); 345 346 results->clear(); 347 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 348 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0")); 349 350 while (statement.Step()) { 351 std::vector<QueryWord> query_words; 352 string16 url = base::i18n::ToLower(statement.ColumnString16(1)); 353 query_parser_.ExtractQueryWords(url, &query_words); 354 GURL gurl(url); 355 if (gurl.is_valid()) { 356 // Decode punycode to match IDN. 357 // |query_words| won't be shown to user - therefore we can use empty 358 // |languages| to reduce dependency (no need to call PrefService). 359 string16 ascii = base::ASCIIToUTF16(gurl.host()); 360 string16 utf = net::IDNToUnicode(gurl.host(), std::string()); 361 if (ascii != utf) 362 query_parser_.ExtractQueryWords(utf, &query_words); 363 } 364 string16 title = base::i18n::ToLower(statement.ColumnString16(2)); 365 query_parser_.ExtractQueryWords(title, &query_words); 366 367 if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) { 368 history::URLResult info; 369 FillURLRow(statement, &info); 370 if (info.url().is_valid()) 371 results->push_back(info); 372 } 373 } 374 return !results->empty(); 375} 376 377bool URLDatabase::InitKeywordSearchTermsTable() { 378 has_keyword_search_terms_ = true; 379 if (!GetDB().DoesTableExist("keyword_search_terms")) { 380 if (!GetDB().Execute("CREATE TABLE keyword_search_terms (" 381 "keyword_id INTEGER NOT NULL," // ID of the TemplateURL. 382 "url_id INTEGER NOT NULL," // ID of the url. 383 "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case. 384 "term LONGVARCHAR NOT NULL)")) // The actual search term. 385 return false; 386 } 387 return true; 388} 389 390bool URLDatabase::CreateKeywordSearchTermsIndices() { 391 // For searching. 392 if (!GetDB().Execute( 393 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON " 394 "keyword_search_terms (keyword_id, lower_term)")) { 395 return false; 396 } 397 398 // For deletion. 399 if (!GetDB().Execute( 400 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON " 401 "keyword_search_terms (url_id)")) { 402 return false; 403 } 404 405 // For query or deletion by term. 406 if (!GetDB().Execute( 407 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON " 408 "keyword_search_terms (term)")) { 409 return false; 410 } 411 return true; 412} 413 414bool URLDatabase::DropKeywordSearchTermsTable() { 415 // This will implicitly delete the indices over the table. 416 return GetDB().Execute("DROP TABLE keyword_search_terms"); 417} 418 419bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id, 420 TemplateURLID keyword_id, 421 const string16& term) { 422 DCHECK(url_id && keyword_id && !term.empty()); 423 424 sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 425 "SELECT term FROM keyword_search_terms " 426 "WHERE keyword_id = ? AND url_id = ?")); 427 exist_statement.BindInt64(0, keyword_id); 428 exist_statement.BindInt64(1, url_id); 429 430 if (exist_statement.Step()) 431 return true; // Term already exists, no need to add it. 432 433 if (!exist_statement.Succeeded()) 434 return false; 435 436 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 437 "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) " 438 "VALUES (?,?,?,?)")); 439 statement.BindInt64(0, keyword_id); 440 statement.BindInt64(1, url_id); 441 statement.BindString16(2, base::i18n::ToLower(term)); 442 statement.BindString16(3, term); 443 return statement.Run(); 444} 445 446bool URLDatabase::GetKeywordSearchTermRow(URLID url_id, 447 KeywordSearchTermRow* row) { 448 DCHECK(url_id); 449 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 450 "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?")); 451 statement.BindInt64(0, url_id); 452 453 if (!statement.Step()) 454 return false; 455 456 if (row) { 457 row->url_id = url_id; 458 row->keyword_id = statement.ColumnInt64(0); 459 row->term = statement.ColumnString16(1); 460 } 461 return true; 462} 463 464bool URLDatabase::GetKeywordSearchTermRows( 465 const string16& term, 466 std::vector<KeywordSearchTermRow>* rows) { 467 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 468 "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?")); 469 statement.BindString16(0, term); 470 471 if (!statement.is_valid()) 472 return false; 473 474 while (statement.Step()) { 475 KeywordSearchTermRow row; 476 row.url_id = statement.ColumnInt64(1); 477 row.keyword_id = statement.ColumnInt64(0); 478 row.term = term; 479 rows->push_back(row); 480 } 481 return true; 482} 483 484void URLDatabase::DeleteAllSearchTermsForKeyword( 485 TemplateURLID keyword_id) { 486 DCHECK(keyword_id); 487 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 488 "DELETE FROM keyword_search_terms WHERE keyword_id=?")); 489 statement.BindInt64(0, keyword_id); 490 491 statement.Run(); 492} 493 494void URLDatabase::GetMostRecentKeywordSearchTerms( 495 TemplateURLID keyword_id, 496 const string16& prefix, 497 int max_count, 498 std::vector<KeywordSearchTermVisit>* matches) { 499 // NOTE: the keyword_id can be zero if on first run the user does a query 500 // before the TemplateURLService has finished loading. As the chances of this 501 // occurring are small, we ignore it. 502 if (!keyword_id) 503 return; 504 505 DCHECK(!prefix.empty()); 506 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 507 "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time " 508 "FROM keyword_search_terms kv " 509 "JOIN urls u ON kv.url_id = u.id " 510 "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? " 511 "ORDER BY u.last_visit_time DESC LIMIT ?")); 512 513 // NOTE: Keep this ToLower() call in sync with search_provider.cc. 514 string16 lower_prefix = base::i18n::ToLower(prefix); 515 // This magic gives us a prefix search. 516 string16 next_prefix = lower_prefix; 517 next_prefix[next_prefix.size() - 1] = 518 next_prefix[next_prefix.size() - 1] + 1; 519 statement.BindInt64(0, keyword_id); 520 statement.BindString16(1, lower_prefix); 521 statement.BindString16(2, next_prefix); 522 statement.BindInt(3, max_count); 523 524 KeywordSearchTermVisit visit; 525 while (statement.Step()) { 526 visit.term = statement.ColumnString16(0); 527 visit.visits = statement.ColumnInt(1); 528 visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); 529 matches->push_back(visit); 530 } 531} 532 533bool URLDatabase::DeleteKeywordSearchTerm(const string16& term) { 534 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 535 "DELETE FROM keyword_search_terms WHERE term=?")); 536 statement.BindString16(0, term); 537 538 return statement.Run(); 539} 540 541bool URLDatabase::DropStarredIDFromURLs() { 542 if (!GetDB().DoesColumnExist("urls", "starred_id")) 543 return true; // urls is already updated, no need to continue. 544 545 // Create a temporary table to contain the new URLs table. 546 if (!CreateTemporaryURLTable()) { 547 NOTREACHED(); 548 return false; 549 } 550 551 // Copy the contents. 552 if (!GetDB().Execute( 553 "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, " 554 "last_visit_time, hidden, favicon_id) " 555 "SELECT id, url, title, visit_count, typed_count, last_visit_time, " 556 "hidden, favicon_id FROM urls")) { 557 NOTREACHED() << GetDB().GetErrorMessage(); 558 return false; 559 } 560 561 // Rename/commit the tmp table. 562 CommitTemporaryURLTable(); 563 564 return true; 565} 566 567bool URLDatabase::CreateURLTable(bool is_temporary) { 568 const char* name = is_temporary ? "temp_urls" : "urls"; 569 if (GetDB().DoesTableExist(name)) 570 return true; 571 572 std::string sql; 573 sql.append("CREATE TABLE "); 574 sql.append(name); 575 sql.append("(" 576 "id INTEGER PRIMARY KEY," 577 "url LONGVARCHAR," 578 "title LONGVARCHAR," 579 "visit_count INTEGER DEFAULT 0 NOT NULL," 580 "typed_count INTEGER DEFAULT 0 NOT NULL," 581 "last_visit_time INTEGER NOT NULL," 582 "hidden INTEGER DEFAULT 0 NOT NULL," 583 "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now. 584 585 return GetDB().Execute(sql.c_str()); 586} 587 588bool URLDatabase::CreateMainURLIndex() { 589 // Index over URLs so we can quickly look up based on URL. 590 return GetDB().Execute( 591 "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)"); 592} 593 594} // namespace history 595