history_provider.cc revision f2477e01787aa58f445919b809d89e252beef54f
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/history_provider.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h"
10868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/autocomplete_input.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/autocomplete_match.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/history/history_service.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_service_factory.h"
16f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/history/in_memory_url_index_types.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/profiles/profile.h"
18558790d6acca3451cf3a6b497803a5f07d0bec58Ben Murdoch#include "chrome/common/net/url_fixer_upper.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/url_constants.h"
20eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/url_util.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HistoryProvider::HistoryProvider(AutocompleteProviderListener* listener,
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 Profile* profile,
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 AutocompleteProvider::Type type)
257dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    : AutocompleteProvider(listener, profile, type) {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HistoryProvider::DeleteMatch(const AutocompleteMatch& match) {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(done_);
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(profile_);
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(match.deletable);
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HistoryService* const history_service =
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Delete the match from the history DB.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(history_service);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(match.destination_url.is_valid());
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  history_service->DeleteURL(match.destination_url);
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DeleteMatchFromMatches(match);
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)HistoryProvider::~HistoryProvider() {}
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void HistoryProvider::DeleteMatchFromMatches(const AutocompleteMatch& match) {
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool found = false;
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (ACMatches::iterator i(matches_.begin()); i != matches_.end(); ++i) {
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i->destination_url == match.destination_url && i->type == match.type) {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      found = true;
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (i->is_history_what_you_typed_match || i->starred) {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We can't get rid of What-You-Typed or Bookmarked matches,
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // but we can make them look like they have no backing data.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        i->deletable = false;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        i->description.clear();
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        i->description_class.clear();
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        matches_.erase(i);
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(found) << "Asked to delete a URL that isn't in our set of matches";
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  listener_->OnProviderUpdate(true);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HistoryProvider::FixupUserInput(AutocompleteInput* input) {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const string16& input_text = input->text();
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fixup and canonicalize user input.
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const GURL canonical_gurl(URLFixerUpper::FixupURL(UTF16ToUTF8(input_text),
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                    std::string()));
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string canonical_gurl_str(canonical_gurl.possibly_invalid_spec());
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (canonical_gurl_str.empty()) {
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // This probably won't happen, but there are no guarantees.
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If the user types a number, GURL will convert it to a dotted quad.
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // However, if the parser did not mark this as a URL, then the user probably
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // didn't intend this interpretation.  Since this can break history matching
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // for hostname beginning with numbers (e.g. input of "17173" will be matched
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // against "0.0.67.21" instead of the original "17173", failing to find
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "17173.com"), swap the original hostname in for the fixed-up one.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((input->type() != AutocompleteInput::URL) &&
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      canonical_gurl.HostIsIPAddress()) {
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string original_hostname =
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        UTF16ToUTF8(input_text.substr(input->parts().host.begin,
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      input->parts().host.len));
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const url_parse::Parsed& parts =
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        canonical_gurl.parsed_for_possibly_invalid_spec();
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // parts.host must not be empty when HostIsIPAddress() is true.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(parts.host.is_nonempty());
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    canonical_gurl_str.replace(parts.host.begin, parts.host.len,
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               original_hostname);
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16 output = UTF8ToUTF16(canonical_gurl_str);
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Don't prepend a scheme when the user didn't have one.  Since the fixer
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // upper only prepends the "http" scheme, that's all we need to check for.
990f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)  if (!AutocompleteInput::HasHTTPScheme(input_text))
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TrimHttpPrefix(&output);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Make the number of trailing slashes on the output exactly match the input.
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Examples of why not doing this would matter:
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * The user types "a" and has this fixed up to "a/".  Now no other sites
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   beginning with "a" will match.
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * The user types "file:" and has this fixed up to "file://".  Now inline
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   autocomplete will append too few slashes, resulting in e.g. "file:/b..."
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   instead of "file:///b..."
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // * The user types "http:/" and has this fixed up to "http:".  Now inline
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   autocomplete will append too many slashes, resulting in e.g.
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //   "http:///c..." instead of "http://c...".
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // NOTE: We do this after calling TrimHttpPrefix() since that can strip
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // trailing slashes (if the scheme is the only thing in the input).  It's not
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // clear that the result of fixup really matters in this case, but there's no
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // harm in making sure.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t last_input_nonslash =
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      input_text.find_last_not_of(ASCIIToUTF16("/\\"));
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t num_input_slashes = (last_input_nonslash == string16::npos) ?
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      input_text.length() : (input_text.length() - 1 - last_input_nonslash);
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t last_output_nonslash =
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      output.find_last_not_of(ASCIIToUTF16("/\\"));
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t num_output_slashes =
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (last_output_nonslash == string16::npos) ?
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      output.length() : (output.length() - 1 - last_output_nonslash);
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (num_output_slashes < num_input_slashes)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    output.append(num_input_slashes - num_output_slashes, '/');
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else if (num_output_slashes > num_input_slashes)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    output.erase(output.length() - num_output_slashes + num_input_slashes);
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url_parse::Parsed parts;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  URLFixerUpper::SegmentURL(output, &parts);
1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  input->UpdateText(output, string16::npos, parts);
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return !output.empty();
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)size_t HistoryProvider::TrimHttpPrefix(string16* url) {
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Find any "http:".
1390f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)  if (!AutocompleteInput::HasHTTPScheme(*url))
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t scheme_pos =
142d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)      url->find(ASCIIToUTF16(content::kHttpScheme) + char16(':'));
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_NE(string16::npos, scheme_pos);
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Erase scheme plus up to two slashes.
146d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)  size_t prefix_end = scheme_pos + strlen(content::kHttpScheme) + 1;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t after_slashes = std::min(url->length(), prefix_end + 2);
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while ((prefix_end < after_slashes) && ((*url)[prefix_end] == '/'))
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++prefix_end;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url->erase(scheme_pos, prefix_end - scheme_pos);
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (scheme_pos == 0) ? prefix_end : 0;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool HistoryProvider::PreventInlineAutocomplete(
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const AutocompleteInput& input) {
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return input.prevent_inline_autocomplete() ||
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (!input.text().empty() &&
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       IsWhitespace(input.text()[input.text().length() - 1]));
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// static
163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)ACMatchClassifications HistoryProvider::SpansFromTermMatch(
164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    const history::TermMatches& matches,
165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    size_t text_length,
166f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    bool is_url) {
167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ACMatchClassification::Style url_style =
168f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ACMatchClassifications spans;
170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  if (matches.empty()) {
171f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (text_length)
172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      spans.push_back(ACMatchClassification(0, url_style));
173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return spans;
174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  if (matches[0].offset)
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    spans.push_back(ACMatchClassification(0, url_style));
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  size_t match_count = matches.size();
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (size_t i = 0; i < match_count;) {
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    size_t offset = matches[i].offset;
180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    spans.push_back(ACMatchClassification(offset,
181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        ACMatchClassification::MATCH | url_style));
182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // Skip all adjacent matches.
183f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    do {
184f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      offset += matches[i].length;
185f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      ++i;
186f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    } while ((i < match_count) && (offset == matches[i].offset));
187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (offset < text_length)
188f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      spans.push_back(ACMatchClassification(offset, url_style));
189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
191f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  return spans;
192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
194