// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
5731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/browser/autocomplete/history_url_provider.h"
6731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
7731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include <string>
8731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
9731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/string_util.h"
10731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/utf_string_conversions.h"
1121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/autocomplete/autocomplete.h"
1221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/autocomplete/autocomplete_match.h"
1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/history/history.h"
14731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/browser/net/url_fixer_upper.h"
1521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/profiles/profile.h"
16731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/common/url_constants.h"
17731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "googleurl/src/url_util.h"
18731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
19731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickHistoryProvider::HistoryProvider(ACProviderListener* listener,
20731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                                 Profile* profile,
21731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                                 const char* name)
22731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    : AutocompleteProvider(listener, profile, name) {
23731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
24731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
2521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenvoid HistoryProvider::DeleteMatch(const AutocompleteMatch& match) {
2621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  DCHECK(done_);
2721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  DCHECK(profile_);
2821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  DCHECK(match.deletable);
2921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
3021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  HistoryService* const history_service =
3121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
3221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
3321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // Delete the match from the history DB.
3421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  GURL selected_url(match.destination_url);
3521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  if (!history_service || !selected_url.is_valid()) {
3621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    NOTREACHED() << "Can't delete requested URL";
3721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    return;
3821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
3921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  history_service->DeleteURL(selected_url);
4021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
4121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  // Delete the match from the current set of matches.
4221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  bool found = false;
4321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  for (ACMatches::iterator i(matches_.begin()); i != matches_.end(); ++i) {
4421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    if (i->destination_url == selected_url && i->type == match.type) {
4521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      found = true;
463f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen      if (i->is_history_what_you_typed_match || i->starred) {
473f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen        // We can't get rid of What-You-Typed or Bookmarked matches,
483f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen        // but we can make them look like they have no backing data.
4921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        i->deletable = false;
5021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        i->description.clear();
5121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        i->description_class.clear();
5221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      } else {
5321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen        matches_.erase(i);
5421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      }
5521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen      break;
5621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen    }
5721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  }
5821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  DCHECK(found) << "Asked to delete a URL that isn't in our set of matches";
5921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen  listener_->OnProviderUpdate(true);
6021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen}
6121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen
62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static
6372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenstring16 HistoryProvider::FixupUserInput(const AutocompleteInput& input) {
6472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  const string16& input_text = input.text();
65731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Fixup and canonicalize user input.
6672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  const GURL canonical_gurl(URLFixerUpper::FixupURL(UTF16ToUTF8(input_text),
67731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                                                    std::string()));
68731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  std::string canonical_gurl_str(canonical_gurl.possibly_invalid_spec());
69731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (canonical_gurl_str.empty()) {
70731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // This probably won't happen, but there are no guarantees.
71731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return input_text;
72731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  }
73731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
74731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // If the user types a number, GURL will convert it to a dotted quad.
75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // However, if the parser did not mark this as a URL, then the user probably
76731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // didn't intend this interpretation.  Since this can break history matching
77731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // for hostname beginning with numbers (e.g. input of "17173" will be matched
78731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // against "0.0.67.21" instead of the original "17173", failing to find
79731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // "17173.com"), swap the original hostname in for the fixed-up one.
80731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if ((input.type() != AutocompleteInput::URL) &&
81731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      canonical_gurl.HostIsIPAddress()) {
82731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    std::string original_hostname =
8372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen        UTF16ToUTF8(input_text.substr(input.parts().host.begin,
8472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen                                      input.parts().host.len));
85731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const url_parse::Parsed& parts =
86731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick        canonical_gurl.parsed_for_possibly_invalid_spec();
87731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // parts.host must not be empty when HostIsIPAddress() is true.
88731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    DCHECK(parts.host.is_nonempty());
89731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    canonical_gurl_str.replace(parts.host.begin, parts.host.len,
90731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                               original_hostname);
91731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  }
9272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  string16 output = UTF8ToUTF16(canonical_gurl_str);
93731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Don't prepend a scheme when the user didn't have one.  Since the fixer
94731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // upper only prepends the "http" scheme, that's all we need to check for.
95731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (canonical_gurl.SchemeIs(chrome::kHttpScheme) &&
9672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      !url_util::FindAndCompareScheme(UTF16ToUTF8(input_text),
97731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick                                      chrome::kHttpScheme, NULL))
98731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    TrimHttpPrefix(&output);
99731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
100731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Make the number of trailing slashes on the output exactly match the input.
101731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Examples of why not doing this would matter:
102731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // * The user types "a" and has this fixed up to "a/".  Now no other sites
103731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  //   beginning with "a" will match.
104731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // * The user types "file:" and has this fixed up to "file://".  Now inline
105731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  //   autocomplete will append too few slashes, resulting in e.g. "file:/b..."
106731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  //   instead of "file:///b..."
107731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // * The user types "http:/" and has this fixed up to "http:".  Now inline
108731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  //   autocomplete will append too many slashes, resulting in e.g.
109731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  //   "http:///c..." instead of "http://c...".
110731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // NOTE: We do this after calling TrimHttpPrefix() since that can strip
111731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // trailing slashes (if the scheme is the only thing in the input).  It's not
112731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // clear that the result of fixup really matters in this case, but there's no
113731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // harm in making sure.
11472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  const size_t last_input_nonslash =
11572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      input_text.find_last_not_of(ASCIIToUTF16("/\\"));
11672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  const size_t num_input_slashes = (last_input_nonslash == string16::npos) ?
117731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      input_text.length() : (input_text.length() - 1 - last_input_nonslash);
11872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  const size_t last_output_nonslash =
11972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      output.find_last_not_of(ASCIIToUTF16("/\\"));
120731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  const size_t num_output_slashes =
12172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      (last_output_nonslash == string16::npos) ?
122731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      output.length() : (output.length() - 1 - last_output_nonslash);
123731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (num_output_slashes < num_input_slashes)
124731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    output.append(num_input_slashes - num_output_slashes, '/');
125731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  else if (num_output_slashes > num_input_slashes)
126731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    output.erase(output.length() - num_output_slashes + num_input_slashes);
127731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
128731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return output;
129731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
130731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
131731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static
13272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsensize_t HistoryProvider::TrimHttpPrefix(string16* url) {
133731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Find any "http:".
134731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (!HasHTTPScheme(*url))
135731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return 0;
13672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  size_t scheme_pos =
13772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      url->find(ASCIIToUTF16(chrome::kHttpScheme) + char16(':'));
138dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  DCHECK_NE(string16::npos, scheme_pos);
139731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
140731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Erase scheme plus up to two slashes.
141731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  size_t prefix_end = scheme_pos + strlen(chrome::kHttpScheme) + 1;
142731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  const size_t after_slashes = std::min(url->length(), prefix_end + 2);
14372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  while ((prefix_end < after_slashes) && ((*url)[prefix_end] == '/'))
144731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    ++prefix_end;
145731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  url->erase(scheme_pos, prefix_end - scheme_pos);
146731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return (scheme_pos == 0) ? prefix_end : 0;
147731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
148