1dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// Use of this source code is governed by a BSD-style license that can be 3731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// found in the LICENSE file. 4731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 5731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/browser/autocomplete/history_url_provider.h" 6731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 7731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include <string> 8731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 9731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/string_util.h" 10731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/utf_string_conversions.h" 1121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/autocomplete/autocomplete.h" 1221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/autocomplete/autocomplete_match.h" 1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/history/history.h" 14731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/browser/net/url_fixer_upper.h" 1521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/profiles/profile.h" 16731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "chrome/common/url_constants.h" 17731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "googleurl/src/url_util.h" 18731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 19731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickHistoryProvider::HistoryProvider(ACProviderListener* listener, 20731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick Profile* profile, 21731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const char* name) 22731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick : AutocompleteProvider(listener, profile, name) { 23731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 24731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 2521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsenvoid HistoryProvider::DeleteMatch(const AutocompleteMatch& match) { 2621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen DCHECK(done_); 2721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen DCHECK(profile_); 2821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen DCHECK(match.deletable); 2921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 3021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen HistoryService* const history_service = 3121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 3221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 3321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen // Delete the match from the history DB. 3421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen GURL selected_url(match.destination_url); 3521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (!history_service || !selected_url.is_valid()) { 3621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen NOTREACHED() << "Can't delete requested URL"; 3721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen return; 3821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 3921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen history_service->DeleteURL(selected_url); 4021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 4121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen // Delete the match from the current set of matches. 4221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen bool found = false; 4321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen for (ACMatches::iterator i(matches_.begin()); i != matches_.end(); ++i) { 4421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen if (i->destination_url == selected_url && i->type == match.type) { 4521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen found = true; 463f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen if (i->is_history_what_you_typed_match || i->starred) { 473f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen // We can't get rid of What-You-Typed or Bookmarked matches, 483f50c38dc070f4bb515c1b64450dae14f316474eKristian Monsen // but we can make them look like they have no backing data. 4921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen i->deletable = false; 5021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen i->description.clear(); 5121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen i->description_class.clear(); 5221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } else { 5321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen matches_.erase(i); 5421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 5521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen break; 5621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 5721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen } 5821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen DCHECK(found) << "Asked to delete a URL that isn't in our set of matches"; 5921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen listener_->OnProviderUpdate(true); 6021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen} 6121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static 6372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenstring16 HistoryProvider::FixupUserInput(const AutocompleteInput& input) { 6472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const string16& input_text = input.text(); 65731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Fixup and canonicalize user input. 6672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const GURL canonical_gurl(URLFixerUpper::FixupURL(UTF16ToUTF8(input_text), 67731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick std::string())); 68731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick std::string canonical_gurl_str(canonical_gurl.possibly_invalid_spec()); 69731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick if (canonical_gurl_str.empty()) { 70731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // This probably won't happen, but there are no guarantees. 71731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick return input_text; 72731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick } 73731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 74731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // If the user types a number, GURL will convert it to a dotted quad. 75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // However, if the parser did not mark this as a URL, then the user probably 76731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // didn't intend this interpretation. Since this can break history matching 77731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // for hostname beginning with numbers (e.g. input of "17173" will be matched 78731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // against "0.0.67.21" instead of the original "17173", failing to find 79731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // "17173.com"), swap the original hostname in for the fixed-up one. 80731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick if ((input.type() != AutocompleteInput::URL) && 81731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick canonical_gurl.HostIsIPAddress()) { 82731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick std::string original_hostname = 8372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen UTF16ToUTF8(input_text.substr(input.parts().host.begin, 8472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen input.parts().host.len)); 85731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const url_parse::Parsed& parts = 86731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick canonical_gurl.parsed_for_possibly_invalid_spec(); 87731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // parts.host must not be empty when HostIsIPAddress() is true. 88731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick DCHECK(parts.host.is_nonempty()); 89731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick canonical_gurl_str.replace(parts.host.begin, parts.host.len, 90731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick original_hostname); 91731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick } 9272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen string16 output = UTF8ToUTF16(canonical_gurl_str); 93731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Don't prepend a scheme when the user didn't have one. Since the fixer 94731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // upper only prepends the "http" scheme, that's all we need to check for. 95731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick if (canonical_gurl.SchemeIs(chrome::kHttpScheme) && 9672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen !url_util::FindAndCompareScheme(UTF16ToUTF8(input_text), 97731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick chrome::kHttpScheme, NULL)) 98731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick TrimHttpPrefix(&output); 99731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 100731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Make the number of trailing slashes on the output exactly match the input. 101731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Examples of why not doing this would matter: 102731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // * The user types "a" and has this fixed up to "a/". Now no other sites 103731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // beginning with "a" will match. 104731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // * The user types "file:" and has this fixed up to "file://". Now inline 105731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // autocomplete will append too few slashes, resulting in e.g. "file:/b..." 106731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // instead of "file:///b..." 107731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // * The user types "http:/" and has this fixed up to "http:". Now inline 108731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // autocomplete will append too many slashes, resulting in e.g. 109731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // "http:///c..." instead of "http://c...". 110731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // NOTE: We do this after calling TrimHttpPrefix() since that can strip 111731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // trailing slashes (if the scheme is the only thing in the input). It's not 112731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // clear that the result of fixup really matters in this case, but there's no 113731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // harm in making sure. 11472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t last_input_nonslash = 11572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen input_text.find_last_not_of(ASCIIToUTF16("/\\")); 11672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t num_input_slashes = (last_input_nonslash == string16::npos) ? 117731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick input_text.length() : (input_text.length() - 1 - last_input_nonslash); 11872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t last_output_nonslash = 11972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen output.find_last_not_of(ASCIIToUTF16("/\\")); 120731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const size_t num_output_slashes = 12172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen (last_output_nonslash == string16::npos) ? 122731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick output.length() : (output.length() - 1 - last_output_nonslash); 123731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick if (num_output_slashes < num_input_slashes) 124731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick output.append(num_input_slashes - num_output_slashes, '/'); 125731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick else if (num_output_slashes > num_input_slashes) 126731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick output.erase(output.length() - num_output_slashes + num_input_slashes); 127731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 128731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick return output; 129731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 130731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 131731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static 13272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsensize_t HistoryProvider::TrimHttpPrefix(string16* url) { 133731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Find any "http:". 134731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick if (!HasHTTPScheme(*url)) 135731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick return 0; 13672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen size_t scheme_pos = 13772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen url->find(ASCIIToUTF16(chrome::kHttpScheme) + char16(':')); 138dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen DCHECK_NE(string16::npos, scheme_pos); 139731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 140731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick // Erase scheme plus up to two slashes. 141731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick size_t prefix_end = scheme_pos + strlen(chrome::kHttpScheme) + 1; 142731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const size_t after_slashes = std::min(url->length(), prefix_end + 2); 14372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen while ((prefix_end < after_slashes) && ((*url)[prefix_end] == '/')) 144731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick ++prefix_end; 145731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick url->erase(scheme_pos, prefix_end - scheme_pos); 146731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick return (scheme_pos == 0) ? prefix_end : 0; 147731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 148