1116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// Copyright 2014 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "components/omnibox/autocomplete_input.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h" 8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 96d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)#include "components/metrics/proto/omnibox_event.pb.h" 105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "components/omnibox/autocomplete_scheme_classifier.h" 11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "components/url_fixer/url_fixer.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_util.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 14eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "url/url_canon_ip.h" 150f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)#include "url/url_util.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace { 182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 19116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// Hardcode constant to avoid any dependencies on content/. 20116680a4aac90f2aa7413d9095a592090648e557Ben Murdochconst char kViewSourceScheme[] = "view-source"; 21116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void AdjustCursorPositionIfNecessary(size_t num_leading_chars_removed, 232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* cursor_position) { 24a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (*cursor_position == base::string16::npos) 252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return; 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (num_leading_chars_removed < *cursor_position) 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *cursor_position -= num_leading_chars_removed; 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *cursor_position = 0; 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} // namespace 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)AutocompleteInput::AutocompleteInput() 35a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) : cursor_position_(base::string16::npos), 366d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) current_page_classification_(metrics::OmniboxEventProto::INVALID_SPEC), 37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) type_(metrics::OmniboxInputType::INVALID), 382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) prevent_inline_autocomplete_(false), 392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) prefer_keyword_(false), 402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) allow_exact_keyword_match_(true), 410529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch want_asynchronous_matches_(true) { 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 44d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochAutocompleteInput::AutocompleteInput( 45a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& text, 46d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t cursor_position, 47a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& desired_tld, 48d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const GURL& current_url, 496d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) metrics::OmniboxEventProto::PageClassification current_page_classification, 50d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool prevent_inline_autocomplete, 51d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool prefer_keyword, 52d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool allow_exact_keyword_match, 53116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch bool want_asynchronous_matches, 54116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const AutocompleteSchemeClassifier& scheme_classifier) 552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) : cursor_position_(cursor_position), 562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) current_url_(current_url), 57d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch current_page_classification_(current_page_classification), 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prevent_inline_autocomplete_(prevent_inline_autocomplete), 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefer_keyword_(prefer_keyword), 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) allow_exact_keyword_match_(allow_exact_keyword_match), 610529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch want_asynchronous_matches_(want_asynchronous_matches) { 62a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) DCHECK(cursor_position <= text.length() || 63a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) cursor_position == base::string16::npos) 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) << "Text: '" << text << "', cp: " << cursor_position; 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // None of the providers care about leading white space so we always trim it. 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Providers that care about trailing white space handle trimming themselves. 67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) & 68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) base::TRIM_LEADING) != 0) 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AdjustCursorPositionIfNecessary(text.length() - text_.length(), 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) &cursor_position_); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL canonicalized_url; 73116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch type_ = Parse(text_, desired_tld, scheme_classifier, &parts_, &scheme_, 74116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &canonicalized_url); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 76f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (type_ == metrics::OmniboxInputType::INVALID) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 79f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (((type_ == metrics::OmniboxInputType::UNKNOWN) || 80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) (type_ == metrics::OmniboxInputType::URL)) && 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) canonicalized_url.is_valid() && 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() || 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) canonicalized_url.SchemeIsFileSystem() || 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !canonicalized_url.host().empty())) 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) canonicalized_url_ = canonicalized_url; 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_); 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_); 892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (chars_removed) { 902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Remove spaces between opening question mark and first actual character. 91a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16 trimmed_text; 92a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) & 93a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) base::TRIM_LEADING) != 0) { 942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(), 952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) &cursor_position_); 962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) text_ = trimmed_text; 972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)AutocompleteInput::~AutocompleteInput() { 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 105a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary( 1066d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) metrics::OmniboxInputType::Type type, 107a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16* text) { 108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if ((type != metrics::OmniboxInputType::FORCED_QUERY) || text->empty() || 109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) (*text)[0] != L'?') 1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return 0; 1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Drop the leading '?'. 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) text->erase(0, 1); 1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return 1; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 1176d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)std::string AutocompleteInput::TypeToString( 1186d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) metrics::OmniboxInputType::Type type) { 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (type) { 120f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::INVALID: return "invalid"; 121f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::UNKNOWN: return "unknown"; 122f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::DEPRECATED_REQUESTED_URL: 123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return "deprecated-requested-url"; 124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::URL: return "url"; 125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::QUERY: return "query"; 126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) case metrics::OmniboxInputType::FORCED_QUERY: return "forced-query"; 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return std::string(); 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 1326d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)metrics::OmniboxInputType::Type AutocompleteInput::Parse( 133a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& text, 134a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16& desired_tld, 135116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const AutocompleteSchemeClassifier& scheme_classifier, 1365c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Parsed* parts, 137a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16* scheme, 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL* canonicalized_url) { 139a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0); 140a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (first_non_white == base::string16::npos) 141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::INVALID; // All whitespace. 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 143cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (text[first_non_white] == L'?') { 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the first non-whitespace character is a '?', we magically treat this 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // as a query. 146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::FORCED_QUERY; 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Ask our parsing back-end to help us understand what the user typed. We 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // use the URLFixerUpper here because we want to be smart about what we 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // consider a scheme. For example, we shouldn't consider www.google.com:80 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to have a scheme. 1535c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Parsed local_parts; 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parts) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) parts = &local_parts; 156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const base::string16 parsed_scheme(url_fixer::SegmentURL(text, parts)); 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (scheme) 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *scheme = parsed_scheme; 159116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const std::string parsed_scheme_utf8(base::UTF16ToUTF8(parsed_scheme)); 160a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 1615c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // If we can't canonicalize the user's input, the rest of the autocomplete 1625c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // system isn't going to be able to produce a navigable URL match for it. 1635c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // So we just return QUERY immediately in these cases. 164a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) GURL placeholder_canonicalized_url; 165a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if (!canonicalized_url) 166a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) canonicalized_url = &placeholder_canonicalized_url; 167f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) *canonicalized_url = url_fixer::FixupURL(base::UTF16ToUTF8(text), 168f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) base::UTF16ToUTF8(desired_tld)); 1695c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if (!canonicalized_url->is_valid()) 170f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::QUERY; 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 172116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (LowerCaseEqualsASCII(parsed_scheme_utf8, url::kFileScheme)) { 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // A user might or might not type a scheme when entering a file URL. In 174116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // either case, |parsed_scheme_utf8| will tell us that this is a file URL, 175116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // but |parts->scheme| might be empty, e.g. if the user typed "C:\foo". 176f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // well enough that we can fall through to the heuristics below. If it's 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // something else, we can just determine our action based on what we do with 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // any input of this scheme. In theory we could do better with some schemes 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // until I run into some cases that really need it. 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts->scheme.is_nonempty() && 186116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch !LowerCaseEqualsASCII(parsed_scheme_utf8, url::kHttpScheme) && 187116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch !LowerCaseEqualsASCII(parsed_scheme_utf8, url::kHttpsScheme)) { 188116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch metrics::OmniboxInputType::Type type = 189116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch scheme_classifier.GetInputTypeForScheme(parsed_scheme_utf8); 190116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (type != metrics::OmniboxInputType::INVALID) 191116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return type; 192116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 193116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // We don't know about this scheme. It might be that the user typed a 194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // URL of the form "username:password@foo.com". 195116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const base::string16 http_scheme_prefix = 196116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch base::ASCIIToUTF16(std::string(url::kHttpScheme) + 197116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url::kStandardSchemeSeparator); 198116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url::Parsed http_parts; 199116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch base::string16 http_scheme; 200116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch GURL http_canonicalized_url; 201116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch metrics::OmniboxInputType::Type http_type = 202116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch Parse(http_scheme_prefix + text, desired_tld, scheme_classifier, 203116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts, &http_scheme, &http_canonicalized_url); 204116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch DCHECK_EQ(std::string(url::kHttpScheme), 205116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch base::UTF16ToUTF8(http_scheme)); 206116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 207116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if ((http_type == metrics::OmniboxInputType::URL) && 208116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch http_parts.username.is_nonempty() && 209116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch http_parts.password.is_nonempty()) { 210116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // Manually re-jigger the parsed parts to match |text| (without the 211116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // http scheme added). 212116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch http_parts.scheme.reset(); 213116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url::Component* components[] = { 214116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.username, 215116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.password, 216116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.host, 217116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.port, 218116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.path, 219116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.query, 220116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &http_parts.ref, 221116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch }; 222116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch for (size_t i = 0; i < arraysize(components); ++i) { 223116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url_fixer::OffsetComponent( 224116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch -static_cast<int>(http_scheme_prefix.length()), components[i]); 225116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch } 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 227116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch *parts = http_parts; 228116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (scheme) 229116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch scheme->clear(); 230116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch *canonicalized_url = http_canonicalized_url; 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 232116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return metrics::OmniboxInputType::URL; 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 234116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 235116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // We don't know about this scheme and it doesn't look like the user 236116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // typed a username and password. It's likely to be a search operator 237116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // like "site:" or "link:". We classify it as UNKNOWN so the user has 238116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // the option of treating it as a URL if we're wrong. 239116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or 240116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // "www.example.com:81" in this case. 241116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return metrics::OmniboxInputType::UNKNOWN; 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Either the user didn't type a scheme, in which case we need to distinguish 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // case we should reject invalid formulations. 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2485c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // If we have an empty host it can't be a valid HTTP[S] URL. (This should 2495c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // only trigger for input that begins with a colon, which GURL will parse as a 2505c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // valid, non-standard URL; for standard URLs, an empty host would have 2515c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // resulted in an invalid |canonicalized_url| above.) 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!parts->host.is_nonempty()) 253f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::QUERY; 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2555c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // Sanity-check: GURL should have failed to canonicalize this URL if it had an 2565c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // invalid port. 2575c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port)); 2585c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // use the registry length later below.) 261a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16 host(text.substr(parts->host.begin, parts->host.len)); 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const size_t registry_length = 263a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::GetRegistryLength( 2645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::UTF16ToUTF8(host), 265a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 266a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (registry_length == std::string::npos) { 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Try to append the desired_tld. 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!desired_tld.empty()) { 270a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16 host_with_tld(host); 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (host[host.length() - 1] != '.') 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) host_with_tld += '.'; 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) host_with_tld += desired_tld; 274a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) const size_t tld_length = 275a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::GetRegistryLength( 2765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::UTF16ToUTF8(host_with_tld), 277a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 278a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 279f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (tld_length != std::string::npos) { 280f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Something like "99999999999" that looks like a bad IP 281f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // address, but becomes valid on attaching a TLD. 282f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 283f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 285f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Could be a broken IP address, etc. 286f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::QUERY; 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // See if the hostname is valid. While IE and GURL allow hostnames to contain 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // many other characters (perhaps for weird intranet machines), it's extremely 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // unlikely that a user would be trying to type those in for anything other 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // than a search query. 2945c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::CanonHostInfo host_info; 2955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const std::string canonicalized_host(net::CanonicalizeHost( 2965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::UTF16ToUTF8(host), &host_info)); 2975c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if ((host_info.family == url::CanonHostInfo::NEUTRAL) && 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !net::IsCanonicalizedHostCompliant(canonicalized_host, 2995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::UTF16ToUTF8(desired_tld))) { 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Invalid hostname. There are several possible cases: 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * Our checker is too strict and the user pasted in a real-world URL 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that's "invalid" but resolves. To catch these, we return UNKNOWN when 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the user explicitly typed a scheme, so we'll still search by default 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but we'll show the accidental search infobar if necessary. 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * The user is typing a multi-word query. If we see a space anywhere in 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the hostname we assume this is a search and return QUERY. 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * Our checker is too strict and the user is typing a real-world hostname 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that's "invalid" but resolves. We return UNKNOWN if the TLD is known. 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that we explicitly excluded hosts with spaces above so that 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "toys at amazon.com" will be treated as a search. 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * The user is typing some garbage string. Return QUERY. 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Thus we fall down in the following cases: 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * Trying to navigate to a hostname with spaces 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * Trying to navigate to a hostname with invalid characters and an unknown 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TLD 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // These are rare, though probably possible in intranets. 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (parts->scheme.is_nonempty() || 319a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) ((registry_length != 0) && 320f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) (host.find(' ') == base::string16::npos))) ? 321f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::QUERY; 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now that we've ruled out all schemes other than http or https and done a 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // little more sanity checking, the presence of a scheme means this is likely 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a URL. 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts->scheme.is_nonempty()) 328f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // See if the host is an IP address. 3315c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if (host_info.family == url::CanonHostInfo::IPV6) 332f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the user originally typed a host that looks like an IP address (a 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // dotted quad), they probably want to open it. If the original input was 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // something else (like a single number), they probably wanted to search for 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // it, unless they explicitly typed a scheme. This is true even if the URL 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // appears to have a path: "1.2/45" is more likely a search (for the answer 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to a math problem) than a URL. However, if there are more non-host 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // components, then maybe this really was intended to be a navigation. For 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // this reason we only check the dotted-quad case here, and save the "other 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // IP addresses" case for after we check the number of non-host components 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // below. 3435c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if ((host_info.family == url::CanonHostInfo::IPV4) && 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (host_info.num_ipv4_components == 4)) 345f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Presence of a password means this is likely a URL. Note that unless the 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // user has typed an explicit "http://" or similar, we'll probably think that 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the username is some unknown scheme, and bail out in the scheme-handling 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // code above. 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts->password.is_nonempty()) 352f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Trailing slashes force the input to be treated as a URL. 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts->path.is_nonempty()) { 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char c = text[parts->path.end() - 1]; 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((c == '\\') || (c == '/')) 358f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If there is more than one recognized non-host component, this is likely to 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be a URL, even if the TLD is unknown (in which case this is likely an 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // intranet URL). 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NumNonHostComponents(*parts) > 1) 365f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the host has a known TLD or a port, it's probably a URL, with the 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // following exceptions: 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * Any "IP addresses" that make it here are more likely searches 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (see above). 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * If we reach here with a username, our input looks like "user@host[.tld]". 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Because there is no scheme explicitly specified, we think this is more 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // likely an email address than an HTTP auth attempt. Hence, we search by 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // default and let users correct us on a case-by-case basis. 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that we special-case "localhost" as a known hostname. 3765c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu if ((host_info.family != url::CanonHostInfo::IPV4) && 3775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || 378f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) parts->port.is_nonempty()))) { 379f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return parts->username.is_nonempty() ? metrics::OmniboxInputType::UNKNOWN : 380f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) metrics::OmniboxInputType::URL; 381f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we reach this point, we know there's no known TLD on the input, so if 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the user wishes to add a desired_tld, the fixup code will oblige; thus this 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is a URL. 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!desired_tld.empty()) 387f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::URL; 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No scheme, password, port, path, and no known TLD on the host. 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This could be: 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * An "incomplete IP address"; likely a search (see above). 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * An email-like input like "user@host", where "host" has no known TLD. 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // It's not clear what the user means here and searching seems reasonable. 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * A single word "foo"; possibly an intranet site, but more likely a search. 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // catch our mistakes. 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "xxx" as a TLD, then until we add it to our data file, Chrome won't know 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // distinguish this case from: 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * A "URL-like" string that's not really a URL (like 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // QUERY. Since this is indistinguishable from the case above, and this 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // case is much more likely, claim these are UNKNOWN, which should default 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to the right thing and let users correct us on a case-by-case basis. 406f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return metrics::OmniboxInputType::UNKNOWN; 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 410116680a4aac90f2aa7413d9095a592090648e557Ben Murdochvoid AutocompleteInput::ParseForEmphasizeComponents( 411116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const base::string16& text, 412116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const AutocompleteSchemeClassifier& scheme_classifier, 413116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url::Component* scheme, 414116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch url::Component* host) { 4155c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Parsed parts; 416a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16 scheme_str; 417116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch Parse(text, base::string16(), scheme_classifier, &parts, &scheme_str, NULL); 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *scheme = parts.scheme; 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *host = parts.host; 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int after_scheme_and_colon = parts.scheme.end() + 1; 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For the view-source scheme, we should emphasize the scheme and host of the 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URL qualified by the view-source prefix. 425116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (LowerCaseEqualsASCII(scheme_str, kViewSourceScheme) && 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (static_cast<int>(text.length()) > after_scheme_and_colon)) { 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Obtain the URL prefixed by view-source and parse it. 428a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16 real_url(text.substr(after_scheme_and_colon)); 4295c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Parsed real_parts; 430116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch AutocompleteInput::Parse(real_url, base::string16(), scheme_classifier, 431116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch &real_parts, NULL, NULL); 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (real_parts.scheme.is_nonempty() || real_parts.host.is_nonempty()) { 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (real_parts.scheme.is_nonempty()) { 4345c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu *scheme = url::Component( 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) after_scheme_and_colon + real_parts.scheme.begin, 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) real_parts.scheme.len); 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scheme->reset(); 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (real_parts.host.is_nonempty()) { 4415c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu *host = url::Component(after_scheme_and_colon + real_parts.host.begin, 4425c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu real_parts.host.len); 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) host->reset(); 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 447cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) } else if (LowerCaseEqualsASCII(scheme_str, url::kFileSystemScheme) && 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) parts.inner_parsed() && parts.inner_parsed()->scheme.is_valid()) { 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *host = parts.inner_parsed()->host; 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 454a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)base::string16 AutocompleteInput::FormattedStringWithEquivalentMeaning( 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const GURL& url, 456116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const base::string16& formatted_url, 457116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const AutocompleteSchemeClassifier& scheme_classifier) { 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!net::CanStripTrailingSlash(url)) 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return formatted_url; 4605d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const base::string16 url_with_path(formatted_url + base::char16('/')); 461116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return (AutocompleteInput::Parse(formatted_url, base::string16(), 462116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch scheme_classifier, NULL, NULL, NULL) == 463116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch AutocompleteInput::Parse(url_with_path, base::string16(), 464116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch scheme_classifier, NULL, NULL, NULL)) ? 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) formatted_url : url_with_path; 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 4695c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liuint AutocompleteInput::NumNonHostComponents(const url::Parsed& parts) { 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int num_nonhost_components = 0; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.scheme.is_nonempty()) 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.username.is_nonempty()) 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.password.is_nonempty()) 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.port.is_nonempty()) 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.path.is_nonempty()) 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.query.is_nonempty()) 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (parts.ref.is_nonempty()) 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++num_nonhost_components; 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return num_nonhost_components; 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4880f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)// static 489a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool AutocompleteInput::HasHTTPScheme(const base::string16& input) { 4905d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string utf8_input(base::UTF16ToUTF8(input)); 4915c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Component scheme; 492116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (url::FindAndCompareScheme(utf8_input, kViewSourceScheme, &scheme)) { 4930f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles) utf8_input.erase(0, scheme.end() + 1); 4945c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu } 495010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) return url::FindAndCompareScheme(utf8_input, url::kHttpScheme, NULL); 4960f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)} 4970f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles) 498a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void AutocompleteInput::UpdateText(const base::string16& text, 4992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t cursor_position, 5005c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu const url::Parsed& parts) { 501a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) DCHECK(cursor_position <= text.length() || 502a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) cursor_position == base::string16::npos) 5032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) << "Text: '" << text << "', cp: " << cursor_position; 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_ = text; 5052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) cursor_position_ = cursor_position; 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) parts_ = parts; 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void AutocompleteInput::Clear() { 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.clear(); 511a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) cursor_position_ = base::string16::npos; 5122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) current_url_ = GURL(); 5136d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) current_page_classification_ = metrics::OmniboxEventProto::INVALID_SPEC; 514f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) type_ = metrics::OmniboxInputType::INVALID; 5155c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu parts_ = url::Parsed(); 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scheme_.clear(); 5172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) canonicalized_url_ = GURL(); 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prevent_inline_autocomplete_ = false; 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefer_keyword_ = false; 5202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) allow_exact_keyword_match_ = false; 5210529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch want_asynchronous_matches_ = true; 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 523