1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
8#include <string>
10#include "base/basictypes.h"
11#include "base/gtest_prod_util.h"
12#include "base/strings/string16.h"
13#include "components/metrics/proto/omnibox_event.pb.h"
14#include "components/metrics/proto/omnibox_input_type.pb.h"
15#include "url/gurl.h"
16#include "url/url_parse.h"
18class AutocompleteSchemeClassifier;  // Only used by const reference below.
20// The user input for an autocomplete query.  Allows copying.
21class AutocompleteInput {
22 public:
23  AutocompleteInput();  // Presumably yields the null state Clear() resets to.
24  // |text| and |cursor_position| represent the input query and the location
25  // of the cursor within the query, respectively.  |cursor_position| may be
26  // set to base::string16::npos if the input |text| doesn't come directly
27  // from the user's typing.
28  //
29  // |desired_tld| is the user's desired TLD, if one is not already present in
30  // the text to autocomplete.  When this is non-empty, it also implies that
31  // "www." should be prepended to the domain where possible. The |desired_tld|
32  // should not contain a leading '.' (use "com" instead of ".com").
33  //
34  // If |current_url| is set to a valid search result page URL, providers can
35  // use it to perform query refinement. For example, if it is set to an image
36  // search result page, the search provider may generate an image search URL.
37  // Query refinement is only used by mobile ports, so only these set
38  // |current_url| to a non-empty string.
39  //
40  // |current_page_classification| represents the type of page the user is
41  // viewing and the manner in which the user is accessing the omnibox; it's
42  // more than simply the URL.  It includes, for example, whether the page
43  // is a search result page doing search term replacement or not.
44  //
45  // |prevent_inline_autocomplete| is true if the generated result set should
46  // not require inline autocomplete for the default match.  This is difficult
47  // to explain in the abstract; the practical use case is that after the user
48  // deletes text in the edit, the HistoryURLProvider should make sure not to
49  // promote a match requiring inline autocomplete too highly.
50  //
51  // |prefer_keyword| should be true when the keyword UI is onscreen; this will
52  // bias the autocomplete result set toward the keyword provider when the input
53  // string is a bare keyword.
54  //
55  // |allow_exact_keyword_match| should be false when triggering keyword mode on
56  // the input string would be surprising or wrong, e.g. when highlighting text
57  // in a page and telling the browser to search for it or navigate to it. This
58  // parameter only applies to substituting keywords.
59  //
60  // If |want_asynchronous_matches| is false the controller asks the providers
61  // to only return matches which are synchronously available, which should mean
62  // that all providers will be done immediately.
63  //
64  // |scheme_classifier| is passed to Parse() to help determine the type of
65  // input this is; see comments there.
66  AutocompleteInput(const base::string16& text,
67                    size_t cursor_position,
68                    const base::string16& desired_tld,
69                    const GURL& current_url,
70                    metrics::OmniboxEventProto::PageClassification
71                        current_page_classification,
72                    bool prevent_inline_autocomplete,
73                    bool prefer_keyword,
74                    bool allow_exact_keyword_match,
75                    bool want_asynchronous_matches,
76                    const AutocompleteSchemeClassifier& scheme_classifier);
77  ~AutocompleteInput();  // Declared here only; no inline definition.
79  // If |type| is FORCED_QUERY and |text| starts with '?', that '?' is removed.
80  // Returns the number of leading characters removed.
81  static size_t RemoveForcedQueryStringIfNecessary(
82      metrics::OmniboxInputType::Type type,
83      base::string16* text);
85  // Returns a string representation of |type|.  Used in logging.
86  static std::string TypeToString(metrics::OmniboxInputType::Type type);
88  // Parses |text| (including an optional |desired_tld|) and returns the type of
89  // input this will be interpreted as.  |scheme_classifier| is used to check
90  // the scheme in |text| is known and registered in the current environment.
91  // The components of the input are stored in the output parameter |parts|, if
92  // it is non-NULL.  The scheme is stored in |scheme| if it is non-NULL.  The
93  // canonicalized URL is stored in |canonicalized_url|; however, this URL is
94  // not guaranteed to be valid, especially if the parsed type is, e.g., QUERY.
  // NOTE(review): unlike |parts| and |scheme|, nothing here says whether
  // |canonicalized_url| may be NULL -- confirm against the implementation.
95  static metrics::OmniboxInputType::Type Parse(
96      const base::string16& text,
97      const base::string16& desired_tld,
98      const AutocompleteSchemeClassifier& scheme_classifier,
99      url::Parsed* parts,
100      base::string16* scheme,
101      GURL* canonicalized_url);
103  // Parses |text| and fills |scheme| and |host| with the positions of those
104  // components.  The results are almost the same as those of Parse(), but if
105  // the scheme is view-source, this function returns the positions of the
106  // scheme and host in the URL qualified by the "view-source:" prefix.
107  static void ParseForEmphasizeComponents(
108      const base::string16& text,
109      const AutocompleteSchemeClassifier& scheme_classifier,
110      url::Component* scheme,
111      url::Component* host);
113  // Code that wants to format URLs with a format flag including
114  // net::kFormatUrlOmitTrailingSlashOnBareHostname risks changing the meaning
115  // if the result is then parsed as AutocompleteInput.  Such code can call
116  // this function with the URL and its formatted string, and it will return a
117  // formatted string with the same meaning as the original URL (i.e. it will
118  // re-append a slash if necessary).  Because this uses Parse() under the hood
119  // to determine the meaning of the different strings, callers need to supply a
120  // |scheme_classifier| to pass to Parse().
121  static base::string16 FormattedStringWithEquivalentMeaning(
122      const GURL& url,
123      const base::string16& formatted_url,
124      const AutocompleteSchemeClassifier& scheme_classifier);
126  // Returns the number of non-empty components in |parts|, not counting the
  // host.
127  static int NumNonHostComponents(const url::Parsed& parts);
129  // Returns whether |text| begins with "http:" or "view-source:http:".
130  static bool HasHTTPScheme(const base::string16& text);
132  // Returns the user-provided text to be completed.
133  const base::string16& text() const { return text_; }
135  // Returns the 0-based cursor position within |text_|, or
136  // base::string16::npos if the cursor position is not used.
137  size_t cursor_position() const { return cursor_position_; }
139  // Use of this setter is risky, since no other internal state is updated
140  // besides |text_|, |cursor_position_| and |parts_|.  Only callers who know
141  // that they're not changing the type/scheme/etc. should use this.
  // For example, |type_|, |scheme_| and |canonicalized_url_| are left as-is.
142  void UpdateText(const base::string16& text,
143                  size_t cursor_position,
144                  const url::Parsed& parts);
146  // The current URL, or an invalid GURL if query refinement is not desired.
  // Providers may use a valid search result page URL for query refinement.
147  const GURL& current_url() const { return current_url_; }
149  // The type of page that is currently being displayed and how it is
150  // displayed (e.g., with search term replacement or without).
151  metrics::OmniboxEventProto::PageClassification current_page_classification()
152      const {
153    return current_page_classification_;
154  }
156  // The type of input supplied; see Parse() for how input is classified.
157  metrics::OmniboxInputType::Type type() const { return type_; }
159  // Returns the parsed URL components (also replaced by UpdateText()).
160  const url::Parsed& parts() const { return parts_; }
162  // The scheme parsed from the provided text; only meaningful when |type_| is
163  // URL.
164  const base::string16& scheme() const { return scheme_; }
166  // The input as a URL to navigate to, if possible.
167  const GURL& canonicalized_url() const { return canonicalized_url_; }
169  // Returns whether inline autocompletion should be prevented, i.e. whether
  // the result set should not require inline autocomplete for the default
  // match.
170  bool prevent_inline_autocomplete() const {
171    return prevent_inline_autocomplete_;
172  }
174  // Returns whether, given an input string consisting solely of a substituting
175  // keyword, we should score it like a non-substituting keyword.
  // Callers are expected to set this when the keyword UI is onscreen.
176  bool prefer_keyword() const { return prefer_keyword_; }
178  // Returns whether this input is allowed to be treated as an exact
179  // keyword match.  If not, the default result is guaranteed not to be a
180  // keyword search, even if the input is "<keyword> <search string>".
  // This only applies to substituting keywords.
181  bool allow_exact_keyword_match() const { return allow_exact_keyword_match_; }
183  // Returns whether providers should be allowed to make asynchronous requests
184  // when processing this input.
  // When false, providers should only return synchronously available matches.
185  bool want_asynchronous_matches() const { return want_asynchronous_matches_; }
187  // Resets all internal variables to the null-constructed state.
  // Every data member must be handled here; update this when adding a field.
188  void Clear();
190 private:
  // Lets AutocompleteProviderTest.GetDestinationURL access private members.
191  FRIEND_TEST_ALL_PREFIXES(AutocompleteProviderTest, GetDestinationURL);
193  // NOTE: Whenever adding a new field here, please make sure to update the
194  // Clear() method.
195  base::string16 text_;  // User-provided text to be completed.
196  size_t cursor_position_;  // 0-based index into |text_|, or npos if unused.
197  GURL current_url_;  // Invalid if query refinement is not desired.
198  metrics::OmniboxEventProto::PageClassification current_page_classification_;
199  metrics::OmniboxInputType::Type type_;
200  url::Parsed parts_;
201  base::string16 scheme_;  // Only meaningful when |type_| is URL.
202  GURL canonicalized_url_;  // The input as a URL to navigate to, if possible.
203  bool prevent_inline_autocomplete_;
204  bool prefer_keyword_;
205  bool allow_exact_keyword_match_;
206  bool want_asynchronous_matches_;