1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/autocomplete.h"
6
7#include <algorithm>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/i18n/number_formatting.h"
12#include "base/metrics/histogram.h"
13#include "base/string_number_conversions.h"
14#include "base/string_util.h"
15#include "base/utf_string_conversions.h"
16#include "chrome/browser/autocomplete/autocomplete_controller_delegate.h"
17#include "chrome/browser/autocomplete/autocomplete_match.h"
18#include "chrome/browser/autocomplete/builtin_provider.h"
19#include "chrome/browser/autocomplete/extension_app_provider.h"
20#include "chrome/browser/autocomplete/history_contents_provider.h"
21#include "chrome/browser/autocomplete/history_quick_provider.h"
22#include "chrome/browser/autocomplete/history_url_provider.h"
23#include "chrome/browser/autocomplete/keyword_provider.h"
24#include "chrome/browser/autocomplete/search_provider.h"
25#include "chrome/browser/bookmarks/bookmark_model.h"
26#include "chrome/browser/external_protocol_handler.h"
27#include "chrome/browser/net/url_fixer_upper.h"
28#include "chrome/browser/prefs/pref_service.h"
29#include "chrome/browser/profiles/profile.h"
30#include "chrome/browser/ui/webui/history_ui.h"
31#include "chrome/common/chrome_switches.h"
32#include "chrome/common/pref_names.h"
33#include "chrome/common/url_constants.h"
34#include "content/common/notification_service.h"
35#include "googleurl/src/gurl.h"
36#include "googleurl/src/url_canon_ip.h"
37#include "googleurl/src/url_util.h"
38#include "grit/generated_resources.h"
39#include "grit/theme_resources.h"
40#include "net/base/net_util.h"
41#include "net/base/registry_controlled_domain.h"
42#include "net/url_request/url_request.h"
43#include "ui/base/l10n/l10n_util.h"
44
45using base::TimeDelta;
46
47// AutocompleteInput ----------------------------------------------------------
48
49AutocompleteInput::AutocompleteInput()
50  : type_(INVALID),
51    initial_prevent_inline_autocomplete_(false),
52    prevent_inline_autocomplete_(false),
53    prefer_keyword_(false),
54    allow_exact_keyword_match_(true),
55    matches_requested_(ALL_MATCHES) {
56}
57
58AutocompleteInput::AutocompleteInput(const string16& text,
59                                     const string16& desired_tld,
60                                     bool prevent_inline_autocomplete,
61                                     bool prefer_keyword,
62                                     bool allow_exact_keyword_match,
63                                     MatchesRequested matches_requested)
64    : original_text_(text),
65      desired_tld_(desired_tld),
66      initial_prevent_inline_autocomplete_(prevent_inline_autocomplete),
67      prevent_inline_autocomplete_(prevent_inline_autocomplete),
68      prefer_keyword_(prefer_keyword),
69      allow_exact_keyword_match_(allow_exact_keyword_match),
70      matches_requested_(matches_requested) {
71  // Trim whitespace from edges of input; don't inline autocomplete if there
72  // was trailing whitespace.
73  if (TrimWhitespace(text, TRIM_ALL, &text_) & TRIM_TRAILING)
74    prevent_inline_autocomplete_ = true;
75
76  GURL canonicalized_url;
77  type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url);
78
79  if (type_ == INVALID)
80    return;
81
82  if (((type_ == UNKNOWN) || (type_ == REQUESTED_URL) || (type_ == URL)) &&
83      canonicalized_url.is_valid() &&
84      (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() ||
85       !canonicalized_url.host().empty()))
86    canonicalized_url_ = canonicalized_url;
87
88  RemoveForcedQueryStringIfNecessary(type_, &text_);
89}
90
91AutocompleteInput::~AutocompleteInput() {
92}
93
94// static
95void AutocompleteInput::RemoveForcedQueryStringIfNecessary(Type type,
96                                                           string16* text) {
97  if (type == FORCED_QUERY && !text->empty() && (*text)[0] == L'?')
98    text->erase(0, 1);
99}
100
101// static
102std::string AutocompleteInput::TypeToString(Type type) {
103  switch (type) {
104    case INVALID:       return "invalid";
105    case UNKNOWN:       return "unknown";
106    case REQUESTED_URL: return "requested-url";
107    case URL:           return "url";
108    case QUERY:         return "query";
109    case FORCED_QUERY:  return "forced-query";
110
111    default:
112      NOTREACHED();
113      return std::string();
114  }
115}
116
117// static
118AutocompleteInput::Type AutocompleteInput::Parse(
119    const string16& text,
120    const string16& desired_tld,
121    url_parse::Parsed* parts,
122    string16* scheme,
123    GURL* canonicalized_url) {
124  const size_t first_non_white = text.find_first_not_of(kWhitespaceUTF16, 0);
125  if (first_non_white == string16::npos)
126    return INVALID;  // All whitespace.
127
128  if (text.at(first_non_white) == L'?') {
129    // If the first non-whitespace character is a '?', we magically treat this
130    // as a query.
131    return FORCED_QUERY;
132  }
133
134  // Ask our parsing back-end to help us understand what the user typed.  We
135  // use the URLFixerUpper here because we want to be smart about what we
136  // consider a scheme.  For example, we shouldn't consider www.google.com:80
137  // to have a scheme.
138  url_parse::Parsed local_parts;
139  if (!parts)
140    parts = &local_parts;
141  const string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));
142  if (scheme)
143    *scheme = parsed_scheme;
144  if (canonicalized_url) {
145    *canonicalized_url = URLFixerUpper::FixupURL(UTF16ToUTF8(text),
146                                                 UTF16ToUTF8(desired_tld));
147  }
148
149  if (LowerCaseEqualsASCII(parsed_scheme, chrome::kFileScheme)) {
150    // A user might or might not type a scheme when entering a file URL.  In
151    // either case, |parsed_scheme| will tell us that this is a file URL, but
152    // |parts->scheme| might be empty, e.g. if the user typed "C:\foo".
153    return URL;
154  }
155
156  // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it
157  // well enough that we can fall through to the heuristics below.  If it's
158  // something else, we can just determine our action based on what we do with
159  // any input of this scheme.  In theory we could do better with some schemes
160  // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that
161  // until I run into some cases that really need it.
162  if (parts->scheme.is_nonempty() &&
163      !LowerCaseEqualsASCII(parsed_scheme, chrome::kHttpScheme) &&
164      !LowerCaseEqualsASCII(parsed_scheme, chrome::kHttpsScheme)) {
165    // See if we know how to handle the URL internally.
166    if (net::URLRequest::IsHandledProtocol(UTF16ToASCII(parsed_scheme)))
167      return URL;
168
169    // There are also some schemes that we convert to other things before they
170    // reach the renderer or else the renderer handles internally without
171    // reaching the net::URLRequest logic.  We thus won't catch these above, but
172    // we should still claim to handle them.
173    if (LowerCaseEqualsASCII(parsed_scheme, chrome::kViewSourceScheme) ||
174        LowerCaseEqualsASCII(parsed_scheme, chrome::kJavaScriptScheme) ||
175        LowerCaseEqualsASCII(parsed_scheme, chrome::kDataScheme))
176      return URL;
177
178    // Finally, check and see if the user has explicitly opened this scheme as
179    // a URL before, or if the "scheme" is actually a username.  We need to do
180    // this last because some schemes (e.g. "javascript") may be treated as
181    // "blocked" by the external protocol handler because we don't want pages to
182    // open them, but users still can.
183    // TODO(viettrungluu): get rid of conversion.
184    ExternalProtocolHandler::BlockState block_state =
185        ExternalProtocolHandler::GetBlockState(UTF16ToUTF8(parsed_scheme));
186    switch (block_state) {
187      case ExternalProtocolHandler::DONT_BLOCK:
188        return URL;
189
190      case ExternalProtocolHandler::BLOCK:
191        // If we don't want the user to open the URL, don't let it be navigated
192        // to at all.
193        return QUERY;
194
195      default: {
196        // We don't know about this scheme.  It might be that the user typed a
197        // URL of the form "username:password@foo.com".
198        const string16 http_scheme_prefix =
199            ASCIIToUTF16(std::string(chrome::kHttpScheme) +
200                         chrome::kStandardSchemeSeparator);
201        url_parse::Parsed http_parts;
202        string16 http_scheme;
203        GURL http_canonicalized_url;
204        Type http_type = Parse(http_scheme_prefix + text, desired_tld,
205                               &http_parts, &http_scheme,
206                               &http_canonicalized_url);
207        DCHECK_EQ(std::string(chrome::kHttpScheme), UTF16ToUTF8(http_scheme));
208
209        if ((http_type == URL || http_type == REQUESTED_URL) &&
210            http_parts.username.is_nonempty() &&
211            http_parts.password.is_nonempty()) {
212          // Manually re-jigger the parsed parts to match |text| (without the
213          // http scheme added).
214          http_parts.scheme.reset();
215          url_parse::Component* components[] = {
216            &http_parts.username,
217            &http_parts.password,
218            &http_parts.host,
219            &http_parts.port,
220            &http_parts.path,
221            &http_parts.query,
222            &http_parts.ref,
223          };
224          for (size_t i = 0; i < arraysize(components); ++i) {
225            URLFixerUpper::OffsetComponent(
226                -static_cast<int>(http_scheme_prefix.size()), components[i]);
227          }
228
229          *parts = http_parts;
230          if (scheme)
231            scheme->clear();
232          if (canonicalized_url)
233            *canonicalized_url = http_canonicalized_url;
234
235          return http_type;
236        }
237
238        // We don't know about this scheme and it doesn't look like the user
239        // typed a username and password.  It's likely to be a search operator
240        // like "site:" or "link:".  We classify it as UNKNOWN so the user has
241        // the option of treating it as a URL if we're wrong.
242        // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or
243        // "www.example.com:81" in this case.
244        return UNKNOWN;
245      }
246    }
247  }
248
249  // Either the user didn't type a scheme, in which case we need to distinguish
250  // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which
251  // case we should reject invalid formulations.
252
253  // If we have an empty host it can't be a URL.
254  if (!parts->host.is_nonempty())
255    return QUERY;
256
257  // Likewise, the RCDS can reject certain obviously-invalid hosts.  (We also
258  // use the registry length later below.)
259  const string16 host(text.substr(parts->host.begin, parts->host.len));
260  const size_t registry_length =
261      net::RegistryControlledDomainService::GetRegistryLength(UTF16ToUTF8(host),
262                                                              false);
263  if (registry_length == std::string::npos) {
264    // Try to append the desired_tld.
265    if (!desired_tld.empty()) {
266      string16 host_with_tld(host);
267      if (host[host.length() - 1] != '.')
268        host_with_tld += '.';
269      host_with_tld += desired_tld;
270      if (net::RegistryControlledDomainService::GetRegistryLength(
271          UTF16ToUTF8(host_with_tld), false) != std::string::npos)
272        return REQUESTED_URL;  // Something like "99999999999" that looks like a
273                               // bad IP address, but becomes valid on attaching
274                               // a TLD.
275    }
276    return QUERY;  // Could be a broken IP address, etc.
277  }
278
279
280  // See if the hostname is valid.  While IE and GURL allow hostnames to contain
281  // many other characters (perhaps for weird intranet machines), it's extremely
282  // unlikely that a user would be trying to type those in for anything other
283  // than a search query.
284  url_canon::CanonHostInfo host_info;
285  const std::string canonicalized_host(net::CanonicalizeHost(UTF16ToUTF8(host),
286                                                             &host_info));
287  if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) &&
288      !net::IsCanonicalizedHostCompliant(canonicalized_host,
289                                         UTF16ToUTF8(desired_tld))) {
290    // Invalid hostname.  There are several possible cases:
291    // * Our checker is too strict and the user pasted in a real-world URL
292    //   that's "invalid" but resolves.  To catch these, we return UNKNOWN when
293    //   the user explicitly typed a scheme, so we'll still search by default
294    //   but we'll show the accidental search infobar if necessary.
295    // * The user is typing a multi-word query.  If we see a space anywhere in
296    //   the hostname we assume this is a search and return QUERY.
297    // * Our checker is too strict and the user is typing a real-world hostname
298    //   that's "invalid" but resolves.  We return UNKNOWN if the TLD is known.
299    //   Note that we explicitly excluded hosts with spaces above so that
300    //   "toys at amazon.com" will be treated as a search.
301    // * The user is typing some garbage string.  Return QUERY.
302    //
303    // Thus we fall down in the following cases:
304    // * Trying to navigate to a hostname with spaces
305    // * Trying to navigate to a hostname with invalid characters and an unknown
306    //   TLD
307    // These are rare, though probably possible in intranets.
308    return (parts->scheme.is_nonempty() ||
309           ((registry_length != 0) && (host.find(' ') == string16::npos))) ?
310        UNKNOWN : QUERY;
311  }
312
313  // A port number is a good indicator that this is a URL.  However, it might
314  // also be a query like "1.66:1" that looks kind of like an IP address and
315  // port number. So here we only check for "port numbers" that are illegal and
316  // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save
317  // handling legal port numbers until after the "IP address" determination
318  // below.
319  if (parts->port.is_nonempty()) {
320    int port;
321    if (!base::StringToInt(text.substr(parts->port.begin, parts->port.len),
322                           &port) ||
323        (port < 0) || (port > 65535))
324      return QUERY;
325  }
326
327  // Now that we've ruled out all schemes other than http or https and done a
328  // little more sanity checking, the presence of a scheme means this is likely
329  // a URL.
330  if (parts->scheme.is_nonempty())
331    return URL;
332
333  // See if the host is an IP address.
334  if (host_info.family == url_canon::CanonHostInfo::IPV4) {
335    // If the user originally typed a host that looks like an IP address (a
336    // dotted quad), they probably want to open it.  If the original input was
337    // something else (like a single number), they probably wanted to search for
338    // it, unless they explicitly typed a scheme.  This is true even if the URL
339    // appears to have a path: "1.2/45" is more likely a search (for the answer
340    // to a math problem) than a URL.
341    if (host_info.num_ipv4_components == 4)
342      return URL;
343    return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
344  }
345  if (host_info.family == url_canon::CanonHostInfo::IPV6)
346    return URL;
347
348  // Now that we've ruled out invalid ports and queries that look like they have
349  // a port, the presence of a port means this is likely a URL.
350  if (parts->port.is_nonempty())
351    return URL;
352
353  // Presence of a password means this is likely a URL.  Note that unless the
354  // user has typed an explicit "http://" or similar, we'll probably think that
355  // the username is some unknown scheme, and bail out in the scheme-handling
356  // code above.
357  if (parts->password.is_nonempty())
358    return URL;
359
360  // The host doesn't look like a number, so see if the user's given us a path.
361  if (parts->path.is_nonempty()) {
362    // Most inputs with paths are URLs, even ones without known registries (e.g.
363    // intranet URLs).  However, if there's no known registry and the path has
364    // a space, this is more likely a query with a slash in the first term
365    // (e.g. "ps/2 games") than a URL.  We can still open URLs with spaces in
366    // the path by escaping the space, and we will still inline autocomplete
367    // them if users have typed them in the past, but we default to searching
368    // since that's the common case.
369    return ((registry_length == 0) &&
370            (text.substr(parts->path.begin, parts->path.len).find(' ') !=
371                string16::npos)) ? UNKNOWN : URL;
372  }
373
374  // If we reach here with a username, our input looks like "user@host".
375  // Because there is no scheme explicitly specified, we think this is more
376  // likely an email address than an HTTP auth attempt.  Hence, we search by
377  // default and let users correct us on a case-by-case basis.
378  if (parts->username.is_nonempty())
379    return UNKNOWN;
380
381  // We have a bare host string.  If it has a known TLD, it's probably a URL.
382  if (registry_length != 0)
383    return URL;
384
385  // No TLD that we know about.  This could be:
386  // * A string that the user wishes to add a desired_tld to to get a URL.  If
387  //   we reach this point, we know there's no known TLD on the string, so the
388  //   fixup code will be willing to add one; thus this is a URL.
389  // * A single word "foo"; possibly an intranet site, but more likely a search.
390  //   This is ideally an UNKNOWN, and we can let the Alternate Nav URL code
391  //   catch our mistakes.
392  // * A URL with a valid TLD we don't know about yet.  If e.g. a registrar adds
393  //   "xxx" as a TLD, then until we add it to our data file, Chrome won't know
394  //   "foo.xxx" is a real URL.  So ideally this is a URL, but we can't really
395  //   distinguish this case from:
396  // * A "URL-like" string that's not really a URL (like
397  //   "browser.tabs.closeButtons" or "java.awt.event.*").  This is ideally a
398  //   QUERY.  Since the above case and this one are indistinguishable, and this
399  //   case is likely to be much more common, just say these are both UNKNOWN,
400  //   which should default to the right thing and let users correct us on a
401  //   case-by-case basis.
402  return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
403}
404
405// static
406void AutocompleteInput::ParseForEmphasizeComponents(
407    const string16& text,
408    const string16& desired_tld,
409    url_parse::Component* scheme,
410    url_parse::Component* host) {
411  url_parse::Parsed parts;
412  string16 scheme_str;
413  Parse(text, desired_tld, &parts, &scheme_str, NULL);
414
415  *scheme = parts.scheme;
416  *host = parts.host;
417
418  int after_scheme_and_colon = parts.scheme.end() + 1;
419  // For the view-source scheme, we should emphasize the scheme and host of the
420  // URL qualified by the view-source prefix.
421  if (LowerCaseEqualsASCII(scheme_str, chrome::kViewSourceScheme) &&
422      (static_cast<int>(text.length()) > after_scheme_and_colon)) {
423    // Obtain the URL prefixed by view-source and parse it.
424    string16 real_url(text.substr(after_scheme_and_colon));
425    url_parse::Parsed real_parts;
426    AutocompleteInput::Parse(real_url, desired_tld, &real_parts, NULL, NULL);
427    if (real_parts.scheme.is_nonempty() || real_parts.host.is_nonempty()) {
428      if (real_parts.scheme.is_nonempty()) {
429        *scheme = url_parse::Component(
430            after_scheme_and_colon + real_parts.scheme.begin,
431            real_parts.scheme.len);
432      } else {
433        scheme->reset();
434      }
435      if (real_parts.host.is_nonempty()) {
436        *host = url_parse::Component(
437            after_scheme_and_colon + real_parts.host.begin,
438            real_parts.host.len);
439      } else {
440        host->reset();
441      }
442    }
443  }
444}
445
446// static
447string16 AutocompleteInput::FormattedStringWithEquivalentMeaning(
448    const GURL& url,
449    const string16& formatted_url) {
450  if (!net::CanStripTrailingSlash(url))
451    return formatted_url;
452  const string16 url_with_path(formatted_url + char16('/'));
453  return (AutocompleteInput::Parse(formatted_url, string16(), NULL, NULL,
454                                   NULL) ==
455          AutocompleteInput::Parse(url_with_path, string16(), NULL, NULL,
456                                   NULL)) ?
457      formatted_url : url_with_path;
458}
459
460
461bool AutocompleteInput::Equals(const AutocompleteInput& other) const {
462  return (text_ == other.text_) &&
463         (type_ == other.type_) &&
464         (desired_tld_ == other.desired_tld_) &&
465         (scheme_ == other.scheme_) &&
466         (prevent_inline_autocomplete_ == other.prevent_inline_autocomplete_) &&
467         (prefer_keyword_ == other.prefer_keyword_) &&
468         (matches_requested_ == other.matches_requested_);
469}
470
471void AutocompleteInput::Clear() {
472  text_.clear();
473  type_ = INVALID;
474  parts_ = url_parse::Parsed();
475  scheme_.clear();
476  desired_tld_.clear();
477  prevent_inline_autocomplete_ = false;
478  prefer_keyword_ = false;
479}
480
481// AutocompleteProvider -------------------------------------------------------
482
483// static
484const size_t AutocompleteProvider::kMaxMatches = 3;
485
486AutocompleteProvider::ACProviderListener::~ACProviderListener() {
487}
488
489AutocompleteProvider::AutocompleteProvider(ACProviderListener* listener,
490                                           Profile* profile,
491                                           const char* name)
492    : profile_(profile),
493      listener_(listener),
494      done_(true),
495      name_(name) {
496}
497
498void AutocompleteProvider::SetProfile(Profile* profile) {
499  DCHECK(profile);
500  DCHECK(done_);  // The controller should have already stopped us.
501  profile_ = profile;
502}
503
504void AutocompleteProvider::Stop() {
505  done_ = true;
506}
507
508void AutocompleteProvider::DeleteMatch(const AutocompleteMatch& match) {
509}
510
511AutocompleteProvider::~AutocompleteProvider() {
512  Stop();
513}
514
515// static
516bool AutocompleteProvider::HasHTTPScheme(const string16& input) {
517  std::string utf8_input(UTF16ToUTF8(input));
518  url_parse::Component scheme;
519  if (url_util::FindAndCompareScheme(utf8_input, chrome::kViewSourceScheme,
520                                     &scheme))
521    utf8_input.erase(0, scheme.end() + 1);
522  return url_util::FindAndCompareScheme(utf8_input, chrome::kHttpScheme, NULL);
523}
524
525void AutocompleteProvider::UpdateStarredStateOfMatches() {
526  if (matches_.empty())
527    return;
528
529  if (!profile_)
530    return;
531  BookmarkModel* bookmark_model = profile_->GetBookmarkModel();
532  if (!bookmark_model || !bookmark_model->IsLoaded())
533    return;
534
535  for (ACMatches::iterator i = matches_.begin(); i != matches_.end(); ++i)
536    i->starred = bookmark_model->IsBookmarked(GURL(i->destination_url));
537}
538
539string16 AutocompleteProvider::StringForURLDisplay(const GURL& url,
540                                                   bool check_accept_lang,
541                                                   bool trim_http) const {
542  std::string languages = (check_accept_lang && profile_) ?
543      profile_->GetPrefs()->GetString(prefs::kAcceptLanguages) : std::string();
544  return net::FormatUrl(
545      url,
546      languages,
547      net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
548      UnescapeRule::SPACES, NULL, NULL, NULL);
549}
550
551// AutocompleteResult ---------------------------------------------------------
552
553// static
554const size_t AutocompleteResult::kMaxMatches = 6;
555
556void AutocompleteResult::Selection::Clear() {
557  destination_url = GURL();
558  provider_affinity = NULL;
559  is_history_what_you_typed_match = false;
560}
561
562AutocompleteResult::AutocompleteResult() {
563  // Reserve space for the max number of matches we'll show.
564  matches_.reserve(kMaxMatches);
565
566  // It's probably safe to do this in the initializer list, but there's little
567  // penalty to doing it here and it ensures our object is fully constructed
568  // before calling member functions.
569  default_match_ = end();
570}
571
572AutocompleteResult::~AutocompleteResult() {}
573
574void AutocompleteResult::CopyFrom(const AutocompleteResult& rhs) {
575  if (this == &rhs)
576    return;
577
578  matches_ = rhs.matches_;
579  // Careful!  You can't just copy iterators from another container, you have to
580  // reconstruct them.
581  default_match_ = (rhs.default_match_ == rhs.end()) ?
582      end() : (begin() + (rhs.default_match_ - rhs.begin()));
583
584  alternate_nav_url_ = rhs.alternate_nav_url_;
585}
586
587void AutocompleteResult::CopyOldMatches(const AutocompleteInput& input,
588                                        const AutocompleteResult& old_matches) {
589  if (old_matches.empty())
590    return;
591
592  if (empty()) {
593    // If we've got no matches we can copy everything from the last result.
594    CopyFrom(old_matches);
595    for (ACMatches::iterator i = begin(); i != end(); ++i)
596      i->from_previous = true;
597    return;
598  }
599
600  // In hopes of providing a stable popup we try to keep the number of matches
601  // per provider consistent. Other schemes (such as blindly copying the most
602  // relevant matches) typically result in many successive 'What You Typed'
603  // results filling all the matches, which looks awful.
604  //
605  // Instead of starting with the current matches and then adding old matches
606  // until we hit our overall limit, we copy enough old matches so that each
607  // provider has at least as many as before, and then use SortAndCull() to
608  // clamp globally. This way, old high-relevance matches will starve new
609  // low-relevance matches, under the assumption that the new matches will
610  // ultimately be similar.  If the assumption holds, this prevents seeing the
611  // new low-relevance match appear and then quickly get pushed off the bottom;
612  // if it doesn't, then once the providers are done and we expire the old
613  // matches, the new ones will all become visible, so we won't have lost
614  // anything permanently.
615  ProviderToMatches matches_per_provider, old_matches_per_provider;
616  BuildProviderToMatches(&matches_per_provider);
617  old_matches.BuildProviderToMatches(&old_matches_per_provider);
618  for (ProviderToMatches::const_iterator i = old_matches_per_provider.begin();
619       i != old_matches_per_provider.end(); ++i) {
620    MergeMatchesByProvider(i->second, matches_per_provider[i->first]);
621  }
622
623  SortAndCull(input);
624}
625
626void AutocompleteResult::AppendMatches(const ACMatches& matches) {
627  std::copy(matches.begin(), matches.end(), std::back_inserter(matches_));
628  default_match_ = end();
629  alternate_nav_url_ = GURL();
630}
631
632void AutocompleteResult::AddMatch(const AutocompleteMatch& match) {
633  DCHECK(default_match_ != end());
634  ACMatches::iterator insertion_point =
635      std::upper_bound(begin(), end(), match, &AutocompleteMatch::MoreRelevant);
636  ACMatches::iterator::difference_type default_offset =
637      default_match_ - begin();
638  if ((insertion_point - begin()) <= default_offset)
639    ++default_offset;
640  matches_.insert(insertion_point, match);
641  default_match_ = begin() + default_offset;
642}
643
644void AutocompleteResult::SortAndCull(const AutocompleteInput& input) {
645  // Remove duplicates.
646  std::sort(matches_.begin(), matches_.end(),
647            &AutocompleteMatch::DestinationSortFunc);
648  matches_.erase(std::unique(matches_.begin(), matches_.end(),
649                             &AutocompleteMatch::DestinationsEqual),
650                 matches_.end());
651
652  // Sort and trim to the most relevant kMaxMatches matches.
653  const size_t num_matches = std::min(kMaxMatches, matches_.size());
654  std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
655                    matches_.end(), &AutocompleteMatch::MoreRelevant);
656  matches_.resize(num_matches);
657
658  default_match_ = begin();
659
660  // Set the alternate nav URL.
661  alternate_nav_url_ = GURL();
662  if (((input.type() == AutocompleteInput::UNKNOWN) ||
663       (input.type() == AutocompleteInput::REQUESTED_URL)) &&
664      (default_match_ != end()) &&
665      (default_match_->transition != PageTransition::TYPED) &&
666      (default_match_->transition != PageTransition::KEYWORD) &&
667      (input.canonicalized_url() != default_match_->destination_url))
668    alternate_nav_url_ = input.canonicalized_url();
669}
670
671bool AutocompleteResult::HasCopiedMatches() const {
672  for (ACMatches::const_iterator i = begin(); i != end(); ++i) {
673    if (i->from_previous)
674      return true;
675  }
676  return false;
677}
678
679size_t AutocompleteResult::size() const {
680  return matches_.size();
681}
682
683bool AutocompleteResult::empty() const {
684  return matches_.empty();
685}
686
687AutocompleteResult::const_iterator AutocompleteResult::begin() const {
688  return matches_.begin();
689}
690
691AutocompleteResult::iterator AutocompleteResult::begin() {
692  return matches_.begin();
693}
694
695AutocompleteResult::const_iterator AutocompleteResult::end() const {
696  return matches_.end();
697}
698
699AutocompleteResult::iterator AutocompleteResult::end() {
700  return matches_.end();
701}
702
703// Returns the match at the given index.
704const AutocompleteMatch& AutocompleteResult::match_at(size_t index) const {
705  DCHECK(index < matches_.size());
706  return matches_[index];
707}
708
709void AutocompleteResult::Reset() {
710  matches_.clear();
711  default_match_ = end();
712}
713
714void AutocompleteResult::Swap(AutocompleteResult* other) {
715  const size_t default_match_offset = default_match_ - begin();
716  const size_t other_default_match_offset =
717      other->default_match_ - other->begin();
718  matches_.swap(other->matches_);
719  default_match_ = begin() + other_default_match_offset;
720  other->default_match_ = other->begin() + default_match_offset;
721  alternate_nav_url_.Swap(&(other->alternate_nav_url_));
722}
723
724#ifndef NDEBUG
725void AutocompleteResult::Validate() const {
726  for (const_iterator i(begin()); i != end(); ++i)
727    i->Validate();
728}
729#endif
730
731void AutocompleteResult::BuildProviderToMatches(
732    ProviderToMatches* provider_to_matches) const {
733  for (ACMatches::const_iterator i = begin(); i != end(); ++i)
734    (*provider_to_matches)[i->provider].push_back(*i);
735}
736
737// static
738bool AutocompleteResult::HasMatchByDestination(const AutocompleteMatch& match,
739                                               const ACMatches& matches) {
740  for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
741    if (i->destination_url == match.destination_url)
742      return true;
743  }
744  return false;
745}
746
747void AutocompleteResult::MergeMatchesByProvider(const ACMatches& old_matches,
748                                                const ACMatches& new_matches) {
749  if (new_matches.size() >= old_matches.size())
750    return;
751
752  size_t delta = old_matches.size() - new_matches.size();
753  const int max_relevance = (new_matches.empty() ?
754      matches_.front().relevance : new_matches[0].relevance) - 1;
755  // Because the goal is a visibly-stable popup, rather than one that preserves
756  // the highest-relevance matches, we copy in the lowest-relevance matches
757  // first. This means that within each provider's "group" of matches, any
758  // synchronous matches (which tend to have the highest scores) will
759  // "overwrite" the initial matches from that provider's previous results,
760  // minimally disturbing the rest of the matches.
761  for (ACMatches::const_reverse_iterator i = old_matches.rbegin();
762       i != old_matches.rend() && delta > 0; ++i) {
763    if (!HasMatchByDestination(*i, new_matches)) {
764      AutocompleteMatch match = *i;
765      match.relevance = std::min(max_relevance, match.relevance);
766      match.from_previous = true;
767      AddMatch(match);
768      delta--;
769    }
770  }
771}
772
773// AutocompleteController -----------------------------------------------------
774
775const int AutocompleteController::kNoItemSelected = -1;
776
777// Amount of time (in ms) between when the user stops typing and when we remove
778// any copied entries. We do this from the time the user stopped typing as some
779// providers (such as SearchProvider) wait for the user to stop typing before
780// they initiate a query.
781static const int kExpireTimeMS = 500;
782
783AutocompleteController::AutocompleteController(
784    Profile* profile,
785    AutocompleteControllerDelegate* delegate)
786    : delegate_(delegate),
787      done_(true),
788      in_start_(false) {
789  search_provider_ = new SearchProvider(this, profile);
790  providers_.push_back(search_provider_);
791  if (CommandLine::ForCurrentProcess()->HasSwitch(
792          switches::kEnableHistoryQuickProvider) &&
793      !CommandLine::ForCurrentProcess()->HasSwitch(
794          switches::kDisableHistoryQuickProvider))
795    providers_.push_back(new HistoryQuickProvider(this, profile));
796  if (!CommandLine::ForCurrentProcess()->HasSwitch(
797      switches::kDisableHistoryURLProvider))
798    providers_.push_back(new HistoryURLProvider(this, profile));
799  providers_.push_back(new KeywordProvider(this, profile));
800  providers_.push_back(new HistoryContentsProvider(this, profile));
801  providers_.push_back(new BuiltinProvider(this, profile));
802  providers_.push_back(new ExtensionAppProvider(this, profile));
803  for (ACProviders::iterator i(providers_.begin()); i != providers_.end(); ++i)
804    (*i)->AddRef();
805}
806
807AutocompleteController::~AutocompleteController() {
808  // The providers may have tasks outstanding that hold refs to them.  We need
809  // to ensure they won't call us back if they outlive us.  (Practically,
810  // calling Stop() should also cancel those tasks and make it so that we hold
811  // the only refs.)  We also don't want to bother notifying anyone of our
812  // result changes here, because the notification observer is in the midst of
813  // shutdown too, so we don't ask Stop() to clear |result_| (and notify).
814  result_.Reset();  // Not really necessary.
815  Stop(false);
816
817  for (ACProviders::iterator i(providers_.begin()); i != providers_.end(); ++i)
818    (*i)->Release();
819
820  providers_.clear();  // Not really necessary.
821}
822
823void AutocompleteController::SetProfile(Profile* profile) {
824  Stop(true);
825  for (ACProviders::iterator i(providers_.begin()); i != providers_.end(); ++i)
826    (*i)->SetProfile(profile);
827  input_.Clear();  // Ensure we don't try to do a "minimal_changes" query on a
828                   // different profile.
829}
830
831void AutocompleteController::Start(
832    const string16& text,
833    const string16& desired_tld,
834    bool prevent_inline_autocomplete,
835    bool prefer_keyword,
836    bool allow_exact_keyword_match,
837    AutocompleteInput::MatchesRequested matches_requested) {
838  const string16 old_input_text(input_.text());
839  const AutocompleteInput::MatchesRequested old_matches_requested =
840      input_.matches_requested();
841  input_ = AutocompleteInput(text, desired_tld, prevent_inline_autocomplete,
842      prefer_keyword, allow_exact_keyword_match, matches_requested);
843
844  // See if we can avoid rerunning autocomplete when the query hasn't changed
845  // much.  When the user presses or releases the ctrl key, the desired_tld
846  // changes, and when the user finishes an IME composition, inline autocomplete
847  // may no longer be prevented.  In both these cases the text itself hasn't
848  // changed since the last query, and some providers can do much less work (and
849  // get matches back more quickly).  Taking advantage of this reduces flicker.
850  //
851  // NOTE: This comes after constructing |input_| above since that construction
852  // can change the text string (e.g. by stripping off a leading '?').
853  const bool minimal_changes = (input_.text() == old_input_text) &&
854      (input_.matches_requested() == old_matches_requested);
855
856  expire_timer_.Stop();
857
858  // Start the new query.
859  in_start_ = true;
860  base::TimeTicks start_time = base::TimeTicks::Now();
861  for (ACProviders::iterator i(providers_.begin()); i != providers_.end();
862       ++i) {
863    (*i)->Start(input_, minimal_changes);
864    if (matches_requested != AutocompleteInput::ALL_MATCHES)
865      DCHECK((*i)->done());
866  }
867  if (matches_requested == AutocompleteInput::ALL_MATCHES && text.size() < 6) {
868    base::TimeTicks end_time = base::TimeTicks::Now();
869    std::string name = "Omnibox.QueryTime." + base::IntToString(text.size());
870    base::Histogram* counter = base::Histogram::FactoryGet(
871        name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
872    counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
873  }
874  in_start_ = false;
875  CheckIfDone();
876  UpdateResult(true);
877
878  if (!done_)
879    StartExpireTimer();
880}
881
882void AutocompleteController::Stop(bool clear_result) {
883  for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end();
884       ++i) {
885    (*i)->Stop();
886  }
887
888  expire_timer_.Stop();
889  done_ = true;
890  if (clear_result && !result_.empty()) {
891    result_.Reset();
892    // NOTE: We pass in false since we're trying to only clear the popup, not
893    // touch the edit... this is all a mess and should be cleaned up :(
894    NotifyChanged(false);
895  }
896}
897
898void AutocompleteController::DeleteMatch(const AutocompleteMatch& match) {
899  DCHECK(match.deletable);
900  match.provider->DeleteMatch(match);  // This may synchronously call back to
901                                       // OnProviderUpdate().
902  // If DeleteMatch resulted in a callback to OnProviderUpdate and we're
903  // not done, we might attempt to redisplay the deleted match. Make sure
904  // we aren't displaying it by removing any old entries.
905  ExpireCopiedEntries();
906}
907
908void AutocompleteController::ExpireCopiedEntries() {
909  // Clear out the results. This ensures no results from the previous result set
910  // are copied over.
911  result_.Reset();
912  // We allow matches from the previous result set to starve out matches from
913  // the new result set. This means in order to expire matches we have to query
914  // the providers again.
915  UpdateResult(false);
916}
917
918void AutocompleteController::OnProviderUpdate(bool updated_matches) {
919  CheckIfDone();
920  // Multiple providers may provide synchronous results, so we only update the
921  // results if we're not in Start().
922  if (!in_start_ && (updated_matches || done_))
923    UpdateResult(false);
924}
925
926void AutocompleteController::UpdateResult(bool is_synchronous_pass) {
927  AutocompleteResult last_result;
928  last_result.Swap(&result_);
929
930  for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end();
931       ++i)
932    result_.AppendMatches((*i)->matches());
933
934  // Sort the matches and trim to a small number of "best" matches.
935  result_.SortAndCull(input_);
936
937  // Need to validate before invoking CopyOldMatches as the old matches are not
938  // valid against the current input.
939#ifndef NDEBUG
940  result_.Validate();
941#endif
942
943  if (!done_) {
944    // This conditional needs to match the conditional in Start that invokes
945    // StartExpireTimer.
946    result_.CopyOldMatches(input_, last_result);
947  }
948
949  bool notify_default_match = is_synchronous_pass;
950  if (!is_synchronous_pass) {
951    const bool last_default_was_valid =
952        last_result.default_match() != last_result.end();
953    const bool default_is_valid = result_.default_match() != result_.end();
954    // We've gotten async results. Send notification that the default match
955    // updated if fill_into_edit differs. We don't check the URL as that may
956    // change for the default match even though the fill into edit hasn't
957    // changed (see SearchProvider for one case of this).
958    notify_default_match =
959        (last_default_was_valid != default_is_valid) ||
960        (default_is_valid &&
961         (result_.default_match()->fill_into_edit !=
962          last_result.default_match()->fill_into_edit));
963  }
964
965  NotifyChanged(notify_default_match);
966}
967
968void AutocompleteController::NotifyChanged(bool notify_default_match) {
969  if (delegate_)
970    delegate_->OnResultChanged(notify_default_match);
971  if (done_) {
972    NotificationService::current()->Notify(
973        NotificationType::AUTOCOMPLETE_CONTROLLER_RESULT_READY,
974        Source<AutocompleteController>(this),
975        NotificationService::NoDetails());
976  }
977}
978
979void AutocompleteController::CheckIfDone() {
980  for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end();
981       ++i) {
982    if (!(*i)->done()) {
983      done_ = false;
984      return;
985    }
986  }
987  done_ = true;
988}
989
990void AutocompleteController::StartExpireTimer() {
991  if (result_.HasCopiedMatches())
992    expire_timer_.Start(base::TimeDelta::FromMilliseconds(kExpireTimeMS),
993                        this, &AutocompleteController::ExpireCopiedEntries);
994}
995