zero_suggest_provider.cc revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/zero_suggest_provider.h"
6
7#include "base/callback.h"
8#include "base/i18n/case_conversion.h"
9#include "base/json/json_string_value_serializer.h"
10#include "base/metrics/histogram.h"
11#include "base/prefs/pref_service.h"
12#include "base/strings/string16.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "base/time/time.h"
16#include "chrome/browser/autocomplete/autocomplete_classifier.h"
17#include "chrome/browser/autocomplete/autocomplete_classifier_factory.h"
18#include "chrome/browser/autocomplete/autocomplete_input.h"
19#include "chrome/browser/autocomplete/autocomplete_match.h"
20#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
21#include "chrome/browser/autocomplete/history_url_provider.h"
22#include "chrome/browser/autocomplete/search_provider.h"
23#include "chrome/browser/autocomplete/url_prefix.h"
24#include "chrome/browser/google/google_util.h"
25#include "chrome/browser/history/history_types.h"
26#include "chrome/browser/history/top_sites.h"
27#include "chrome/browser/metrics/variations/variations_http_header_provider.h"
28#include "chrome/browser/omnibox/omnibox_field_trial.h"
29#include "chrome/browser/profiles/profile.h"
30#include "chrome/browser/search/search.h"
31#include "chrome/browser/search_engines/template_url_service.h"
32#include "chrome/browser/search_engines/template_url_service_factory.h"
33#include "chrome/browser/sync/profile_sync_service.h"
34#include "chrome/browser/sync/profile_sync_service_factory.h"
35#include "chrome/common/net/url_fixer_upper.h"
36#include "chrome/common/pref_names.h"
37#include "chrome/common/url_constants.h"
38#include "net/base/escape.h"
39#include "net/base/load_flags.h"
40#include "net/base/net_util.h"
41#include "net/http/http_request_headers.h"
42#include "net/http/http_response_headers.h"
43#include "net/url_request/url_fetcher.h"
44#include "net/url_request/url_request_status.h"
45#include "url/gurl.h"
46
47namespace {
48
49// TODO(hfung): The histogram code was copied and modified from
50// search_provider.cc.  Refactor and consolidate the code.
51// We keep track in a histogram how many suggest requests we send, how
52// many suggest requests we invalidate (e.g., due to a user typing
53// another character), and how many replies we receive.
54// *** ADD NEW ENUMS AFTER ALL PREVIOUSLY DEFINED ONES! ***
55//     (excluding the end-of-list enum value)
56// We do not want values of existing enums to change or else it screws
57// up the statistics.
58enum ZeroSuggestRequestsHistogramValue {
59  ZERO_SUGGEST_REQUEST_SENT = 1,
60  ZERO_SUGGEST_REQUEST_INVALIDATED,
61  ZERO_SUGGEST_REPLY_RECEIVED,
62  ZERO_SUGGEST_MAX_REQUEST_HISTOGRAM_VALUE
63};
64
65void LogOmniboxZeroSuggestRequest(
66    ZeroSuggestRequestsHistogramValue request_value) {
67  UMA_HISTOGRAM_ENUMERATION("Omnibox.ZeroSuggestRequests", request_value,
68                            ZERO_SUGGEST_MAX_REQUEST_HISTOGRAM_VALUE);
69}
70
71// The maximum relevance of the top match from this provider.
72const int kDefaultVerbatimZeroSuggestRelevance = 1300;
73
74// Relevance value to use if it was not set explicitly by the server.
75const int kDefaultZeroSuggestRelevance = 100;
76
77}  // namespace
78
79// static
80ZeroSuggestProvider* ZeroSuggestProvider::Create(
81    AutocompleteProviderListener* listener,
82    Profile* profile) {
83  return new ZeroSuggestProvider(listener, profile);
84}
85
86void ZeroSuggestProvider::Start(const AutocompleteInput& input,
87                                bool /*minimal_changes*/) {
88}
89
90void ZeroSuggestProvider::Stop(bool clear_cached_results) {
91  if (have_pending_request_)
92    LogOmniboxZeroSuggestRequest(ZERO_SUGGEST_REQUEST_INVALIDATED);
93  have_pending_request_ = false;
94  fetcher_.reset();
95  done_ = true;
96  if (clear_cached_results) {
97    query_matches_map_.clear();
98    navigation_results_.clear();
99    current_query_.clear();
100    matches_.clear();
101  }
102}
103
104void ZeroSuggestProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
105  provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo());
106  metrics::OmniboxEventProto_ProviderInfo& new_entry = provider_info->back();
107  new_entry.set_provider(AsOmniboxEventProviderType());
108  new_entry.set_provider_done(done_);
109  std::vector<uint32> field_trial_hashes;
110  OmniboxFieldTrial::GetActiveSuggestFieldTrialHashes(&field_trial_hashes);
111  for (size_t i = 0; i < field_trial_hashes.size(); ++i) {
112    if (field_trial_triggered_)
113      new_entry.mutable_field_trial_triggered()->Add(field_trial_hashes[i]);
114    if (field_trial_triggered_in_session_) {
115      new_entry.mutable_field_trial_triggered_in_session()->Add(
116          field_trial_hashes[i]);
117     }
118  }
119}
120
121void ZeroSuggestProvider::ResetSession() {
122  // The user has started editing in the omnibox, so leave
123  // |field_trial_triggered_in_session_| unchanged and set
124  // |field_trial_triggered_| to false since zero suggest is inactive now.
125  field_trial_triggered_ = false;
126  Stop(true);
127}
128
129void ZeroSuggestProvider::OnURLFetchComplete(const net::URLFetcher* source) {
130  have_pending_request_ = false;
131  LogOmniboxZeroSuggestRequest(ZERO_SUGGEST_REPLY_RECEIVED);
132
133  std::string json_data;
134  source->GetResponseAsString(&json_data);
135  const bool request_succeeded =
136      source->GetStatus().is_success() && source->GetResponseCode() == 200;
137
138  if (request_succeeded) {
139    JSONStringValueSerializer deserializer(json_data);
140    deserializer.set_allow_trailing_comma(true);
141    scoped_ptr<Value> data(deserializer.Deserialize(NULL, NULL));
142    if (data.get())
143      ParseSuggestResults(*data.get());
144  }
145  done_ = true;
146
147  ConvertResultsToAutocompleteMatches();
148  if (!matches_.empty())
149    listener_->OnProviderUpdate(true);
150}
151
152void ZeroSuggestProvider::StartZeroSuggest(
153    const GURL& url,
154    AutocompleteInput::PageClassification page_classification,
155    const string16& permanent_text) {
156  Stop(true);
157  field_trial_triggered_ = false;
158  field_trial_triggered_in_session_ = false;
159  if (!ShouldRunZeroSuggest(url, page_classification))
160    return;
161  verbatim_relevance_ = kDefaultVerbatimZeroSuggestRelevance;
162  done_ = false;
163  permanent_text_ = permanent_text;
164  current_query_ = url.spec();
165  current_page_classification_ = page_classification;
166  current_url_match_ = MatchForCurrentURL();
167  // TODO(jered): Consider adding locally-sourced zero-suggestions here too.
168  // These may be useful on the NTP or more relevant to the user than server
169  // suggestions, if based on local browsing history.
170  Run();
171}
172
173ZeroSuggestProvider::ZeroSuggestProvider(
174  AutocompleteProviderListener* listener,
175  Profile* profile)
176    : AutocompleteProvider(listener, profile,
177          AutocompleteProvider::TYPE_ZERO_SUGGEST),
178      template_url_service_(TemplateURLServiceFactory::GetForProfile(profile)),
179      have_pending_request_(false),
180      verbatim_relevance_(kDefaultVerbatimZeroSuggestRelevance),
181      field_trial_triggered_(false),
182      field_trial_triggered_in_session_(false),
183      weak_ptr_factory_(this) {
184}
185
186ZeroSuggestProvider::~ZeroSuggestProvider() {
187}
188
189bool ZeroSuggestProvider::ShouldRunZeroSuggest(
190    const GURL& url,
191    AutocompleteInput::PageClassification page_classification) const {
192  if (!ShouldSendURL(url, page_classification))
193    return false;
194
195  // Don't run if there's no profile or in incognito mode.
196  if (profile_ == NULL || profile_->IsOffTheRecord())
197    return false;
198
199  // Don't run if we can't get preferences or search suggest is not enabled.
200  PrefService* prefs = profile_->GetPrefs();
201  if (prefs == NULL || !prefs->GetBoolean(prefs::kSearchSuggestEnabled))
202    return false;
203
204  ProfileSyncService* service =
205      ProfileSyncServiceFactory::GetInstance()->GetForProfile(profile_);
206  browser_sync::SyncPrefs sync_prefs(prefs);
207
208  // ZeroSuggest requires sending the current URL to the suggest provider, so we
209  // only want to enable it if the user is willing to have this data sent.
210  // Because tab sync involves sending the same data, we currently use
211  // "tab sync is enabled and tab sync data is unencrypted" as a proxy for
212  // "the user is OK with sending this data".  We might someday want to change
213  // this to a standalone setting or part of some other explicit general opt-in.
214  if (!OmniboxFieldTrial::InZeroSuggestFieldTrial() ||
215      service == NULL ||
216      !service->IsSyncEnabledAndLoggedIn() ||
217      !sync_prefs.GetPreferredDataTypes(syncer::UserTypes()).Has(
218          syncer::PROXY_TABS) ||
219      service->GetEncryptedDataTypes().Has(syncer::SESSIONS)) {
220    return false;
221  }
222  return true;
223}
224
225bool ZeroSuggestProvider::ShouldSendURL(
226    const GURL& url,
227    AutocompleteInput::PageClassification page_classification) const {
228  if (!url.is_valid())
229    return false;
230
231  // TODO(hfung): Show Most Visited on NTP with appropriate verbatim
232  // description when the user actively focuses on the omnibox as discussed in
233  // crbug/305366 if Most Visited (or something similar) will launch.
234  if (page_classification ==
235      AutocompleteInput::INSTANT_NEW_TAB_PAGE_WITH_FAKEBOX_AS_STARTING_FOCUS ||
236      page_classification ==
237      AutocompleteInput::INSTANT_NEW_TAB_PAGE_WITH_OMNIBOX_AS_STARTING_FOCUS)
238    return false;
239
240  // Only allow HTTP URLs or Google HTTPS URLs (including Google search
241  // result pages).  For the latter case, Google was already sent the HTTPS
242  // URLs when requesting the page, so the information is just re-sent.
243  return (url.scheme() == content::kHttpScheme) ||
244      google_util::IsGoogleDomainUrl(url, google_util::ALLOW_SUBDOMAIN,
245                                     google_util::ALLOW_NON_STANDARD_PORTS);
246}
247
248void ZeroSuggestProvider::FillResults(
249    const Value& root_val,
250    int* verbatim_relevance,
251    SearchProvider::SuggestResults* suggest_results,
252    SearchProvider::NavigationResults* navigation_results) {
253  string16 query;
254  const ListValue* root_list = NULL;
255  const ListValue* results = NULL;
256  const ListValue* relevances = NULL;
257  // The response includes the query, which should be empty for ZeroSuggest
258  // responses.
259  if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
260      (!query.empty()) || !root_list->GetList(1, &results))
261    return;
262
263  // 3rd element: Description list.
264  const ListValue* descriptions = NULL;
265  root_list->GetList(2, &descriptions);
266
267  // 4th element: Disregard the query URL list for now.
268
269  // Reset suggested relevance information from the provider.
270  *verbatim_relevance = kDefaultVerbatimZeroSuggestRelevance;
271
272  // 5th element: Optional key-value pairs from the Suggest server.
273  const ListValue* types = NULL;
274  const DictionaryValue* extras = NULL;
275  if (root_list->GetDictionary(4, &extras)) {
276    extras->GetList("google:suggesttype", &types);
277
278    // Discard this list if its size does not match that of the suggestions.
279    if (extras->GetList("google:suggestrelevance", &relevances) &&
280        relevances->GetSize() != results->GetSize())
281      relevances = NULL;
282    extras->GetInteger("google:verbatimrelevance", verbatim_relevance);
283
284    // Check if the active suggest field trial (if any) has triggered.
285    bool triggered = false;
286    extras->GetBoolean("google:fieldtrialtriggered", &triggered);
287    field_trial_triggered_ |= triggered;
288    field_trial_triggered_in_session_ |= triggered;
289  }
290
291  // Clear the previous results now that new results are available.
292  suggest_results->clear();
293  navigation_results->clear();
294
295  string16 result, title;
296  std::string type;
297  for (size_t index = 0; results->GetString(index, &result); ++index) {
298    // Google search may return empty suggestions for weird input characters,
299    // they make no sense at all and can cause problems in our code.
300    if (result.empty())
301      continue;
302
303    int relevance = kDefaultZeroSuggestRelevance;
304
305    // Apply valid suggested relevance scores; discard invalid lists.
306    if (relevances != NULL && !relevances->GetInteger(index, &relevance))
307      relevances = NULL;
308    if (types && types->GetString(index, &type) && (type == "NAVIGATION")) {
309      // Do not blindly trust the URL coming from the server to be valid.
310      GURL url(URLFixerUpper::FixupURL(UTF16ToUTF8(result), std::string()));
311      if (url.is_valid()) {
312        if (descriptions != NULL)
313          descriptions->GetString(index, &title);
314        navigation_results->push_back(SearchProvider::NavigationResult(
315            *this, url, title, false, relevance, relevances != NULL));
316      }
317    } else {
318      suggest_results->push_back(SearchProvider::SuggestResult(
319          result, false, relevance, relevances != NULL, false));
320    }
321  }
322}
323
324void ZeroSuggestProvider::AddSuggestResultsToMap(
325    const SearchProvider::SuggestResults& results,
326    const TemplateURL* template_url,
327    SearchProvider::MatchMap* map) {
328  for (size_t i = 0; i < results.size(); ++i) {
329    AddMatchToMap(results[i].relevance(), AutocompleteMatchType::SEARCH_SUGGEST,
330                  template_url, results[i].suggestion(), i, map);
331  }
332}
333
334void ZeroSuggestProvider::AddMatchToMap(int relevance,
335                                        AutocompleteMatch::Type type,
336                                        const TemplateURL* template_url,
337                                        const string16& query_string,
338                                        int accepted_suggestion,
339                                        SearchProvider::MatchMap* map) {
340  // Pass in query_string as the input_text since we don't want any bolding.
341  // TODO(samarth|melevin): use the actual omnibox margin here as well instead
342  // of passing in -1.
343  AutocompleteMatch match = SearchProvider::CreateSearchSuggestion(
344      this, relevance, type, template_url, query_string, query_string,
345      AutocompleteInput(), false, accepted_suggestion, -1, true);
346  if (!match.destination_url.is_valid())
347    return;
348
349  // Try to add |match| to |map|.  If a match for |query_string| is already in
350  // |map|, replace it if |match| is more relevant.
351  // NOTE: Keep this ToLower() call in sync with url_database.cc.
352  const std::pair<SearchProvider::MatchMap::iterator, bool> i(map->insert(
353      std::make_pair(base::i18n::ToLower(query_string), match)));
354  // NOTE: We purposefully do a direct relevance comparison here instead of
355  // using AutocompleteMatch::MoreRelevant(), so that we'll prefer "items added
356  // first" rather than "items alphabetically first" when the scores are equal.
357  // The only case this matters is when a user has results with the same score
358  // that differ only by capitalization; because the history system returns
359  // results sorted by recency, this means we'll pick the most recent such
360  // result even if the precision of our relevance score is too low to
361  // distinguish the two.
362  if (!i.second && (match.relevance > i.first->second.relevance))
363    i.first->second = match;
364}
365
366AutocompleteMatch ZeroSuggestProvider::NavigationToMatch(
367    const SearchProvider::NavigationResult& navigation) {
368  AutocompleteMatch match(this, navigation.relevance(), false,
369                          AutocompleteMatchType::NAVSUGGEST);
370  match.destination_url = navigation.url();
371
372  const std::string languages(
373      profile_->GetPrefs()->GetString(prefs::kAcceptLanguages));
374  match.contents = net::FormatUrl(navigation.url(), languages,
375      net::kFormatUrlOmitAll, net::UnescapeRule::SPACES, NULL, NULL, NULL);
376  match.fill_into_edit +=
377      AutocompleteInput::FormattedStringWithEquivalentMeaning(navigation.url(),
378          match.contents);
379
380  AutocompleteMatch::ClassifyLocationInString(string16::npos, 0,
381      match.contents.length(), ACMatchClassification::URL,
382      &match.contents_class);
383
384  match.description =
385      AutocompleteMatch::SanitizeString(navigation.description());
386  AutocompleteMatch::ClassifyLocationInString(string16::npos, 0,
387      match.description.length(), ACMatchClassification::NONE,
388      &match.description_class);
389  return match;
390}
391
392void ZeroSuggestProvider::Run() {
393  have_pending_request_ = false;
394  const int kFetcherID = 1;
395
396  const TemplateURL* default_provider =
397     template_url_service_->GetDefaultSearchProvider();
398  // TODO(hfung): Generalize if the default provider supports zero suggest.
399  // Only make the request if we know that the provider supports zero suggest
400  // (currently only the prepopulated Google provider).
401  if (default_provider == NULL || !default_provider->SupportsReplacement() ||
402      default_provider->prepopulate_id() != 1) {
403    Stop(true);
404    return;
405  }
406  string16 prefix;
407  TemplateURLRef::SearchTermsArgs search_term_args(prefix);
408  search_term_args.zero_prefix_url = current_query_;
409  std::string req_url = default_provider->suggestions_url_ref().
410      ReplaceSearchTerms(search_term_args);
411  GURL suggest_url(req_url);
412  // Make sure we are sending the suggest request through HTTPS.
413  if (!suggest_url.SchemeIs(content::kHttpsScheme)) {
414    Stop(true);
415    return;
416  }
417
418  fetcher_.reset(
419      net::URLFetcher::Create(kFetcherID,
420          suggest_url,
421          net::URLFetcher::GET, this));
422  fetcher_->SetRequestContext(profile_->GetRequestContext());
423  fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES);
424  // Add Chrome experiment state to the request headers.
425  net::HttpRequestHeaders headers;
426  chrome_variations::VariationsHttpHeaderProvider::GetInstance()->AppendHeaders(
427      fetcher_->GetOriginalURL(), profile_->IsOffTheRecord(), false, &headers);
428  fetcher_->SetExtraRequestHeaders(headers.ToString());
429
430  fetcher_->Start();
431
432  if (OmniboxFieldTrial::InZeroSuggestMostVisitedFieldTrial()) {
433    most_visited_urls_.clear();
434    history::TopSites* ts = profile_->GetTopSites();
435    if (ts) {
436      ts->GetMostVisitedURLs(
437          base::Bind(&ZeroSuggestProvider::OnMostVisitedUrlsAvailable,
438                     weak_ptr_factory_.GetWeakPtr()));
439    }
440  }
441  have_pending_request_ = true;
442  LogOmniboxZeroSuggestRequest(ZERO_SUGGEST_REQUEST_SENT);
443}
444
445void ZeroSuggestProvider::ParseSuggestResults(const Value& root_val) {
446  SearchProvider::SuggestResults suggest_results;
447  FillResults(root_val, &verbatim_relevance_,
448              &suggest_results, &navigation_results_);
449
450  query_matches_map_.clear();
451  AddSuggestResultsToMap(suggest_results,
452                         template_url_service_->GetDefaultSearchProvider(),
453                         &query_matches_map_);
454}
455
456void ZeroSuggestProvider::OnMostVisitedUrlsAvailable(
457    const history::MostVisitedURLList& urls) {
458  most_visited_urls_ = urls;
459}
460
461void ZeroSuggestProvider::ConvertResultsToAutocompleteMatches() {
462  matches_.clear();
463
464  const TemplateURL* default_provider =
465      template_url_service_->GetDefaultSearchProvider();
466  // Fail if we can't set the clickthrough URL for query suggestions.
467  if (default_provider == NULL || !default_provider->SupportsReplacement())
468    return;
469
470  const int num_query_results = query_matches_map_.size();
471  const int num_nav_results = navigation_results_.size();
472  const int num_results = num_query_results + num_nav_results;
473  UMA_HISTOGRAM_COUNTS("ZeroSuggest.QueryResults", num_query_results);
474  UMA_HISTOGRAM_COUNTS("ZeroSuggest.URLResults",  num_nav_results);
475  UMA_HISTOGRAM_COUNTS("ZeroSuggest.AllResults", num_results);
476
477  // Show Most Visited results after ZeroSuggest response is received.
478  if (OmniboxFieldTrial::InZeroSuggestMostVisitedFieldTrial()) {
479    if (!current_url_match_.destination_url.is_valid())
480      return;
481    matches_.push_back(current_url_match_);
482    int relevance = 600;
483    if (num_results > 0) {
484      UMA_HISTOGRAM_COUNTS(
485          "Omnibox.ZeroSuggest.MostVisitedResultsCounterfactual",
486          most_visited_urls_.size());
487    }
488    for (size_t i = 0; i < most_visited_urls_.size(); i++) {
489      const history::MostVisitedURL& url = most_visited_urls_[i];
490      SearchProvider::NavigationResult nav(*this, url.url, url.title, false,
491                                           relevance, true);
492      matches_.push_back(NavigationToMatch(nav));
493      --relevance;
494    }
495    return;
496  }
497
498  if (num_results == 0)
499    return;
500
501  // TODO(jered): Rip this out once the first match is decoupled from the
502  // current typing in the omnibox.
503  matches_.push_back(current_url_match_);
504
505  for (SearchProvider::MatchMap::const_iterator it(query_matches_map_.begin());
506       it != query_matches_map_.end(); ++it)
507    matches_.push_back(it->second);
508
509  for (SearchProvider::NavigationResults::const_iterator it(
510       navigation_results_.begin()); it != navigation_results_.end(); ++it)
511    matches_.push_back(NavigationToMatch(*it));
512}
513
514AutocompleteMatch ZeroSuggestProvider::MatchForCurrentURL() {
515  AutocompleteInput input(permanent_text_, string16::npos, string16(),
516                          GURL(current_query_), current_page_classification_,
517                          false, false, true, AutocompleteInput::ALL_MATCHES);
518
519  AutocompleteMatch match;
520  AutocompleteClassifierFactory::GetForProfile(profile_)->Classify(
521      permanent_text_, false, true, &match, NULL);
522  match.is_history_what_you_typed_match = false;
523  match.allowed_to_be_default_match = true;
524
525  // The placeholder suggestion for the current URL has high relevance so
526  // that it is in the first suggestion slot and inline autocompleted. It
527  // gets dropped as soon as the user types something.
528  match.relevance = verbatim_relevance_;
529
530  return match;
531}
532