1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/omnibox/search_suggestion_parser.h"
6
7#include "base/i18n/icu_string_conversions.h"
8#include "base/json/json_string_value_serializer.h"
9#include "base/json/json_writer.h"
10#include "base/logging.h"
11#include "base/strings/string_util.h"
12#include "base/strings/utf_string_conversions.h"
13#include "base/values.h"
14#include "components/omnibox/autocomplete_input.h"
15#include "components/omnibox/url_prefix.h"
16#include "components/url_fixer/url_fixer.h"
17#include "net/base/net_util.h"
18#include "net/http/http_response_headers.h"
19#include "net/url_request/url_fetcher.h"
20#include "url/url_constants.h"
21
22namespace {
23
24AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
25  if (type == "ENTITY")
26    return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
27  if (type == "INFINITE")
28    return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE;
29  if (type == "PERSONALIZED_QUERY")
30    return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
31  if (type == "PROFILE")
32    return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
33  if (type == "NAVIGATION")
34    return AutocompleteMatchType::NAVSUGGEST;
35  if (type == "PERSONALIZED_NAVIGATION")
36    return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
37  return AutocompleteMatchType::SEARCH_SUGGEST;
38}
39
40}  // namespace
41
42// SearchSuggestionParser::Result ----------------------------------------------
43
44SearchSuggestionParser::Result::Result(bool from_keyword_provider,
45                                       int relevance,
46                                       bool relevance_from_server,
47                                       AutocompleteMatchType::Type type,
48                                       const std::string& deletion_url)
49    : from_keyword_provider_(from_keyword_provider),
50      type_(type),
51      relevance_(relevance),
52      relevance_from_server_(relevance_from_server),
53      received_after_last_keystroke_(true),
54      deletion_url_(deletion_url) {}
55
56SearchSuggestionParser::Result::~Result() {}
57
58// SearchSuggestionParser::SuggestResult ---------------------------------------
59
60SearchSuggestionParser::SuggestResult::SuggestResult(
61    const base::string16& suggestion,
62    AutocompleteMatchType::Type type,
63    const base::string16& match_contents,
64    const base::string16& match_contents_prefix,
65    const base::string16& annotation,
66    const base::string16& answer_contents,
67    const base::string16& answer_type,
68    const std::string& suggest_query_params,
69    const std::string& deletion_url,
70    bool from_keyword_provider,
71    int relevance,
72    bool relevance_from_server,
73    bool should_prefetch,
74    const base::string16& input_text)
75    : Result(from_keyword_provider,
76             relevance,
77             relevance_from_server,
78             type,
79             deletion_url),
80      suggestion_(suggestion),
81      match_contents_prefix_(match_contents_prefix),
82      annotation_(annotation),
83      suggest_query_params_(suggest_query_params),
84      answer_contents_(answer_contents),
85      answer_type_(answer_type),
86      should_prefetch_(should_prefetch) {
87  match_contents_ = match_contents;
88  DCHECK(!match_contents_.empty());
89  ClassifyMatchContents(true, input_text);
90}
91
92SearchSuggestionParser::SuggestResult::~SuggestResult() {}
93
94void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
95    const bool allow_bolding_all,
96    const base::string16& input_text) {
97  if (input_text.empty()) {
98    // In case of zero-suggest results, do not highlight matches.
99    match_contents_class_.push_back(
100        ACMatchClassification(0, ACMatchClassification::NONE));
101    return;
102  }
103
104  base::string16 lookup_text = input_text;
105  if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
106    const size_t contents_index =
107        suggestion_.length() - match_contents_.length();
108    // Ensure the query starts with the input text, and ends with the match
109    // contents, and the input text has an overlap with contents.
110    if (StartsWith(suggestion_, input_text, true) &&
111        EndsWith(suggestion_, match_contents_, true) &&
112        (input_text.length() > contents_index)) {
113      lookup_text = input_text.substr(contents_index);
114    }
115  }
116  size_t lookup_position = match_contents_.find(lookup_text);
117  if (!allow_bolding_all && (lookup_position == base::string16::npos)) {
118    // Bail if the code below to update the bolding would bold the whole
119    // string.  Note that the string may already be entirely bolded; if
120    // so, leave it as is.
121    return;
122  }
123  match_contents_class_.clear();
124  // We do intra-string highlighting for suggestions - the suggested segment
125  // will be highlighted, e.g. for input_text = "you" the suggestion may be
126  // "youtube", so we'll bold the "tube" section: you*tube*.
127  if (input_text != match_contents_) {
128    if (lookup_position == base::string16::npos) {
129      // The input text is not a substring of the query string, e.g. input
130      // text is "slasdot" and the query string is "slashdot", so we bold the
131      // whole thing.
132      match_contents_class_.push_back(
133          ACMatchClassification(0, ACMatchClassification::MATCH));
134    } else {
135      // We don't iterate over the string here annotating all matches because
136      // it looks odd to have every occurrence of a substring that may be as
137      // short as a single character highlighted in a query suggestion result,
138      // e.g. for input text "s" and query string "southwest airlines", it
139      // looks odd if both the first and last s are highlighted.
140      if (lookup_position != 0) {
141        match_contents_class_.push_back(
142            ACMatchClassification(0, ACMatchClassification::MATCH));
143      }
144      match_contents_class_.push_back(
145          ACMatchClassification(lookup_position, ACMatchClassification::NONE));
146      size_t next_fragment_position = lookup_position + lookup_text.length();
147      if (next_fragment_position < match_contents_.length()) {
148        match_contents_class_.push_back(ACMatchClassification(
149            next_fragment_position, ACMatchClassification::MATCH));
150      }
151    }
152  } else {
153    // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
154    // for the default provider or a keyword search provider.
155    match_contents_class_.push_back(
156        ACMatchClassification(0, ACMatchClassification::NONE));
157  }
158}
159
160int SearchSuggestionParser::SuggestResult::CalculateRelevance(
161    const AutocompleteInput& input,
162    bool keyword_provider_requested) const {
163  if (!from_keyword_provider_ && keyword_provider_requested)
164    return 100;
165  return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
166}
167
168// SearchSuggestionParser::NavigationResult ------------------------------------
169
170SearchSuggestionParser::NavigationResult::NavigationResult(
171    const AutocompleteSchemeClassifier& scheme_classifier,
172    const GURL& url,
173    AutocompleteMatchType::Type type,
174    const base::string16& description,
175    const std::string& deletion_url,
176    bool from_keyword_provider,
177    int relevance,
178    bool relevance_from_server,
179    const base::string16& input_text,
180    const std::string& languages)
181    : Result(from_keyword_provider, relevance, relevance_from_server, type,
182             deletion_url),
183      url_(url),
184      formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
185          url, net::FormatUrl(url, languages,
186                              net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
187                              net::UnescapeRule::SPACES, NULL, NULL, NULL),
188          scheme_classifier)),
189      description_(description) {
190  DCHECK(url_.is_valid());
191  CalculateAndClassifyMatchContents(true, input_text, languages);
192}
193
194SearchSuggestionParser::NavigationResult::~NavigationResult() {}
195
196void
197SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
198    const bool allow_bolding_nothing,
199    const base::string16& input_text,
200    const std::string& languages) {
201  if (input_text.empty()) {
202    // In case of zero-suggest results, do not highlight matches.
203    match_contents_class_.push_back(
204        ACMatchClassification(0, ACMatchClassification::NONE));
205    return;
206  }
207
208  // First look for the user's input inside the formatted url as it would be
209  // without trimming the scheme, so we can find matches at the beginning of the
210  // scheme.
211  const URLPrefix* prefix =
212      URLPrefix::BestURLPrefix(formatted_url_, input_text);
213  size_t match_start = (prefix == NULL) ?
214      formatted_url_.find(input_text) : prefix->prefix.length();
215  bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
216                   (!prefix || (match_start != 0));
217  const net::FormatUrlTypes format_types =
218      net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
219
220  base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
221      net::UnescapeRule::SPACES, NULL, NULL, &match_start);
222  // If the first match in the untrimmed string was inside a scheme that we
223  // trimmed, look for a subsequent match.
224  if (match_start == base::string16::npos)
225    match_start = match_contents.find(input_text);
226  // Update |match_contents_| and |match_contents_class_| if it's allowed.
227  if (allow_bolding_nothing || (match_start != base::string16::npos)) {
228    match_contents_ = match_contents;
229    // Safe if |match_start| is npos; also safe if the input is longer than the
230    // remaining contents after |match_start|.
231    AutocompleteMatch::ClassifyLocationInString(match_start,
232        input_text.length(), match_contents_.length(),
233        ACMatchClassification::URL, &match_contents_class_);
234  }
235}
236
237int SearchSuggestionParser::NavigationResult::CalculateRelevance(
238    const AutocompleteInput& input,
239    bool keyword_provider_requested) const {
240  return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
241}
242
243// SearchSuggestionParser::Results ---------------------------------------------
244
245SearchSuggestionParser::Results::Results()
246    : verbatim_relevance(-1),
247      field_trial_triggered(false),
248      relevances_from_server(false) {}
249
250SearchSuggestionParser::Results::~Results() {}
251
252void SearchSuggestionParser::Results::Clear() {
253  suggest_results.clear();
254  navigation_results.clear();
255  verbatim_relevance = -1;
256  metadata.clear();
257}
258
259bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
260  if (verbatim_relevance >= 0)
261    return true;
262
263  // Right now either all results of one type will be server-scored or they will
264  // all be locally scored, but in case we change this later, we'll just check
265  // them all.
266  for (SuggestResults::const_iterator i(suggest_results.begin());
267       i != suggest_results.end(); ++i) {
268    if (i->relevance_from_server())
269      return true;
270  }
271  for (NavigationResults::const_iterator i(navigation_results.begin());
272       i != navigation_results.end(); ++i) {
273    if (i->relevance_from_server())
274      return true;
275  }
276
277  return false;
278}
279
280// SearchSuggestionParser ------------------------------------------------------
281
282// static
283std::string SearchSuggestionParser::ExtractJsonData(
284    const net::URLFetcher* source) {
285  const net::HttpResponseHeaders* const response_headers =
286      source->GetResponseHeaders();
287  std::string json_data;
288  source->GetResponseAsString(&json_data);
289
290  // JSON is supposed to be UTF-8, but some suggest service providers send
291  // JSON files in non-UTF-8 encodings.  The actual encoding is usually
292  // specified in the Content-Type header field.
293  if (response_headers) {
294    std::string charset;
295    if (response_headers->GetCharset(&charset)) {
296      base::string16 data_16;
297      // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
298      if (base::CodepageToUTF16(json_data, charset.c_str(),
299                                base::OnStringConversionError::FAIL,
300                                &data_16))
301        json_data = base::UTF16ToUTF8(data_16);
302    }
303  }
304  return json_data;
305}
306
307// static
308scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
309    std::string json_data) {
310  // The JSON response should be an array.
311  for (size_t response_start_index = json_data.find("["), i = 0;
312       response_start_index != std::string::npos && i < 5;
313       response_start_index = json_data.find("[", 1), i++) {
314    // Remove any XSSI guards to allow for JSON parsing.
315    if (response_start_index > 0)
316      json_data.erase(0, response_start_index);
317
318    JSONStringValueSerializer deserializer(json_data);
319    deserializer.set_allow_trailing_comma(true);
320    int error_code = 0;
321    scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
322    if (error_code == 0)
323      return data.Pass();
324  }
325  return scoped_ptr<base::Value>();
326}
327
328// static
329bool SearchSuggestionParser::ParseSuggestResults(
330    const base::Value& root_val,
331    const AutocompleteInput& input,
332    const AutocompleteSchemeClassifier& scheme_classifier,
333    int default_result_relevance,
334    const std::string& languages,
335    bool is_keyword_result,
336    Results* results) {
337  base::string16 query;
338  const base::ListValue* root_list = NULL;
339  const base::ListValue* results_list = NULL;
340
341  if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
342      query != input.text() || !root_list->GetList(1, &results_list))
343    return false;
344
345  // 3rd element: Description list.
346  const base::ListValue* descriptions = NULL;
347  root_list->GetList(2, &descriptions);
348
349  // 4th element: Disregard the query URL list for now.
350
351  // Reset suggested relevance information.
352  results->verbatim_relevance = -1;
353
354  // 5th element: Optional key-value pairs from the Suggest server.
355  const base::ListValue* types = NULL;
356  const base::ListValue* relevances = NULL;
357  const base::ListValue* suggestion_details = NULL;
358  const base::DictionaryValue* extras = NULL;
359  int prefetch_index = -1;
360  if (root_list->GetDictionary(4, &extras)) {
361    extras->GetList("google:suggesttype", &types);
362
363    // Discard this list if its size does not match that of the suggestions.
364    if (extras->GetList("google:suggestrelevance", &relevances) &&
365        (relevances->GetSize() != results_list->GetSize()))
366      relevances = NULL;
367    extras->GetInteger("google:verbatimrelevance",
368                       &results->verbatim_relevance);
369
370    // Check if the active suggest field trial (if any) has triggered either
371    // for the default provider or keyword provider.
372    results->field_trial_triggered = false;
373    extras->GetBoolean("google:fieldtrialtriggered",
374                       &results->field_trial_triggered);
375
376    const base::DictionaryValue* client_data = NULL;
377    if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
378      client_data->GetInteger("phi", &prefetch_index);
379
380    if (extras->GetList("google:suggestdetail", &suggestion_details) &&
381        suggestion_details->GetSize() != results_list->GetSize())
382      suggestion_details = NULL;
383
384    // Store the metadata that came with the response in case we need to pass it
385    // along with the prefetch query to Instant.
386    JSONStringValueSerializer json_serializer(&results->metadata);
387    json_serializer.Serialize(*extras);
388  }
389
390  // Clear the previous results now that new results are available.
391  results->suggest_results.clear();
392  results->navigation_results.clear();
393  results->answers_image_urls.clear();
394
395  base::string16 suggestion;
396  std::string type;
397  int relevance = default_result_relevance;
398  // Prohibit navsuggest in FORCED_QUERY mode.  Users wants queries, not URLs.
399  const bool allow_navsuggest =
400      input.type() != metrics::OmniboxInputType::FORCED_QUERY;
401  const base::string16& trimmed_input =
402      base::CollapseWhitespace(input.text(), false);
403  for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
404    // Google search may return empty suggestions for weird input characters,
405    // they make no sense at all and can cause problems in our code.
406    if (suggestion.empty())
407      continue;
408
409    // Apply valid suggested relevance scores; discard invalid lists.
410    if (relevances != NULL && !relevances->GetInteger(index, &relevance))
411      relevances = NULL;
412    AutocompleteMatchType::Type match_type =
413        AutocompleteMatchType::SEARCH_SUGGEST;
414    if (types && types->GetString(index, &type))
415      match_type = GetAutocompleteMatchType(type);
416    const base::DictionaryValue* suggestion_detail = NULL;
417    std::string deletion_url;
418
419    if (suggestion_details &&
420        suggestion_details->GetDictionary(index, &suggestion_detail))
421      suggestion_detail->GetString("du", &deletion_url);
422
423    if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
424        (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
425      // Do not blindly trust the URL coming from the server to be valid.
426      GURL url(
427          url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
428      if (url.is_valid() && allow_navsuggest) {
429        base::string16 title;
430        if (descriptions != NULL)
431          descriptions->GetString(index, &title);
432        results->navigation_results.push_back(NavigationResult(
433            scheme_classifier, url, match_type, title, deletion_url,
434            is_keyword_result, relevance, relevances != NULL, input.text(),
435            languages));
436      }
437    } else {
438      base::string16 match_contents = suggestion;
439      base::string16 match_contents_prefix;
440      base::string16 annotation;
441      base::string16 answer_contents;
442      base::string16 answer_type;
443      std::string suggest_query_params;
444
445      if (suggestion_details) {
446        suggestion_details->GetDictionary(index, &suggestion_detail);
447        if (suggestion_detail) {
448          suggestion_detail->GetString("t", &match_contents);
449          suggestion_detail->GetString("mp", &match_contents_prefix);
450          // Error correction for bad data from server.
451          if (match_contents.empty())
452            match_contents = suggestion;
453          suggestion_detail->GetString("a", &annotation);
454          suggestion_detail->GetString("q", &suggest_query_params);
455
456          // Extract Answers, if provided.
457          const base::DictionaryValue* answer_json = NULL;
458          if (suggestion_detail->GetDictionary("ansa", &answer_json)) {
459            match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
460            GetAnswersImageURLs(answer_json, &results->answers_image_urls);
461            std::string contents;
462            base::JSONWriter::Write(answer_json, &contents);
463            answer_contents = base::UTF8ToUTF16(contents);
464            suggestion_detail->GetString("ansb", &answer_type);
465          }
466        }
467      }
468
469      bool should_prefetch = static_cast<int>(index) == prefetch_index;
470      // TODO(kochi): Improve calculator suggestion presentation.
471      results->suggest_results.push_back(SuggestResult(
472          base::CollapseWhitespace(suggestion, false), match_type,
473          base::CollapseWhitespace(match_contents, false),
474          match_contents_prefix, annotation, answer_contents, answer_type,
475          suggest_query_params, deletion_url, is_keyword_result, relevance,
476          relevances != NULL, should_prefetch, trimmed_input));
477    }
478  }
479  results->relevances_from_server = relevances != NULL;
480  return true;
481}
482
483// static
484void SearchSuggestionParser::GetAnswersImageURLs(
485    const base::DictionaryValue* answer_json,
486    std::vector<GURL>* urls) {
487  DCHECK(answer_json);
488
489  const base::ListValue* lines = NULL;
490  if (!answer_json->GetList("l", &lines) || !lines || lines->GetSize() == 0)
491    return;
492
493  for (base::ListValue::const_iterator iter = lines->begin();
494       iter != lines->end();
495       ++iter) {
496    const base::DictionaryValue* line = NULL;
497    if (!(*iter)->GetAsDictionary(&line) || !line)
498      continue;
499
500    std::string image_host_and_path;
501    if (!line->GetString("il.i.d", &image_host_and_path) ||
502        image_host_and_path.empty())
503      continue;
504    // Concatenate scheme and host/path using only ':' as separator. This is
505    // due to the results delivering strings of the form '//host/path', which
506    // is web-speak for "use the enclosing page's scheme", but not a valid path
507    // of an URL.
508    GURL image_url(
509        GURL(std::string(url::kHttpsScheme) + ":" + image_host_and_path));
510    if (image_url.is_valid())
511      urls->push_back(image_url);
512  }
513}
514