1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include "base/memory/ref_counted.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/strings/string16.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_split.h"
16#include "base/strings/utf_string_conversions.h"
17#include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
18#include "chrome/test/base/testing_profile.h"
19#include "components/bookmarks/browser/bookmark_match.h"
20#include "components/bookmarks/browser/bookmark_model.h"
21#include "components/bookmarks/test/test_bookmark_client.h"
22#include "components/metrics/proto/omnibox_event.pb.h"
23#include "components/omnibox/autocomplete_provider.h"
24#include "testing/gtest/include/gtest/gtest.h"
25
26using bookmarks::BookmarkMatch;
27
// Bookmark corpus that the simulated searches run against.  Each entry is a
// (title, URL) pair; BookmarkProviderTest::SetUp() adds one bookmark per
// entry, in the order listed here.
struct BookmarksTestInfo {
  std::string title;
  std::string url;
};

BookmarksTestInfo bookmark_provider_test_data[] = {
  { "abc def",                "http://www.catsanddogs.com/a" },
  { "abcde",                  "http://www.catsanddogs.com/b" },
  { "abcdef",                 "http://www.catsanddogs.com/c" },
  { "carry carbon carefully", "http://www.catsanddogs.com/d" },
  { "a definition",           "http://www.catsanddogs.com/e" },
  { "ghi jkl",                "http://www.catsanddogs.com/f" },
  { "jkl ghi",                "http://www.catsanddogs.com/g" },
  { "frankly frankly frank",  "http://www.catsanddogs.com/h" },
  { "foobar foobar",          "http://www.foobar.com/" },
  { "domain",                 "http://www.domain.com/http/" },
  { "repeat",                 "http://www.repeat.com/1/repeat/2/" },

  // Entries whose title equals their URL; used by the inline_autocompletion
  // tests.
  { "http://blah.com/",       "http://blah.com/" },
  { "http://fiddle.com/",     "http://fiddle.com/" },
  { "http://www.www.com/",    "http://www.www.com/" },
  { "chrome://version",       "chrome://version" },
  { "chrome://omnibox",       "chrome://omnibox" },

  // Same long words in different orders, each with a distinct URL; used by
  // the ranking tests.
  { "achlorhydric featherheads resuscitates mockingbirds",
    "http://www.manylongwords.com/1a" },
  { "achlorhydric mockingbirds resuscitates featherhead",
    "http://www.manylongwords.com/2b" },
  { "featherhead resuscitates achlorhydric mockingbirds",
    "http://www.manylongwords.com/3c" },
  { "mockingbirds resuscitates featherheads achlorhydric",
    "http://www.manylongwords.com/4d" },

  // Used by the URL boosting tests.  (URLs referenced multiple times are
  // boosted.)
  { "burning worms #1",       "http://www.burns.com/" },
  { "burning worms #2",       "http://www.worms.com/" },
  { "worming burns #10",      "http://www.burns.com/" },

  // Titles with unusual spacing, and an empty title.
  { " hello1  hello2  ",      "http://whatever.com/" },
  { "",                       "http://emptytitle.com/" },
};
67
// Test fixture for BookmarkProvider.  The constructor creates the bookmark
// model from |client_|; SetUp() creates the profile and the provider, points
// the provider at the model, and fills the model with the entries of
// bookmark_provider_test_data.
class BookmarkProviderTest : public testing::Test {
 public:
  BookmarkProviderTest();

 protected:
  // testing::Test override.
  virtual void SetUp() OVERRIDE;

  // Client from which |model_| is created.
  bookmarks::TestBookmarkClient client_;
  // Profile handed to the provider (and to the scheme classifier in tests).
  scoped_ptr<TestingProfile> profile_;
  // Bookmark model that holds the test corpus.
  scoped_ptr<BookmarkModel> model_;
  // The provider under test.
  scoped_refptr<BookmarkProvider> provider_;

 private:
  DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest);
};
83
84BookmarkProviderTest::BookmarkProviderTest() {
85  model_ = client_.CreateModel();
86}
87
88void BookmarkProviderTest::SetUp() {
89  profile_.reset(new TestingProfile());
90  DCHECK(profile_.get());
91  provider_ = new BookmarkProvider(profile_.get());
92  DCHECK(provider_.get());
93  provider_->set_bookmark_model_for_testing(model_.get());
94
95  const BookmarkNode* other_node = model_->other_node();
96  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) {
97    const BookmarksTestInfo& cur(bookmark_provider_test_data[i]);
98    const GURL url(cur.url);
99    model_->AddURL(other_node, other_node->child_count(),
100                   base::ASCIIToUTF16(cur.title), url);
101  }
102}
103
104// Structures and functions supporting the BookmarkProviderTest.Positions
105// unit test.
106
// One matched span, expressed as a pair of offsets.
struct TestBookmarkPosition {
  TestBookmarkPosition(size_t begin_offset, size_t end_offset)
      : begin(begin_offset),
        end(end_offset) {
  }

  size_t begin;  // Offset at which the span starts.
  size_t end;    // Offset at which the span ends.
};

// The list of spans for a single match.
typedef std::vector<TestBookmarkPosition> TestBookmarkPositions;
115
116// Return |positions| as a formatted string for unit test diagnostic output.
117std::string TestBookmarkPositionsAsString(
118    const TestBookmarkPositions& positions) {
119  std::string position_string("{");
120  for (TestBookmarkPositions::const_iterator i = positions.begin();
121       i != positions.end(); ++i) {
122    if (i != positions.begin())
123      position_string += ", ";
124    position_string += "{" + base::IntToString(i->begin) + ", " +
125        base::IntToString(i->end) + "}";
126  }
127  position_string += "}\n";
128  return position_string;
129}
130
131// Return the positions in |matches| as a formatted string for unit test
132// diagnostic output.
133base::string16 MatchesAsString16(const ACMatches& matches) {
134  base::string16 matches_string;
135  for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
136    matches_string.append(base::ASCIIToUTF16("    '"));
137    matches_string.append(i->description);
138    matches_string.append(base::ASCIIToUTF16("'\n"));
139  }
140  return matches_string;
141}
142
143// Comparison function for sorting search terms by descending length.
144bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a,
145                                const TestBookmarkPosition& pos_b) {
146  return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end;
147}
148
149// Convience function to make comparing ACMatchClassifications against the
150// test expectations structure easier.
151TestBookmarkPositions PositionsFromAutocompleteMatch(
152    const AutocompleteMatch& match) {
153  TestBookmarkPositions positions;
154  bool started = false;
155  size_t start = 0;
156  for (AutocompleteMatch::ACMatchClassifications::const_iterator
157       i = match.description_class.begin();
158       i != match.description_class.end(); ++i) {
159    if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) {
160      // We have found the start of a match.
161      EXPECT_FALSE(started);
162      started = true;
163      start = i->offset;
164    } else if (started) {
165      // We have found the end of a match.
166      started = false;
167      positions.push_back(TestBookmarkPosition(start, i->offset));
168      start = 0;
169    }
170  }
171  // Record the final position if the last match goes to the end of the
172  // candidate string.
173  if (started)
174    positions.push_back(TestBookmarkPosition(start, match.description.size()));
175  return positions;
176}
177
178// Convience function to make comparing test expectations structure against the
179// actual ACMatchClassifications easier.
180TestBookmarkPositions PositionsFromExpectations(
181    const size_t expectations[9][2]) {
182  TestBookmarkPositions positions;
183  size_t i = 0;
184  // The array is zero-terminated in the [1]th element.
185  while (expectations[i][1]) {
186    positions.push_back(
187        TestBookmarkPosition(expectations[i][0], expectations[i][1]));
188    ++i;
189  }
190  return positions;
191}
192
193TEST_F(BookmarkProviderTest, Positions) {
194  // Simulate searches.
195  // Description of |positions|:
196  //   The first index represents the collection of positions for each expected
197  //   match. The count of the actual subarrays in each instance of |query_data|
198  //   must equal |match_count|. The second index represents each expected
199  //   match position. The third index represents the |start| and |end| of the
200  //   expected match's position within the |test_data|. This array must be
201  //   terminated by an entry with a value of '0' for |end|.
202  // Example:
203  //   Consider the line for 'def' below:
204  //     {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}},
205  //   There are two expected matches:
206  //     0. {{4, 7}, {XXX, 0}}
207  //     1. {{2, 5}, {11 ,14}, {XXX, 0}}
208  //   For the first match, [0], there is one match within the bookmark's title
209  //   expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX'
210  //   value is ignored. The second match, [1], indicates that two matches are
211  //   expected within the bookmark title "a definite definition". In each case,
212  //   the {XXX, 0} indicates the end of the subarray. Or:
213  //                 Match #1            Match #2
214  //                 ------------------  ----------------------------
215  //                  Pos1    Term        Pos1    Pos2      Term
216  //                  ------  --------    ------  --------  --------
217  //     {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}},
218  //
219  struct QueryData {
220    const std::string query;
221    const size_t match_count;  // This count must match the number of major
222                               // elements in the following |positions| array.
223    const size_t positions[99][9][2];
224  } query_data[] = {
225    // This first set is primarily for position detection validation.
226    {"abc",                   3, {{{0, 3}, {0, 0}},
227                                  {{0, 3}, {0, 0}},
228                                  {{0, 3}, {0, 0}}}},
229    {"abcde",                 2, {{{0, 5}, {0, 0}},
230                                  {{0, 5}, {0, 0}}}},
231    {"foo bar",               0, {{{0, 0}}}},
232    {"fooey bark",            0, {{{0, 0}}}},
233    {"def",                   2, {{{2, 5}, {0, 0}},
234                                  {{4, 7}, {0, 0}}}},
235    {"ghi jkl",               2, {{{0, 3}, {4, 7}, {0, 0}},
236                                  {{0, 3}, {4, 7}, {0, 0}}}},
237    // NB: GetBookmarksMatching(...) uses exact match for "a" in title or URL.
238    {"a",                     2, {{{0, 1}, {0, 0}},
239                                  {{0, 0}}}},
240    {"a d",                   0, {{{0, 0}}}},
241    {"carry carbon",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
242    // NB: GetBookmarksMatching(...) sorts the match positions.
243    {"carbon carry",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
244    {"arbon",                 0, {{{0, 0}}}},
245    {"ar",                    0, {{{0, 0}}}},
246    {"arry",                  0, {{{0, 0}}}},
247    // Quoted terms are single terms.
248    {"\"carry carbon\"",      1, {{{0, 12}, {0, 0}}}},
249    {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}},
250    // Quoted terms require complete word matches.
251    {"\"carry carbo\"",       0, {{{0, 0}}}},
252    // This set uses duplicated and/or overlaps search terms in the title.
253    {"frank",                 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}},
254    {"frankly",               1, {{{0, 7}, {8, 15}, {0, 0}}}},
255    {"frankly frankly",       1, {{{0, 7}, {8, 15}, {0, 0}}}},
256    {"foobar foo",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
257    {"foo foobar",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
258    // This ensures that leading whitespace in the title is removed.
259    {"hello",                 1, {{{0, 5}, {7, 12}, {0, 0}}}},
260    // This ensures that empty titles yield empty classifications.
261    {"emptytitle",            1, {}},
262  };
263
264  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
265    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
266                            base::string16::npos, base::string16(), GURL(),
267                            metrics::OmniboxEventProto::INVALID_SPEC, false,
268                            false, false, true,
269                            ChromeAutocompleteSchemeClassifier(profile_.get()));
270    provider_->Start(input, false);
271    const ACMatches& matches(provider_->matches());
272    // Validate number of results is as expected.
273    EXPECT_LE(matches.size(), query_data[i].match_count)
274        << "One or more of the following matches were unexpected:\n"
275        << MatchesAsString16(matches)
276        << "For query '" << query_data[i].query << "'.";
277    EXPECT_GE(matches.size(), query_data[i].match_count)
278        << "One or more expected matches are missing. Matches found:\n"
279        << MatchesAsString16(matches)
280        << "for query '" << query_data[i].query << "'.";
281    // Validate positions within each match is as expected.
282    for (size_t j = 0; j < matches.size(); ++j) {
283      // Collect the expected positions as a vector, collect the match's
284      // classifications for match positions as a vector, then compare.
285      TestBookmarkPositions expected_positions(
286          PositionsFromExpectations(query_data[i].positions[j]));
287      TestBookmarkPositions actual_positions(
288          PositionsFromAutocompleteMatch(matches[j]));
289      EXPECT_TRUE(std::equal(expected_positions.begin(),
290                             expected_positions.end(),
291                             actual_positions.begin(),
292                             TestBookmarkPositionsEqual))
293          << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions)
294          << "ACTUAL:   " << TestBookmarkPositionsAsString(actual_positions)
295          << "    for query: '" << query_data[i].query << "'.";
296    }
297  }
298}
299
300TEST_F(BookmarkProviderTest, Rankings) {
301  // Simulate searches.
302  struct QueryData {
303    const std::string query;
304    // |match_count| must match the number of elements in the following
305    // |matches| array.
306    const size_t match_count;
307    // |matches| specifies the titles for all bookmarks expected to be matched
308    // by the |query|
309    const std::string matches[3];
310  } query_data[] = {
311    // Basic ranking test.
312    {"abc",       3, {"abcde",      // Most complete match.
313                      "abcdef",
314                      "abc def"}},  // Least complete match.
315    {"ghi",       2, {"ghi jkl",    // Matched earlier.
316                      "jkl ghi",    // Matched later.
317                      ""}},
318    // Rankings of exact-word matches with different URLs.
319    {"achlorhydric",
320                  3, {"achlorhydric mockingbirds resuscitates featherhead",
321                      "achlorhydric featherheads resuscitates mockingbirds",
322                      "featherhead resuscitates achlorhydric mockingbirds"}},
323    {"achlorhydric featherheads",
324                  2, {"achlorhydric featherheads resuscitates mockingbirds",
325                      "mockingbirds resuscitates featherheads achlorhydric",
326                      ""}},
327    {"mockingbirds resuscitates",
328                  3, {"mockingbirds resuscitates featherheads achlorhydric",
329                      "achlorhydric mockingbirds resuscitates featherhead",
330                      "featherhead resuscitates achlorhydric mockingbirds"}},
331    // Ranking of exact-word matches with URL boosts.
332    {"worms",     2, {"burning worms #1",    // boosted
333                      "burning worms #2",    // not boosted
334                      ""}},
335    // Ranking of prefix matches with URL boost.
336    {"burn worm", 3, {"burning worms #1",    // boosted
337                      "worming burns #10",   // boosted but longer title
338                      "burning worms #2"}},  // not boosted
339    // A query of "worm burn" will have the same results.
340    {"worm burn", 3, {"burning worms #1",    // boosted
341                      "worming burns #10",   // boosted but longer title
342                      "burning worms #2"}},  // not boosted
343  };
344
345  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
346    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
347                            base::string16::npos, base::string16(), GURL(),
348                            metrics::OmniboxEventProto::INVALID_SPEC, false,
349                            false, false, true,
350                            ChromeAutocompleteSchemeClassifier(profile_.get()));
351    provider_->Start(input, false);
352    const ACMatches& matches(provider_->matches());
353    // Validate number and content of results is as expected.
354    for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size());
355         ++j) {
356      EXPECT_LT(j, query_data[i].match_count) << "    Unexpected match '"
357          << base::UTF16ToUTF8(matches[j].description) << "' for query: '"
358          <<  query_data[i].query << "'.";
359      if (j >= query_data[i].match_count)
360        continue;
361      EXPECT_LT(j, matches.size()) << "    Missing match '"
362          << query_data[i].matches[j] << "' for query: '"
363          << query_data[i].query << "'.";
364      if (j >= matches.size())
365        continue;
366      EXPECT_EQ(query_data[i].matches[j],
367                base::UTF16ToUTF8(matches[j].description))
368          << "    Mismatch at [" << base::IntToString(j) << "] for query '"
369          << query_data[i].query << "'.";
370    }
371  }
372}
373
374TEST_F(BookmarkProviderTest, InlineAutocompletion) {
375  // Simulate searches.
376  struct QueryData {
377    const std::string query;
378    const std::string url;
379    const bool allowed_to_be_default_match;
380    const std::string inline_autocompletion;
381  } query_data[] = {
382    { "bla", "http://blah.com/", true, "h.com" },
383    { "blah ", "http://blah.com/", false, ".com" },
384    { "http://bl", "http://blah.com/", true, "ah.com" },
385    { "fiddle.c", "http://fiddle.com/", true, "om" },
386    { "www", "http://www.www.com/", true, ".com" },
387    { "chro", "chrome://version", true, "me://version" },
388    { "chrome://ve", "chrome://version", true, "rsion" },
389    { "chrome ver", "chrome://version", false, "" },
390    { "versi", "chrome://version", false, "" },
391    { "abou", "chrome://omnibox", false, "" },
392    { "about:om", "chrome://omnibox", true, "nibox" }
393    // Note: when adding a new URL to this test, be sure to add it to the list
394    // of bookmarks at the top of the file as well.  All items in this list
395    // need to be in the bookmarks list because BookmarkProvider's
396    // TitleMatchToACMatch() has an assertion that verifies the URL is
397    // actually bookmarked.
398  };
399
400  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
401    const std::string description = "for query=" + query_data[i].query +
402        " and url=" + query_data[i].url;
403    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
404                            base::string16::npos, base::string16(), GURL(),
405                            metrics::OmniboxEventProto::INVALID_SPEC, false,
406                            false, false, true,
407                            ChromeAutocompleteSchemeClassifier(profile_.get()));
408    const base::string16 fixed_up_input(
409        provider_->FixupUserInput(input).second);
410    BookmarkNode node(GURL(query_data[i].url));
411    node.SetTitle(base::ASCIIToUTF16(query_data[i].url));
412    BookmarkMatch bookmark_match;
413    bookmark_match.node = &node;
414    const AutocompleteMatch& ac_match = provider_->BookmarkMatchToACMatch(
415        input, fixed_up_input, bookmark_match);
416    EXPECT_EQ(query_data[i].allowed_to_be_default_match,
417              ac_match.allowed_to_be_default_match) << description;
418    EXPECT_EQ(base::ASCIIToUTF16(query_data[i].inline_autocompletion),
419              ac_match.inline_autocompletion) << description;
420  }
421}
422
423TEST_F(BookmarkProviderTest, StripHttpAndAdjustOffsets) {
424  // Simulate searches.
425  struct QueryData {
426    const std::string query;
427    const std::string expected_contents;
428    // |expected_contents_class| is in format offset:style,offset:style,...
429    const std::string expected_contents_class;
430  } query_data[] = {
431    { "foo",       "www.foobar.com",             "0:1,4:3,7:1"           },
432    { "www foo",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
433    { "foo www",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
434    { "foo http",  "http://www.foobar.com",      "0:3,4:1,11:3,14:1"     },
435    { "blah",      "blah.com",                   "0:3,4:1"               },
436    { "http blah", "http://blah.com",            "0:3,4:1,7:3,11:1"      },
437    { "dom",       "www.domain.com/http/",       "0:1,4:3,7:1"           },
438    { "dom http",  "http://www.domain.com/http/",
439      "0:3,4:1,11:3,14:1,22:3,26:1"                                      },
440    { "rep",       "www.repeat.com/1/repeat/2/", "0:1,4:3,7:1,17:3,20:1" },
441    { "versi",     "chrome://version",           "0:1,9:3,14:1"          }
442  };
443
444  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
445    std::string description = "for query=" + query_data[i].query;
446    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
447                            base::string16::npos, base::string16(), GURL(),
448                            metrics::OmniboxEventProto::INVALID_SPEC, false,
449                            false, false, true,
450                            ChromeAutocompleteSchemeClassifier(profile_.get()));
451    provider_->Start(input, false);
452    const ACMatches& matches(provider_->matches());
453    ASSERT_EQ(1U, matches.size()) << description;
454    const AutocompleteMatch& match = matches[0];
455    EXPECT_EQ(base::ASCIIToUTF16(query_data[i].expected_contents),
456              match.contents) << description;
457    std::vector<std::string> class_strings;
458    base::SplitString(
459        query_data[i].expected_contents_class, ',', &class_strings);
460    ASSERT_EQ(class_strings.size(), match.contents_class.size())
461        << description;
462    for (size_t i = 0; i < class_strings.size(); ++i) {
463      std::vector<std::string> chunks;
464      base::SplitString(class_strings[i], ':', &chunks);
465      ASSERT_EQ(2U, chunks.size()) << description;
466      size_t offset;
467      EXPECT_TRUE(base::StringToSizeT(chunks[0], &offset)) << description;
468      EXPECT_EQ(offset, match.contents_class[i].offset) << description;
469      int style;
470      EXPECT_TRUE(base::StringToInt(chunks[1], &style)) << description;
471      EXPECT_EQ(style, match.contents_class[i].style) << description;
472    }
473  }
474}
475