bookmark_provider_unittest.cc revision 0529e5d033099cbfc42635f6f6183833b09dff6e
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include "base/memory/ref_counted.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/strings/string16.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_split.h"
16#include "base/strings/utf_string_conversions.h"
17#include "chrome/browser/autocomplete/autocomplete_provider.h"
18#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
19#include "chrome/browser/bookmarks/bookmark_model.h"
20#include "chrome/browser/bookmarks/bookmark_model_factory.h"
21#include "chrome/test/base/testing_profile.h"
22#include "components/bookmarks/core/browser/bookmark_match.h"
23#include "testing/gtest/include/gtest/gtest.h"
24
// One bookmark of the test corpus: the title shown to the user and the URL it
// points at.
struct BookmarksTestInfo {
  std::string title;
  std::string url;
};

// The corpus that SetUp() loads into the bookmark model. Every simulated
// search in this file runs against these entries.
BookmarksTestInfo bookmark_provider_test_data[] = {
  { "abc def", "http://www.catsanddogs.com/a" },
  { "abcde", "http://www.catsanddogs.com/b" },
  { "abcdef", "http://www.catsanddogs.com/c" },
  { "a definition", "http://www.catsanddogs.com/d" },
  { "carry carbon carefully", "http://www.catsanddogs.com/e" },
  { "ghi jkl", "http://www.catsanddogs.com/f" },
  { "jkl ghi", "http://www.catsanddogs.com/g" },
  { "frankly frankly frank", "http://www.catsanddogs.com/h" },
  { "foobar foobar", "http://www.foobar.com/" },
  { "domain", "http://www.domain.com/http/" },
  { "repeat", "http://www.repeat.com/1/repeat/2/" },

  // Entries used by the inline_autocompletion tests.
  { "http://blah.com/", "http://blah.com/" },
  { "http://fiddle.com/", "http://fiddle.com/" },
  { "http://www.www.com/", "http://www.www.com/" },
  { "chrome://version", "chrome://version" },
  { "chrome://omnibox", "chrome://omnibox" },

  // Entries used to test ranking when the URLs differ.
  { "achlorhydric featherheads resuscitates mockingbirds",
    "http://www.featherheads.com/a" },
  { "achlorhydric mockingbirds resuscitates featherhead",
    "http://www.featherheads.com/b" },
  { "featherhead resuscitates achlorhydric mockingbirds",
    "http://www.featherheads.com/c" },
  { "mockingbirds resuscitates featherheads achlorhydric",
    "http://www.featherheads.com/d" },

  // Entries used to test URL boosting.
  { "burning worms #1", "http://www.burned.com/" },
  { "burning worms #2", "http://www.worms.com/" },
  { "worming burns #10", "http://www.burned.com/" },
  { "worming burns #20", "http://www.worms.com/" },
  { "jive music", "http://www.worms.com/" },
};
63
64class BookmarkProviderTest : public testing::Test,
65                             public AutocompleteProviderListener {
66 public:
67  BookmarkProviderTest();
68
69  // AutocompleteProviderListener: Not called.
70  virtual void OnProviderUpdate(bool updated_matches) OVERRIDE {}
71
72 protected:
73  virtual void SetUp() OVERRIDE;
74
75  scoped_ptr<TestingProfile> profile_;
76  scoped_ptr<BookmarkModel> model_;
77  scoped_refptr<BookmarkProvider> provider_;
78
79 private:
80  DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest);
81};
82
83BookmarkProviderTest::BookmarkProviderTest() {
84  model_.reset(new BookmarkModel(NULL, false));
85}
86
87void BookmarkProviderTest::SetUp() {
88  profile_.reset(new TestingProfile());
89  DCHECK(profile_.get());
90  provider_ = new BookmarkProvider(this, profile_.get());
91  DCHECK(provider_.get());
92  provider_->set_bookmark_model_for_testing(model_.get());
93
94  const BookmarkNode* other_node = model_->other_node();
95  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) {
96    const BookmarksTestInfo& cur(bookmark_provider_test_data[i]);
97    const GURL url(cur.url);
98    model_->AddURL(other_node, other_node->child_count(),
99                   base::ASCIIToUTF16(cur.title), url);
100  }
101}
102
103// Structures and functions supporting the BookmarkProviderTest.Positions
104// unit test.
105
// A single expected or actual match position within a bookmark title,
// expressed as a pair of offsets.
struct TestBookmarkPosition {
  TestBookmarkPosition(size_t begin_offset, size_t end_offset)
      : begin(begin_offset),
        end(end_offset) {
  }

  size_t begin;  // Offset at which the match starts.
  size_t end;    // Offset at which the match ends.
};

// All match positions found in (or expected for) one bookmark title.
typedef std::vector<TestBookmarkPosition> TestBookmarkPositions;
114
115// Return |positions| as a formatted string for unit test diagnostic output.
116std::string TestBookmarkPositionsAsString(
117    const TestBookmarkPositions& positions) {
118  std::string position_string("{");
119  for (TestBookmarkPositions::const_iterator i = positions.begin();
120       i != positions.end(); ++i) {
121    if (i != positions.begin())
122      position_string += ", ";
123    position_string += "{" + base::IntToString(i->begin) + ", " +
124        base::IntToString(i->end) + "}";
125  }
126  position_string += "}\n";
127  return position_string;
128}
129
130// Return the positions in |matches| as a formatted string for unit test
131// diagnostic output.
132base::string16 MatchesAsString16(const ACMatches& matches) {
133  base::string16 matches_string;
134  for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
135    matches_string.append(base::ASCIIToUTF16("    '"));
136    matches_string.append(i->description);
137    matches_string.append(base::ASCIIToUTF16("'\n"));
138  }
139  return matches_string;
140}
141
142// Comparison function for sorting search terms by descending length.
143bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a,
144                                const TestBookmarkPosition& pos_b) {
145  return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end;
146}
147
148// Convience function to make comparing ACMatchClassifications against the
149// test expectations structure easier.
150TestBookmarkPositions PositionsFromAutocompleteMatch(
151    const AutocompleteMatch& match) {
152  TestBookmarkPositions positions;
153  bool started = false;
154  size_t start = 0;
155  for (AutocompleteMatch::ACMatchClassifications::const_iterator
156       i = match.description_class.begin();
157       i != match.description_class.end(); ++i) {
158    if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) {
159      // We have found the start of a match.
160      EXPECT_FALSE(started);
161      started = true;
162      start = i->offset;
163    } else if (started) {
164      // We have found the end of a match.
165      started = false;
166      positions.push_back(TestBookmarkPosition(start, i->offset));
167      start = 0;
168    }
169  }
170  // Record the final position if the last match goes to the end of the
171  // candidate string.
172  if (started)
173    positions.push_back(TestBookmarkPosition(start, match.description.size()));
174  return positions;
175}
176
177// Convience function to make comparing test expectations structure against the
178// actual ACMatchClassifications easier.
179TestBookmarkPositions PositionsFromExpectations(
180    const size_t expectations[9][2]) {
181  TestBookmarkPositions positions;
182  size_t i = 0;
183  // The array is zero-terminated in the [1]th element.
184  while (expectations[i][1]) {
185    positions.push_back(
186        TestBookmarkPosition(expectations[i][0], expectations[i][1]));
187    ++i;
188  }
189  return positions;
190}
191
192TEST_F(BookmarkProviderTest, Positions) {
193  // Simulate searches.
194  // Description of |positions|:
195  //   The first index represents the collection of positions for each expected
196  //   match. The count of the actual subarrays in each instance of |query_data|
197  //   must equal |match_count|. The second index represents each expected
198  //   match position. The third index represents the |start| and |end| of the
199  //   expected match's position within the |test_data|. This array must be
200  //   terminated by an entry with a value of '0' for |end|.
201  // Example:
202  //   Consider the line for 'def' below:
203  //     {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}},
204  //   There are two expected matches:
205  //     0. {{4, 7}, {XXX, 0}}
206  //     1. {{2, 5}, {11 ,14}, {XXX, 0}}
207  //   For the first match, [0], there is one match within the bookmark's title
208  //   expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX'
209  //   value is ignored. The second match, [1], indicates that two matches are
210  //   expected within the bookmark title "a definite definition". In each case,
211  //   the {XXX, 0} indicates the end of the subarray. Or:
212  //                 Match #1            Match #2
213  //                 ------------------  ----------------------------
214  //                  Pos1    Term        Pos1    Pos2      Term
215  //                  ------  --------    ------  --------  --------
216  //     {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}},
217  //
218  struct QueryData {
219    const std::string query;
220    const size_t match_count;  // This count must match the number of major
221                               // elements in the following |positions| array.
222    const size_t positions[99][9][2];
223  } query_data[] = {
224    // This first set is primarily for position detection validation.
225    {"abc",                   3, {{{0, 3}, {0, 0}},
226                                  {{0, 3}, {0, 0}},
227                                  {{0, 3}, {0, 0}}}},
228    {"abcde",                 2, {{{0, 5}, {0, 0}},
229                                  {{0, 5}, {0, 0}}}},
230    {"foo bar",               0, {{{0, 0}}}},
231    {"fooey bark",            0, {{{0, 0}}}},
232    {"def",                   2, {{{2, 5}, {0, 0}},
233                                  {{4, 7}, {0, 0}}}},
234    {"ghi jkl",               2, {{{0, 3}, {4, 7}, {0, 0}},
235                                  {{0, 3}, {4, 7}, {0, 0}}}},
236    // NB: GetBookmarksWithTitlesMatching(...) uses exact match for "a".
237    {"a",                     1, {{{0, 1}, {0, 0}}}},
238    {"a d",                   0, {{{0, 0}}}},
239    {"carry carbon",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
240    // NB: GetBookmarksWithTitlesMatching(...) sorts the match positions.
241    {"carbon carry",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
242    {"arbon",                 0, {{{0, 0}}}},
243    {"ar",                    0, {{{0, 0}}}},
244    {"arry",                  0, {{{0, 0}}}},
245    // Quoted terms are single terms.
246    {"\"carry carbon\"",      1, {{{0, 12}, {0, 0}}}},
247    {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}},
248    // Quoted terms require complete word matches.
249    {"\"carry carbo\"",       0, {{{0, 0}}}},
250    // This set uses duplicated and/or overlaps search terms in the title.
251    {"frank",                 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}},
252    {"frankly",               1, {{{0, 7}, {8, 15}, {0, 0}}}},
253    {"frankly frankly",       1, {{{0, 7}, {8, 15}, {0, 0}}}},
254    {"foobar foo",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
255    {"foo foobar",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
256  };
257
258  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
259    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
260                            base::string16::npos, base::string16(), GURL(),
261                            AutocompleteInput::INVALID_SPEC, false, false,
262                            false, true);
263    provider_->Start(input, false);
264    const ACMatches& matches(provider_->matches());
265    // Validate number of results is as expected.
266    EXPECT_LE(matches.size(), query_data[i].match_count)
267        << "One or more of the following matches were unexpected:\n"
268        << MatchesAsString16(matches)
269        << "For query '" << query_data[i].query << "'.";
270    EXPECT_GE(matches.size(), query_data[i].match_count)
271        << "One or more expected matches are missing. Matches found:\n"
272        << MatchesAsString16(matches)
273        << "for query '" << query_data[i].query << "'.";
274    // Validate positions within each match is as expected.
275    for (size_t j = 0; j < matches.size(); ++j) {
276      // Collect the expected positions as a vector, collect the match's
277      // classifications for match positions as a vector, then compare.
278      TestBookmarkPositions expected_positions(
279          PositionsFromExpectations(query_data[i].positions[j]));
280      TestBookmarkPositions actual_positions(
281          PositionsFromAutocompleteMatch(matches[j]));
282      EXPECT_TRUE(std::equal(expected_positions.begin(),
283                             expected_positions.end(),
284                             actual_positions.begin(),
285                             TestBookmarkPositionsEqual))
286          << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions)
287          << "ACTUAL:   " << TestBookmarkPositionsAsString(actual_positions)
288          << "    for query: '" << query_data[i].query << "'.";
289    }
290  }
291}
292
293TEST_F(BookmarkProviderTest, Rankings) {
294  // Simulate searches.
295  struct QueryData {
296    const std::string query;
297    // |match_count| must match the number of elements in the following
298    // |matches| array.
299    const size_t match_count;
300    // |matches| specifies the titles for all bookmarks expected to be matched
301    // by the |query|
302    const std::string matches[99];
303  } query_data[] = {
304    // Basic ranking test.
305    {"abc",       3, {"abcde",      // Most complete match.
306                      "abcdef",
307                      "abc def"}},  // Least complete match.
308    {"ghi",       2, {"ghi jkl",    // Matched earlier.
309                      "jkl ghi"}},  // Matched later.
310    // Rankings of exact-word matches with different URLs.
311    {"achlorhydric",
312                  3, {"achlorhydric mockingbirds resuscitates featherhead",
313                      "achlorhydric featherheads resuscitates mockingbirds",
314                      "featherhead resuscitates achlorhydric mockingbirds"}},
315    {"achlorhydric featherheads",
316                  2, {"achlorhydric featherheads resuscitates mockingbirds",
317                      "mockingbirds resuscitates featherheads achlorhydric"}},
318    {"mockingbirds resuscitates",
319                  3, {"mockingbirds resuscitates featherheads achlorhydric",
320                      "achlorhydric mockingbirds resuscitates featherhead",
321                      "featherhead resuscitates achlorhydric mockingbirds"}},
322    // Ranking of exact-word matches with URL boost.
323    {"worms",     2, {"burning worms #2",    // boosted
324                      "burning worms #1"}},  // not boosted
325    // Ranking of prefix matches with URL boost. Note that a query of
326    // "worm burn" will have the same results.
327    {"burn worm", 3, {"burning worms #2",    // boosted
328                      "worming burns #20",   // boosted
329                      "burning worms #1"}},  // not boosted but shorter
330  };
331
332  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
333    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
334                            base::string16::npos, base::string16(), GURL(),
335                            AutocompleteInput::INVALID_SPEC, false, false,
336                            false, true);
337    provider_->Start(input, false);
338    const ACMatches& matches(provider_->matches());
339    // Validate number and content of results is as expected.
340    for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size());
341         ++j) {
342      EXPECT_LT(j, query_data[i].match_count) << "    Unexpected match '"
343          << base::UTF16ToUTF8(matches[j].description) << "' for query: '"
344          <<  query_data[i].query << "'.";
345      if (j >= query_data[i].match_count)
346        continue;
347      EXPECT_LT(j, matches.size()) << "    Missing match '"
348          << query_data[i].matches[j] << "' for query: '"
349          << query_data[i].query << "'.";
350      if (j >= matches.size())
351        continue;
352      EXPECT_EQ(query_data[i].matches[j],
353                base::UTF16ToUTF8(matches[j].description))
354          << "    Mismatch at [" << base::IntToString(j) << "] for query '"
355          << query_data[i].query << "'.";
356    }
357  }
358}
359
360TEST_F(BookmarkProviderTest, InlineAutocompletion) {
361  // Simulate searches.
362  struct QueryData {
363    const std::string query;
364    const std::string url;
365    const bool allowed_to_be_default_match;
366    const std::string inline_autocompletion;
367  } query_data[] = {
368    { "bla", "http://blah.com/", true, "h.com" },
369    { "blah ", "http://blah.com/", false, ".com" },
370    { "http://bl", "http://blah.com/", true, "ah.com" },
371    { "fiddle.c", "http://fiddle.com/", true, "om" },
372    { "www", "http://www.www.com/", true, ".com" },
373    { "chro", "chrome://version", true, "me://version" },
374    { "chrome://ve", "chrome://version", true, "rsion" },
375    { "chrome ver", "chrome://version", false, "" },
376    { "versi", "chrome://version", false, "" },
377    { "abou", "chrome://omnibox", false, "" },
378    { "about:om", "chrome://omnibox", true, "nibox" }
379    // Note: when adding a new URL to this test, be sure to add it to the list
380    // of bookmarks at the top of the file as well.  All items in this list
381    // need to be in the bookmarks list because BookmarkProvider's
382    // TitleMatchToACMatch() has an assertion that verifies the URL is
383    // actually bookmarked.
384  };
385
386  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
387    const std::string description = "for query=" + query_data[i].query +
388        " and url=" + query_data[i].url;
389    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
390                            base::string16::npos, base::string16(), GURL(),
391                            AutocompleteInput::INVALID_SPEC, false, false,
392                            false, true);
393    AutocompleteInput fixed_up_input(input);
394    provider_->FixupUserInput(&fixed_up_input);
395    BookmarkNode node(GURL(query_data[i].url));
396    node.SetTitle(base::ASCIIToUTF16(query_data[i].url));
397    BookmarkMatch bookmark_match;
398    bookmark_match.node = &node;
399    const AutocompleteMatch& ac_match = provider_->BookmarkMatchToACMatch(
400        input, fixed_up_input, bookmark_match);
401    EXPECT_EQ(query_data[i].allowed_to_be_default_match,
402              ac_match.allowed_to_be_default_match) << description;
403    EXPECT_EQ(base::ASCIIToUTF16(query_data[i].inline_autocompletion),
404              ac_match.inline_autocompletion) << description;
405  }
406}
407
408TEST_F(BookmarkProviderTest, StripHttpAndAdjustOffsets) {
409  // Simulate searches.
410  struct QueryData {
411    const std::string query;
412    const std::string expected_contents;
413    // |expected_contents_class| is in format offset:style,offset:style,...
414    const std::string expected_contents_class;
415  } query_data[] = {
416    { "foo",       "www.foobar.com",             "0:1,4:3,7:1"           },
417    { "www foo",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
418    { "foo www",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
419    { "foo http",  "http://www.foobar.com",      "0:3,4:1,11:3,14:1"     },
420    { "blah",      "blah.com",                   "0:3,4:1"               },
421    { "http blah", "http://blah.com",            "0:3,4:1,7:3,11:1"      },
422    { "dom",       "www.domain.com/http/",       "0:1,4:3,7:1"           },
423    { "dom http",  "http://www.domain.com/http/",
424      "0:3,4:1,11:3,14:1,22:3,26:1"                                      },
425    { "rep",       "www.repeat.com/1/repeat/2/", "0:1,4:3,7:1,17:3,20:1" },
426    { "versi",     "chrome://version",           "0:1,9:3,14:1"          }
427  };
428
429  // Reload the bookmarks index with |index_urls| == true.
430  model_.reset(new BookmarkModel(NULL, true));
431  SetUp();
432
433  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
434    std::string description = "for query=" + query_data[i].query;
435    AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
436                            base::string16::npos, base::string16(), GURL(),
437                            AutocompleteInput::INVALID_SPEC, false, false,
438                            false, true);
439    provider_->Start(input, false);
440    const ACMatches& matches(provider_->matches());
441    ASSERT_EQ(1U, matches.size()) << description;
442    const AutocompleteMatch& match = matches[0];
443    EXPECT_EQ(base::ASCIIToUTF16(query_data[i].expected_contents),
444              match.contents) << description;
445    std::vector<std::string> class_strings;
446    base::SplitString(
447        query_data[i].expected_contents_class, ',', &class_strings);
448    ASSERT_EQ(class_strings.size(), match.contents_class.size())
449        << description;
450    for (size_t i = 0; i < class_strings.size(); ++i) {
451      std::vector<std::string> chunks;
452      base::SplitString(class_strings[i], ':', &chunks);
453      ASSERT_EQ(2U, chunks.size()) << description;
454      size_t offset;
455      EXPECT_TRUE(base::StringToSizeT(chunks[0], &offset)) << description;
456      EXPECT_EQ(offset, match.contents_class[i].offset) << description;
457      int style;
458      EXPECT_TRUE(base::StringToInt(chunks[1], &style)) << description;
459      EXPECT_EQ(style, match.contents_class[i].style) << description;
460    }
461  }
462}
463