bookmark_provider_unittest.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include "base/memory/ref_counted.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/string16.h"
14#include "base/string_number_conversions.h"
15#include "base/utf_string_conversions.h"
16#include "chrome/browser/autocomplete/autocomplete_provider.h"
17#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
18#include "chrome/browser/bookmarks/bookmark_model.h"
19#include "chrome/browser/bookmarks/bookmark_model_factory.h"
20#include "chrome/test/base/testing_profile.h"
21#include "testing/gtest/include/gtest/gtest.h"
22
// The bookmark corpus against which the tests below simulate searches. Each
// entry supplies the title and the URL of one bookmark that
// BookmarkProviderTest::SetUp() adds to the bookmark model.
struct BookmarksTestInfo {
  std::string title;
  std::string url;
};

BookmarksTestInfo bookmark_provider_test_data[] = {
  // General-purpose titles sharing one host.
  { "abc def", "http://www.catsanddogs.com/a" },
  { "abcde", "http://www.catsanddogs.com/b" },
  { "abcdef", "http://www.catsanddogs.com/c" },
  { "a definition", "http://www.catsanddogs.com/d" },
  { "carry carbon carefully", "http://www.catsanddogs.com/e" },
  { "ghi jkl", "http://www.catsanddogs.com/f" },
  { "jkl ghi", "http://www.catsanddogs.com/g" },
  { "frankly frankly frank", "http://www.catsanddogs.com/h" },
  { "foobar foobar", "http://www.foobar.com/" },
  // For testing ranking with different URLs.
  { "achlorhydric featherheads resuscitates mockingbirds",
    "http://www.featherheads.com/a" },
  { "achlorhydric mockingbirds resuscitates featherhead",
    "http://www.featherheads.com/b" },
  { "featherhead resuscitates achlorhydric mockingbirds",
    "http://www.featherheads.com/c" },
  { "mockingbirds resuscitates featherheads achlorhydric",
    "http://www.featherheads.com/d" },
  // For testing URL boosting.
  { "burning worms #1", "http://www.burned.com/" },
  { "burning worms #2", "http://www.worms.com/" },
  { "worming burns #10", "http://www.burned.com/" },
  { "worming burns #20", "http://www.worms.com/" },
  { "jive music", "http://www.worms.com/" },
};
53
54class BookmarkProviderTest : public testing::Test,
55                             public AutocompleteProviderListener {
56 public:
57  BookmarkProviderTest() : model_(new BookmarkModel(NULL)) {}
58
59  // AutocompleteProviderListener: Not called.
60  virtual void OnProviderUpdate(bool updated_matches) OVERRIDE {}
61
62 protected:
63  virtual void SetUp() OVERRIDE;
64
65  scoped_ptr<TestingProfile> profile_;
66  scoped_ptr<BookmarkModel> model_;
67  scoped_refptr<BookmarkProvider> provider_;
68
69 private:
70  DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest);
71};
72
73void BookmarkProviderTest::SetUp() {
74  profile_.reset(new TestingProfile());
75  DCHECK(profile_.get());
76  provider_ = new BookmarkProvider(this, profile_.get());
77  DCHECK(provider_);
78  provider_->set_bookmark_model_for_testing(model_.get());
79
80  const BookmarkNode* other_node = model_->other_node();
81  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) {
82    const BookmarksTestInfo& cur(bookmark_provider_test_data[i]);
83    const GURL url(cur.url);
84    model_->AddURL(other_node, other_node->child_count(),
85                   ASCIIToUTF16(cur.title), url);
86  }
87}
88
// Structures and functions supporting the BookmarkProviderTest.Positions
// unit test.

// One matched span within a bookmark title: |begin| is the offset of the
// first matched character and |end| is the offset just past the last one.
struct TestBookmarkPosition {
  size_t begin;
  size_t end;

  TestBookmarkPosition(size_t begin_offset, size_t end_offset)
      : begin(begin_offset),
        end(end_offset) {
  }
};

// An ordered list of matched spans for a single bookmark title.
typedef std::vector<TestBookmarkPosition> TestBookmarkPositions;
100
101// Return |positions| as a formatted string for unit test diagnostic output.
102std::string TestBookmarkPositionsAsString(
103    const TestBookmarkPositions& positions) {
104  std::string position_string("{");
105  for (TestBookmarkPositions::const_iterator i = positions.begin();
106       i != positions.end(); ++i) {
107    if (i != positions.begin())
108      position_string += ", ";
109    position_string += "{" + base::IntToString(i->begin) + ", " +
110        base::IntToString(i->end) + "}";
111  }
112  position_string += "}\n";
113  return position_string;
114}
115
116// Return the positions in |matches| as a formatted string for unit test
117// diagnostic output.
118string16 MatchesAsString16(const ACMatches& matches) {
119  string16 matches_string;
120  for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
121    matches_string.append(ASCIIToUTF16("    '"));
122    matches_string.append(i->description);
123    matches_string.append(ASCIIToUTF16("'\n"));
124  }
125  return matches_string;
126}
127
128// Comparison function for sorting search terms by descending length.
129bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a,
130                                const TestBookmarkPosition& pos_b) {
131  return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end;
132}
133
134// Convience function to make comparing ACMatchClassifications against the
135// test expectations structure easier.
136TestBookmarkPositions PositionsFromAutocompleteMatch(
137    const AutocompleteMatch& match) {
138  TestBookmarkPositions positions;
139  bool started = false;
140  size_t start = 0;
141  for (AutocompleteMatch::ACMatchClassifications::const_iterator
142       i = match.description_class.begin();
143       i != match.description_class.end(); ++i) {
144    if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) {
145      // We have found the start of a match.
146      EXPECT_FALSE(started);
147      started = true;
148      start = i->offset;
149    } else if (started) {
150      // We have found the end of a match.
151      started = false;
152      positions.push_back(TestBookmarkPosition(start, i->offset));
153      start = 0;
154    }
155  }
156  // Record the final position if the last match goes to the end of the
157  // candidate string.
158  if (started)
159    positions.push_back(TestBookmarkPosition(start, match.description.size()));
160  return positions;
161}
162
163// Convience function to make comparing test expectations structure against the
164// actual ACMatchClassifications easier.
165TestBookmarkPositions PositionsFromExpectations(
166    const size_t expectations[9][2]) {
167  TestBookmarkPositions positions;
168  size_t i = 0;
169  // The array is zero-terminated in the [1]th element.
170  while (expectations[i][1]) {
171    positions.push_back(
172        TestBookmarkPosition(expectations[i][0], expectations[i][1]));
173    ++i;
174  }
175  return positions;
176}
177
178TEST_F(BookmarkProviderTest, Positions) {
179  // Simulate searches.
180  // Description of |positions|:
181  //   The first index represents the collection of positions for each expected
182  //   match. The count of the actual subarrays in each instance of |query_data|
183  //   must equal |match_count|. The second index represents each expected
184  //   match position. The third index represents the |start| and |end| of the
185  //   expected match's position within the |test_data|. This array must be
186  //   terminated by an entry with a value of '0' for |end|.
187  // Example:
188  //   Consider the line for 'def' below:
189  //     {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}},
190  //   There are two expected matches:
191  //     0. {{4, 7}, {XXX, 0}}
192  //     1. {{2, 5}, {11 ,14}, {XXX, 0}}
193  //   For the first match, [0], there is one match within the bookmark's title
194  //   expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX'
195  //   value is ignored. The second match, [1], indicates that two matches are
196  //   expected within the bookmark title "a definite definition". In each case,
197  //   the {XXX, 0} indicates the end of the subarray. Or:
198  //                 Match #1            Match #2
199  //                 ------------------  ----------------------------
200  //                  Pos1    Term        Pos1    Pos2      Term
201  //                  ------  --------    ------  --------  --------
202  //     {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}},
203  //
204  struct QueryData {
205    const std::string query;
206    const size_t match_count;  // This count must match the number of major
207                               // elements in the following |positions| array.
208    const size_t positions[99][9][2];
209  } query_data[] = {
210    // This first set is primarily for position detection validation.
211    {"abc",                   3, {{{0, 3}, {0, 0}},
212                                  {{0, 3}, {0, 0}},
213                                  {{0, 3}, {0, 0}}}},
214    {"abcde",                 2, {{{0, 5}, {0, 0}},
215                                  {{0, 5}, {0, 0}}}},
216    {"foo bar",               0, {{{0, 0}}}},
217    {"fooey bark",            0, {{{0, 0}}}},
218    {"def",                   2, {{{2, 5}, {0, 0}},
219                                  {{4, 7}, {0, 0}}}},
220    {"ghi jkl",               2, {{{0, 3}, {4, 7}, {0, 0}},
221                                  {{0, 3}, {4, 7}, {0, 0}}}},
222    // NB: GetBookmarksWithTitlesMatching(...) uses exact match for "a".
223    {"a",                     1, {{{0, 1}, {0, 0}}}},
224    {"a d",                   0, {{{0, 0}}}},
225    {"carry carbon",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
226    // NB: GetBookmarksWithTitlesMatching(...) sorts the match positions.
227    {"carbon carry",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
228    {"arbon",                 0, {{{0, 0}}}},
229    {"ar",                    0, {{{0, 0}}}},
230    {"arry",                  0, {{{0, 0}}}},
231    // Quoted terms are single terms.
232    {"\"carry carbon\"",      1, {{{0, 12}, {0, 0}}}},
233    {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}},
234    // Quoted terms require complete word matches.
235    {"\"carry carbo\"",       0, {{{0, 0}}}},
236    // This set uses duplicated and/or overlaps search terms in the title.
237    {"frank",                 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}},
238    {"frankly",               1, {{{0, 7}, {8, 15}, {0, 0}}}},
239    {"frankly frankly",       1, {{{0, 7}, {8, 15}, {0, 0}}}},
240    {"foobar foo",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
241    {"foo foobar",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
242  };
243
244  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
245    AutocompleteInput input(ASCIIToUTF16(query_data[i].query),
246                            string16(), false, false,  false,
247                            AutocompleteInput::ALL_MATCHES);
248    provider_->Start(input, false);
249    const ACMatches& matches(provider_->matches());
250    // Validate number of results is as expected.
251    EXPECT_LE(matches.size(), query_data[i].match_count)
252        << "One or more of the following matches were unexpected:\n"
253        << MatchesAsString16(matches)
254        << "For query '" << query_data[i].query << "'.";
255    EXPECT_GE(matches.size(), query_data[i].match_count)
256        << "One or more expected matches are missing. Matches found:\n"
257        << MatchesAsString16(matches)
258        << "for query '" << query_data[i].query << "'.";
259    // Validate positions within each match is as expected.
260    for (size_t j = 0; j < matches.size(); ++j) {
261      // Collect the expected positions as a vector, collect the match's
262      // classifications for match positions as a vector, then compare.
263      TestBookmarkPositions expected_positions(
264          PositionsFromExpectations(query_data[i].positions[j]));
265      TestBookmarkPositions actual_positions(
266          PositionsFromAutocompleteMatch(matches[j]));
267      EXPECT_TRUE(std::equal(expected_positions.begin(),
268                             expected_positions.end(),
269                             actual_positions.begin(),
270                             TestBookmarkPositionsEqual))
271          << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions)
272          << "ACTUAL:   " << TestBookmarkPositionsAsString(actual_positions)
273          << "    for query: '" << query_data[i].query << "'.";
274    }
275  }
276}
277
278TEST_F(BookmarkProviderTest, Rankings) {
279  // Simulate searches.
280  struct QueryData {
281    const std::string query;
282    // |match_count| must match the number of elements in the following
283    // |matches| array.
284    const size_t match_count;
285    // |matches| specifies the titles for all bookmarks expected to be matched
286    // by the |query|
287    const std::string matches[99];
288  } query_data[] = {
289    // Basic ranking test.
290    {"abc",       3, {"abcde",      // Most complete match.
291                      "abcdef",
292                      "abc def"}},  // Least complete match.
293    {"ghi",       2, {"ghi jkl",    // Matched earlier.
294                      "jkl ghi"}},  // Matched later.
295    // Rankings of exact-word matches with different URLs.
296    {"achlorhydric",
297                  3, {"achlorhydric mockingbirds resuscitates featherhead",
298                      "achlorhydric featherheads resuscitates mockingbirds",
299                      "featherhead resuscitates achlorhydric mockingbirds"}},
300    {"achlorhydric featherheads",
301                  2, {"achlorhydric featherheads resuscitates mockingbirds",
302                      "mockingbirds resuscitates featherheads achlorhydric"}},
303    {"mockingbirds resuscitates",
304                  3, {"mockingbirds resuscitates featherheads achlorhydric",
305                      "achlorhydric mockingbirds resuscitates featherhead",
306                      "featherhead resuscitates achlorhydric mockingbirds"}},
307    // Ranking of exact-word matches with URL boost.
308    {"worms",     2, {"burning worms #2",    // boosted
309                      "burning worms #1"}},  // not boosted
310    // Ranking of prefix matches with URL boost. Note that a query of
311    // "worm burn" will have the same results.
312    {"burn worm", 3, {"burning worms #2",    // boosted
313                      "worming burns #20",   // boosted
314                      "burning worms #1"}},  // not boosted but shorter
315  };
316
317  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
318    AutocompleteInput input(ASCIIToUTF16(query_data[i].query),
319                            string16(), false, false,  false,
320                            AutocompleteInput::ALL_MATCHES);
321    provider_->Start(input, false);
322    const ACMatches& matches(provider_->matches());
323    // Validate number and content of results is as expected.
324    for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size());
325         ++j) {
326      EXPECT_LT(j, query_data[i].match_count) << "    Unexpected match '"
327          << UTF16ToUTF8(matches[j].description) << "' for query: '"
328          <<  query_data[i].query << "'.";
329      if (j >= query_data[i].match_count)
330        continue;
331      EXPECT_LT(j, matches.size()) << "    Missing match '"
332          << query_data[i].matches[j] << "' for query: '"
333          << query_data[i].query << "'.";
334      if (j >= matches.size())
335        continue;
336      EXPECT_EQ(query_data[i].matches[j], UTF16ToUTF8(matches[j].description))
337          << "    Mismatch at [" << base::IntToString(j) << "] for query '"
338          << query_data[i].query << "'.";
339    }
340  }
341}
342