history_url_provider_unittest.cc revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/file_util.h"
6#include "base/message_loop.h"
7#include "base/path_service.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/autocomplete/history_url_provider.h"
11#include "chrome/browser/chrome_thread.h"
12#include "chrome/browser/history/history.h"
13#include "chrome/test/testing_profile.h"
14#include "testing/gtest/include/gtest/gtest.h"
15#if defined(OS_MACOSX)
16#include "base/mac_util.h"
17#endif
18
19using base::Time;
20using base::TimeDelta;
21
// One row of seed data that FillData() loads into the history service.
//
// The string fields are C string literals rather than std::string so that
// this file-scope table is constant-initialized: a global array of
// std::string would need a static constructor and destructor to run before
// and after main().  The literals convert implicitly to std::string at the
// places that need one.
struct TestURLInfo {
  const char* url;    // Page URL; handed to GURL by FillData().
  const char* title;  // Page title in UTF-8; FillData() converts to UTF-16.
  int visit_count;    // Passed to AddPageWithDetails() as the visit count.
  int typed_count;    // Passed to AddPageWithDetails() as the typed count.
} test_db[] = {
  {"http://www.google.com/", "Google", 3, 3},

  // High-quality pages should get a host synthesized as a lower-quality match.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100},

  // Less popular pages should have hosts synthesized as higher-quality
  // matches.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0},

  // Unpopular pages should not appear in the results at all.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1},

  // If a host has a match, we should pick it up during host synthesis.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2},
  {"http://news.google.com/", "Google News", 1, 1},

  // Suggested short URLs must be "good enough" and must match user input.
  {"http://foo.com/", "Dir", 5, 5},
  {"http://foo.com/dir/", "Dir", 2, 2},
  {"http://foo.com/dir/another/", "Dir", 5, 1},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2},

  // We throw in a lot of extra URLs here to make sure we're testing the
  // history database's query, not just the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2},
  {"http://startest.com/y/b", "B", 5, 2},
  {"http://startest.com/x/c", "C", 5, 2},
  {"http://startest.com/x/d", "D", 5, 5},
  {"http://startest.com/y/e", "E", 4, 2},
  {"http://startest.com/y/f", "F", 3, 2},
  {"http://startest.com/y/g", "G", 3, 2},
  {"http://startest.com/y/h", "H", 3, 2},
  {"http://startest.com/y/i", "I", 3, 2},
  {"http://startest.com/y/j", "J", 3, 2},
  {"http://startest.com/y/k", "K", 3, 2},
  {"http://startest.com/y/l", "L", 3, 2},
  {"http://startest.com/y/m", "M", 3, 2},

  // A file: URL is useful for testing that fixup does the right thing w.r.t.
  // the number of trailing slashes on the user's input.
  {"file:///C:/foo.txt", "", 2, 2},

  // Results with absurdly high typed_counts so that very generic queries like
  // "http" will give consistent results even if more data is added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000},

  // Domain name with number.
  {"http://www.17173.com/", "Domain with number", 3, 3},

  // URLs to test exact-matching behavior.
  {"http://go/", "Intranet URL", 1, 1},
  {"http://gooey/", "Intranet URL 2", 5, 5},

  // URLs for testing offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2},

  // URLs for testing ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2},
  {"http://winky/", "Intranet winky", 2, 2},
  {"http://www.winky.com/", "Internet winky", 5, 0},
};
94
95class HistoryURLProviderTest : public testing::Test,
96                               public ACProviderListener {
97 public:
98  HistoryURLProviderTest()
99      : ui_thread_(ChromeThread::UI, &message_loop_),
100        file_thread_(ChromeThread::FILE, &message_loop_) {}
101
102  // ACProviderListener
103  virtual void OnProviderUpdate(bool updated_matches);
104
105 protected:
106  // testing::Test
107  virtual void SetUp() {
108    SetUpImpl(false);
109  }
110  virtual void TearDown();
111
112  // Does the real setup.
113  void SetUpImpl(bool no_db);
114
115  // Fills test data into the history system.
116  void FillData();
117
118  // Runs an autocomplete query on |text| and checks to see that the returned
119  // results' destination URLs match those provided.
120  void RunTest(const std::wstring text,
121               const std::wstring& desired_tld,
122               bool prevent_inline_autocomplete,
123               const std::string* expected_urls,
124               size_t num_results);
125
126  void RunAdjustOffsetTest(const std::wstring text, size_t expected_offset);
127
128  MessageLoopForUI message_loop_;
129  ChromeThread ui_thread_;
130  ChromeThread file_thread_;
131  ACMatches matches_;
132  scoped_ptr<TestingProfile> profile_;
133  HistoryService* history_service_;
134
135 private:
136  scoped_refptr<HistoryURLProvider> autocomplete_;
137};
138
139class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
140 protected:
141  virtual void SetUp() {
142    SetUpImpl(true);
143  }
144};
145
146void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
147  if (autocomplete_->done())
148    MessageLoop::current()->Quit();
149}
150
151void HistoryURLProviderTest::SetUpImpl(bool no_db) {
152  profile_.reset(new TestingProfile());
153  profile_->CreateHistoryService(true, no_db);
154  history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
155
156  autocomplete_ = new HistoryURLProvider(this, profile_.get(), L"en-US,en,ko");
157
158  FillData();
159}
160
161void HistoryURLProviderTest::TearDown() {
162  autocomplete_ = NULL;
163}
164
165void HistoryURLProviderTest::FillData() {
166  // All visits are a long time ago (some tests require this since we do some
167  // special logic for things visited very recently). Note that this time must
168  // be more recent than the "archived history" threshold for the data to go
169  // into the main database.
170  //
171  // TODO(brettw) It would be nice if we could test this behavior, in which
172  // case the time would be specifed in the test_db structure.
173  Time visit_time = Time::Now() - TimeDelta::FromDays(80);
174
175  for (size_t i = 0; i < arraysize(test_db); ++i) {
176    const TestURLInfo& cur = test_db[i];
177    const GURL current_url(cur.url);
178    history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title),
179                                         cur.visit_count, cur.typed_count,
180                                         visit_time, false,
181                                         history::SOURCE_BROWSED);
182  }
183}
184
185void HistoryURLProviderTest::RunTest(const std::wstring text,
186                                     const std::wstring& desired_tld,
187                                     bool prevent_inline_autocomplete,
188                                     const std::string* expected_urls,
189                                     size_t num_results) {
190  AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete,
191                          false, false);
192  autocomplete_->Start(input, false);
193  if (!autocomplete_->done())
194    MessageLoop::current()->Run();
195
196  matches_ = autocomplete_->matches();
197  ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
198                                          << "\nTLD: \"" << desired_tld << "\"";
199  for (size_t i = 0; i < num_results; ++i)
200    EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec());
201}
202
203void HistoryURLProviderTest::RunAdjustOffsetTest(const std::wstring text,
204                                                 size_t expected_offset) {
205  AutocompleteInput input(text, std::wstring(), false, false, false);
206  autocomplete_->Start(input, false);
207  if (!autocomplete_->done())
208    MessageLoop::current()->Run();
209
210  matches_ = autocomplete_->matches();
211  ASSERT_GE(matches_.size(), 1U) << "Input text: " << text;
212  EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset);
213}
214
215TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
216  // Test that hosts get synthesized below popular pages.
217  const std::string expected_nonsynth[] = {
218    "http://slashdot.org/favorite_page.html",
219    "http://slashdot.org/",
220  };
221  RunTest(L"slash", std::wstring(), true, expected_nonsynth,
222          arraysize(expected_nonsynth));
223
224  // Test that hosts get synthesized above less popular pages.
225  const std::string expected_synth[] = {
226    "http://kerneltrap.org/",
227    "http://kerneltrap.org/not_very_popular.html",
228  };
229  RunTest(L"kernel", std::wstring(), true, expected_synth,
230          arraysize(expected_synth));
231
232  // Test that unpopular pages are ignored completely.
233  RunTest(L"fresh", std::wstring(), true, NULL, 0);
234
235  // Test that if we have a synthesized host that matches a suggestion, they
236  // get combined into one.
237  const std::string expected_combine[] = {
238    "http://news.google.com/",
239    "http://news.google.com/?ned=us&topic=n",
240  };
241  ASSERT_NO_FATAL_FAILURE(RunTest(L"news", std::wstring(), true,
242      expected_combine, arraysize(expected_combine)));
243  // The title should also have gotten set properly on the host for the
244  // synthesized one, since it was also in the results.
245  EXPECT_EQ(std::wstring(L"Google News"), matches_.front().description);
246
247  // Test that short URL matching works correctly as the user types more
248  // (several tests):
249  // The entry for foo.com is the best of all five foo.com* entries.
250  const std::string short_1[] = {
251    "http://foo.com/",
252    "http://foo.com/dir/another/again/myfile.html",
253    "http://foo.com/dir/",
254  };
255  RunTest(L"foo", std::wstring(), true, short_1, arraysize(short_1));
256
257  // When the user types the whole host, make sure we don't get two results for
258  // it.
259  const std::string short_2[] = {
260    "http://foo.com/",
261    "http://foo.com/dir/another/again/myfile.html",
262    "http://foo.com/dir/",
263    "http://foo.com/dir/another/",
264  };
265  RunTest(L"foo.com", std::wstring(), true, short_2, arraysize(short_2));
266  RunTest(L"foo.com/", std::wstring(), true, short_2, arraysize(short_2));
267
268  // The filename is the second best of the foo.com* entries, but there is a
269  // shorter URL that's "good enough".  The host doesn't match the user input
270  // and so should not appear.
271  const std::string short_3[] = {
272    "http://foo.com/d",
273    "http://foo.com/dir/another/",
274    "http://foo.com/dir/another/again/myfile.html",
275    "http://foo.com/dir/",
276  };
277  RunTest(L"foo.com/d", std::wstring(), true, short_3, arraysize(short_3));
278
279  // We shouldn't promote shorter URLs than the best if they're not good
280  // enough.
281  const std::string short_4[] = {
282    "http://foo.com/dir/another/a",
283    "http://foo.com/dir/another/again/myfile.html",
284    "http://foo.com/dir/another/again/",
285  };
286  RunTest(L"foo.com/dir/another/a", std::wstring(), true, short_4,
287          arraysize(short_4));
288
289  // Exact matches should always be best no matter how much more another match
290  // has been typed.
291  const std::string short_5a[] = {
292    "http://gooey/",
293    "http://www.google.com/",
294  };
295  const std::string short_5b[] = {
296    "http://go/",
297    "http://gooey/",
298    "http://www.google.com/",
299  };
300  RunTest(L"g", std::wstring(), false, short_5a, arraysize(short_5a));
301  RunTest(L"go", std::wstring(), false, short_5b, arraysize(short_5b));
302}
303
304TEST_F(HistoryURLProviderTest, CullRedirects) {
305  // URLs we will be using, plus the visit counts they will initially get
306  // (the redirect set below will also increment the visit counts). We want
307  // the results to be in A,B,C order. Note also that our visit counts are
308  // all high enough so that domain synthesizing won't get triggered.
309  struct RedirectCase {
310    const char* url;
311    int count;
312  };
313  static const RedirectCase redirect[] = {
314    {"http://redirects/A", 30},
315    {"http://redirects/B", 20},
316    {"http://redirects/C", 10}
317  };
318  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) {
319    history_service_->AddPageWithDetails(GURL(redirect[i].url),
320                                         UTF8ToUTF16("Title"),
321                                         redirect[i].count, redirect[i].count,
322                                         Time::Now(), false,
323                                         history::SOURCE_BROWSED);
324  }
325
326  // Create a B->C->A redirect chain, but set the visit counts such that they
327  // will appear in A,B,C order in the results. The autocomplete query will
328  // search for the most recent visit when looking for redirects, so this will
329  // be found even though the previous visits had no redirects.
330  history::RedirectList redirects_to_a;
331  redirects_to_a.push_back(GURL(redirect[1].url));
332  redirects_to_a.push_back(GURL(redirect[2].url));
333  redirects_to_a.push_back(GURL(redirect[0].url));
334  history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(),
335                            PageTransition::TYPED, redirects_to_a,
336                            history::SOURCE_BROWSED, true);
337
338  // Because all the results are part of a redirect chain with other results,
339  // all but the first one (A) should be culled. We should get the default
340  // "what you typed" result, plus this one.
341  const std::wstring typing(L"http://redirects/");
342  const std::string expected_results[] = {
343    WideToUTF8(typing),
344    redirect[0].url};
345  RunTest(typing, std::wstring(), true, expected_results,
346          arraysize(expected_results));
347}
348
349TEST_F(HistoryURLProviderTest, WhatYouTyped) {
350  // Make sure we suggest a What You Typed match at the right times.
351  RunTest(L"wytmatch", std::wstring(), false, NULL, 0);
352  RunTest(L"wytmatch foo bar", std::wstring(), false, NULL, 0);
353  RunTest(L"wytmatch+foo+bar", std::wstring(), false, NULL, 0);
354  RunTest(L"wytmatch+foo+bar.com", std::wstring(), false, NULL, 0);
355
356  const std::string results_1[] = {"http://www.wytmatch.com/"};
357  RunTest(L"wytmatch", L"com", false, results_1, arraysize(results_1));
358
359  const std::string results_2[] = {"http://wytmatch%20foo%20bar/"};
360  RunTest(L"http://wytmatch foo bar", std::wstring(), false, results_2,
361          arraysize(results_2));
362
363  const std::string results_3[] = {"https://wytmatch%20foo%20bar/"};
364  RunTest(L"https://wytmatch foo bar", std::wstring(), false, results_3,
365          arraysize(results_3));
366
367  // Test the corner case where a user has fully typed a previously visited
368  // intranet address and is now hitting ctrl-enter, which completes to a
369  // previously unvisted internet domain.
370  const std::string binky_results[] = {"http://binky/"};
371  const std::string binky_com_results[] = {
372    "http://www.binky.com/",
373    "http://binky/",
374  };
375  RunTest(L"binky", std::wstring(), false, binky_results,
376          arraysize(binky_results));
377  RunTest(L"binky", L"com", false, binky_com_results,
378          arraysize(binky_com_results));
379
380  // Test the related case where a user has fully typed a previously visited
381  // intranet address and is now hitting ctrl-enter, which completes to a
382  // previously visted internet domain.
383  const std::string winky_results[] = {
384    "http://winky/",
385    "http://www.winky.com/",
386  };
387  const std::string winky_com_results[] = {
388    "http://www.winky.com/",
389    "http://winky/",
390  };
391  RunTest(L"winky", std::wstring(), false, winky_results,
392          arraysize(winky_results));
393  RunTest(L"winky", L"com", false, winky_com_results,
394          arraysize(winky_com_results));
395}
396
397TEST_F(HistoryURLProviderTest, Fixup) {
398  // Test for various past crashes we've had.
399  RunTest(L"\\", std::wstring(), false, NULL, 0);
400  RunTest(L"#", std::wstring(), false, NULL, 0);
401  RunTest(L"%20", std::wstring(), false, NULL, 0);
402  RunTest(L"\uff65@s", std::wstring(), false, NULL, 0);
403  RunTest(L"\u2015\u2015@ \uff7c", std::wstring(), false, NULL, 0);
404
405  // Fixing up "file:" should result in an inline autocomplete offset of just
406  // after "file:", not just after "file://".
407  const std::wstring input_1(L"file:");
408  const std::string fixup_1[] = {"file:///C:/foo.txt"};
409  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, std::wstring(), false, fixup_1,
410                                  arraysize(fixup_1)));
411  EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset);
412
413  // Fixing up "http:/" should result in an inline autocomplete offset of just
414  // after "http:/", not just after "http:".
415  const std::wstring input_2(L"http:/");
416  const std::string fixup_2[] = {
417    "http://bogussite.com/a",
418    "http://bogussite.com/b",
419    "http://bogussite.com/c",
420  };
421  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, std::wstring(), false, fixup_2,
422                                  arraysize(fixup_2)));
423  EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset);
424
425  // Adding a TLD to a small number like "56" should result in "www.56.com"
426  // rather than "0.0.0.56.com".
427  const std::string fixup_3[] = {"http://www.56.com/"};
428  RunTest(L"56", L"com", true, fixup_3, arraysize(fixup_3));
429
430  // An input looks like a IP address like "127.0.0.1" should result in
431  // "http://127.0.0.1/".
432  const std::string fixup_4[] = {"http://127.0.0.1/"};
433  RunTest(L"127.0.0.1", std::wstring(), false, fixup_4, arraysize(fixup_4));
434
435  // An number "17173" should result in "http://www.17173.com/" in db.
436  const std::string fixup_5[] = {"http://www.17173.com/"};
437  RunTest(L"17173", std::wstring(), false, fixup_5, arraysize(fixup_5));
438}
439
440TEST_F(HistoryURLProviderTest, AdjustOffset) {
441  RunAdjustOffsetTest(L"http://www.\uAD50\uC721", 13);
442  RunAdjustOffsetTest(L"http://spaces.com/path%20with%20spa", 31);
443  RunAdjustOffsetTest(L"http://ms/c++ s", 15);
444}
445
446TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
447  // Ensure that we will still produce matches for navigation when there is no
448  // database.
449  std::string navigation_1[] = {"http://test.com/"};
450  RunTest(L"test.com", std::wstring(), false, navigation_1,
451          arraysize(navigation_1));
452
453  std::string navigation_2[] = {"http://slash/"};
454  RunTest(L"slash", std::wstring(), false, navigation_2,
455          arraysize(navigation_2));
456
457  RunTest(L"this is a query", std::wstring(), false, NULL, 0);
458}
459