history_url_provider_unittest.cc revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/file_util.h"
6#include "base/message_loop.h"
7#include "base/path_service.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/autocomplete/autocomplete_match.h"
11#include "chrome/browser/autocomplete/history_url_provider.h"
12#include "chrome/browser/browser_thread.h"
13#include "chrome/browser/history/history.h"
14#include "chrome/test/testing_profile.h"
15#include "testing/gtest/include/gtest/gtest.h"
16
17using base::Time;
18using base::TimeDelta;
19
// One row of seed history: a URL, its page title, and the visit/typed counts
// that are handed to the history service when the fixture fills its data.
struct TestURLInfo {
  std::string url;
  std::string title;
  int visit_count;
  int typed_count;
};

// Seed history shared by every test in this file.
TestURLInfo test_db[] = {
  {"http://www.google.com/", "Google", 3, 3},

  // A high-quality page: its synthesized host should rank below it.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100},

  // A less popular page: its synthesized host should rank above it.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0},

  // An unpopular page, which should be absent from the results entirely.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1},

  // When the host itself has a match, host synthesis should pick it up.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2},
  {"http://news.google.com/", "Google News", 1, 1},

  // Suggested short URLs must be "good enough" and must match user input.
  {"http://foo.com/", "Dir", 5, 5},
  {"http://foo.com/dir/", "Dir", 2, 2},
  {"http://foo.com/dir/another/", "Dir", 5, 1},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2},

  // Plenty of extra URLs, so that the history database's query is exercised
  // and not just the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2},
  {"http://startest.com/y/b", "B", 5, 2},
  {"http://startest.com/x/c", "C", 5, 2},
  {"http://startest.com/x/d", "D", 5, 5},
  {"http://startest.com/y/e", "E", 4, 2},
  {"http://startest.com/y/f", "F", 3, 2},
  {"http://startest.com/y/g", "G", 3, 2},
  {"http://startest.com/y/h", "H", 3, 2},
  {"http://startest.com/y/i", "I", 3, 2},
  {"http://startest.com/y/j", "J", 3, 2},
  {"http://startest.com/y/k", "K", 3, 2},
  {"http://startest.com/y/l", "L", 3, 2},
  {"http://startest.com/y/m", "M", 3, 2},

  // A file: URL, for checking that fixup handles the number of trailing
  // slashes in the user's input correctly.
  {"file:///C:/foo.txt", "", 2, 2},

  // Entries with absurdly high typed_counts, so that very generic queries
  // such as "http" stay stable even if more rows are added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000},

  // A domain name containing a number.
  {"http://www.17173.com/", "Domain with number", 3, 3},

  // Entries for exercising exact-match behavior.
  {"http://go/", "Intranet URL", 1, 1},
  {"http://gooey/", "Intranet URL 2", 5, 5},

  // Entries for exercising inline-autocomplete offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2},

  // Entries for exercising ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2},
  {"http://winky/", "Intranet winky", 2, 2},
  {"http://www.winky.com/", "Internet winky", 5, 0},
};
92
93class HistoryURLProviderTest : public testing::Test,
94                               public ACProviderListener {
95 public:
96  HistoryURLProviderTest()
97      : ui_thread_(BrowserThread::UI, &message_loop_),
98        file_thread_(BrowserThread::FILE, &message_loop_) {}
99
100  // ACProviderListener
101  virtual void OnProviderUpdate(bool updated_matches);
102
103 protected:
104  // testing::Test
105  virtual void SetUp() {
106    SetUpImpl(false);
107  }
108  virtual void TearDown();
109
110  // Does the real setup.
111  void SetUpImpl(bool no_db);
112
113  // Fills test data into the history system.
114  void FillData();
115
116  // Runs an autocomplete query on |text| and checks to see that the returned
117  // results' destination URLs match those provided.
118  void RunTest(const string16 text,
119               const string16& desired_tld,
120               bool prevent_inline_autocomplete,
121               const std::string* expected_urls,
122               size_t num_results);
123
124  void RunAdjustOffsetTest(const string16 text, size_t expected_offset);
125
126  MessageLoopForUI message_loop_;
127  BrowserThread ui_thread_;
128  BrowserThread file_thread_;
129  ACMatches matches_;
130  scoped_ptr<TestingProfile> profile_;
131  HistoryService* history_service_;
132
133 private:
134  scoped_refptr<HistoryURLProvider> autocomplete_;
135};
136
137class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
138 protected:
139  virtual void SetUp() {
140    SetUpImpl(true);
141  }
142};
143
144void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
145  if (autocomplete_->done())
146    MessageLoop::current()->Quit();
147}
148
149void HistoryURLProviderTest::SetUpImpl(bool no_db) {
150  profile_.reset(new TestingProfile());
151  profile_->CreateHistoryService(true, no_db);
152  history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
153
154  autocomplete_ = new HistoryURLProvider(this, profile_.get(), "en-US,en,ko");
155
156  FillData();
157}
158
159void HistoryURLProviderTest::TearDown() {
160  autocomplete_ = NULL;
161}
162
163void HistoryURLProviderTest::FillData() {
164  // All visits are a long time ago (some tests require this since we do some
165  // special logic for things visited very recently). Note that this time must
166  // be more recent than the "archived history" threshold for the data to go
167  // into the main database.
168  //
169  // TODO(brettw) It would be nice if we could test this behavior, in which
170  // case the time would be specifed in the test_db structure.
171  Time visit_time = Time::Now() - TimeDelta::FromDays(80);
172
173  for (size_t i = 0; i < arraysize(test_db); ++i) {
174    const TestURLInfo& cur = test_db[i];
175    const GURL current_url(cur.url);
176    history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title),
177                                         cur.visit_count, cur.typed_count,
178                                         visit_time, false,
179                                         history::SOURCE_BROWSED);
180  }
181}
182
183void HistoryURLProviderTest::RunTest(const string16 text,
184                                     const string16& desired_tld,
185                                     bool prevent_inline_autocomplete,
186                                     const std::string* expected_urls,
187                                     size_t num_results) {
188  AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete,
189                          false, true, false);
190  autocomplete_->Start(input, false);
191  if (!autocomplete_->done())
192    MessageLoop::current()->Run();
193
194  matches_ = autocomplete_->matches();
195  ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
196                                          << "\nTLD: \"" << desired_tld << "\"";
197  for (size_t i = 0; i < num_results; ++i)
198    EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec());
199}
200
201void HistoryURLProviderTest::RunAdjustOffsetTest(const string16 text,
202                                                 size_t expected_offset) {
203  AutocompleteInput input(text, string16(), false, false, true, false);
204  autocomplete_->Start(input, false);
205  if (!autocomplete_->done())
206    MessageLoop::current()->Run();
207
208  matches_ = autocomplete_->matches();
209  ASSERT_GE(matches_.size(), 1U) << "Input text: " << text;
210  EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset);
211}
212
213TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
214  // Test that hosts get synthesized below popular pages.
215  const std::string expected_nonsynth[] = {
216    "http://slashdot.org/favorite_page.html",
217    "http://slashdot.org/",
218  };
219  RunTest(ASCIIToUTF16("slash"), string16(), true, expected_nonsynth,
220          arraysize(expected_nonsynth));
221
222  // Test that hosts get synthesized above less popular pages.
223  const std::string expected_synth[] = {
224    "http://kerneltrap.org/",
225    "http://kerneltrap.org/not_very_popular.html",
226  };
227  RunTest(ASCIIToUTF16("kernel"), string16(), true, expected_synth,
228          arraysize(expected_synth));
229
230  // Test that unpopular pages are ignored completely.
231  RunTest(ASCIIToUTF16("fresh"), string16(), true, NULL, 0);
232
233  // Test that if we have a synthesized host that matches a suggestion, they
234  // get combined into one.
235  const std::string expected_combine[] = {
236    "http://news.google.com/",
237    "http://news.google.com/?ned=us&topic=n",
238  };
239  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), string16(), true,
240      expected_combine, arraysize(expected_combine)));
241  // The title should also have gotten set properly on the host for the
242  // synthesized one, since it was also in the results.
243  EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description);
244
245  // Test that short URL matching works correctly as the user types more
246  // (several tests):
247  // The entry for foo.com is the best of all five foo.com* entries.
248  const std::string short_1[] = {
249    "http://foo.com/",
250    "http://foo.com/dir/another/again/myfile.html",
251    "http://foo.com/dir/",
252  };
253  RunTest(ASCIIToUTF16("foo"), string16(), true, short_1, arraysize(short_1));
254
255  // When the user types the whole host, make sure we don't get two results for
256  // it.
257  const std::string short_2[] = {
258    "http://foo.com/",
259    "http://foo.com/dir/another/again/myfile.html",
260    "http://foo.com/dir/",
261    "http://foo.com/dir/another/",
262  };
263  RunTest(ASCIIToUTF16("foo.com"), string16(), true, short_2,
264          arraysize(short_2));
265  RunTest(ASCIIToUTF16("foo.com/"), string16(), true, short_2,
266          arraysize(short_2));
267
268  // The filename is the second best of the foo.com* entries, but there is a
269  // shorter URL that's "good enough".  The host doesn't match the user input
270  // and so should not appear.
271  const std::string short_3[] = {
272    "http://foo.com/d",
273    "http://foo.com/dir/another/",
274    "http://foo.com/dir/another/again/myfile.html",
275    "http://foo.com/dir/",
276  };
277  RunTest(ASCIIToUTF16("foo.com/d"), string16(), true, short_3,
278          arraysize(short_3));
279
280  // We shouldn't promote shorter URLs than the best if they're not good
281  // enough.
282  const std::string short_4[] = {
283    "http://foo.com/dir/another/a",
284    "http://foo.com/dir/another/again/myfile.html",
285    "http://foo.com/dir/another/again/",
286  };
287  RunTest(ASCIIToUTF16("foo.com/dir/another/a"), string16(), true, short_4,
288          arraysize(short_4));
289
290  // Exact matches should always be best no matter how much more another match
291  // has been typed.
292  const std::string short_5a[] = {
293    "http://gooey/",
294    "http://www.google.com/",
295  };
296  const std::string short_5b[] = {
297    "http://go/",
298    "http://gooey/",
299    "http://www.google.com/",
300  };
301  RunTest(ASCIIToUTF16("g"), string16(), false, short_5a, arraysize(short_5a));
302  RunTest(ASCIIToUTF16("go"), string16(), false, short_5b, arraysize(short_5b));
303}
304
305TEST_F(HistoryURLProviderTest, CullRedirects) {
306  // URLs we will be using, plus the visit counts they will initially get
307  // (the redirect set below will also increment the visit counts). We want
308  // the results to be in A,B,C order. Note also that our visit counts are
309  // all high enough so that domain synthesizing won't get triggered.
310  struct RedirectCase {
311    const char* url;
312    int count;
313  };
314  static const RedirectCase redirect[] = {
315    {"http://redirects/A", 30},
316    {"http://redirects/B", 20},
317    {"http://redirects/C", 10}
318  };
319  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) {
320    history_service_->AddPageWithDetails(GURL(redirect[i].url),
321                                         UTF8ToUTF16("Title"),
322                                         redirect[i].count, redirect[i].count,
323                                         Time::Now(), false,
324                                         history::SOURCE_BROWSED);
325  }
326
327  // Create a B->C->A redirect chain, but set the visit counts such that they
328  // will appear in A,B,C order in the results. The autocomplete query will
329  // search for the most recent visit when looking for redirects, so this will
330  // be found even though the previous visits had no redirects.
331  history::RedirectList redirects_to_a;
332  redirects_to_a.push_back(GURL(redirect[1].url));
333  redirects_to_a.push_back(GURL(redirect[2].url));
334  redirects_to_a.push_back(GURL(redirect[0].url));
335  history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(),
336                            PageTransition::TYPED, redirects_to_a,
337                            history::SOURCE_BROWSED, true);
338
339  // Because all the results are part of a redirect chain with other results,
340  // all but the first one (A) should be culled. We should get the default
341  // "what you typed" result, plus this one.
342  const string16 typing(ASCIIToUTF16("http://redirects/"));
343  const std::string expected_results[] = {
344    UTF16ToUTF8(typing),
345    redirect[0].url};
346  RunTest(typing, string16(), true, expected_results,
347          arraysize(expected_results));
348}
349
350TEST_F(HistoryURLProviderTest, WhatYouTyped) {
351  // Make sure we suggest a What You Typed match at the right times.
352  RunTest(ASCIIToUTF16("wytmatch"), string16(), false, NULL, 0);
353  RunTest(ASCIIToUTF16("wytmatch foo bar"), string16(), false, NULL, 0);
354  RunTest(ASCIIToUTF16("wytmatch+foo+bar"), string16(), false, NULL, 0);
355  RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), string16(), false, NULL, 0);
356
357  const std::string results_1[] = {"http://www.wytmatch.com/"};
358  RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
359          arraysize(results_1));
360
361  const std::string results_2[] = {"http://wytmatch%20foo%20bar/"};
362  RunTest(ASCIIToUTF16("http://wytmatch foo bar"), string16(), false, results_2,
363          arraysize(results_2));
364
365  const std::string results_3[] = {"https://wytmatch%20foo%20bar/"};
366  RunTest(ASCIIToUTF16("https://wytmatch foo bar"), string16(), false,
367          results_3, arraysize(results_3));
368
369  // Test the corner case where a user has fully typed a previously visited
370  // intranet address and is now hitting ctrl-enter, which completes to a
371  // previously unvisted internet domain.
372  const std::string binky_results[] = {"http://binky/"};
373  const std::string binky_com_results[] = {
374    "http://www.binky.com/",
375    "http://binky/",
376  };
377  RunTest(ASCIIToUTF16("binky"), string16(), false, binky_results,
378          arraysize(binky_results));
379  RunTest(ASCIIToUTF16("binky"), ASCIIToUTF16("com"), false, binky_com_results,
380          arraysize(binky_com_results));
381
382  // Test the related case where a user has fully typed a previously visited
383  // intranet address and is now hitting ctrl-enter, which completes to a
384  // previously visted internet domain.
385  const std::string winky_results[] = {
386    "http://winky/",
387    "http://www.winky.com/",
388  };
389  const std::string winky_com_results[] = {
390    "http://www.winky.com/",
391    "http://winky/",
392  };
393  RunTest(ASCIIToUTF16("winky"), string16(), false, winky_results,
394          arraysize(winky_results));
395  RunTest(ASCIIToUTF16("winky"), ASCIIToUTF16("com"), false, winky_com_results,
396          arraysize(winky_com_results));
397}
398
399TEST_F(HistoryURLProviderTest, Fixup) {
400  // Test for various past crashes we've had.
401  RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0);
402  RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0);
403  RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0);
404  RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0);
405  RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0);
406
407  // Fixing up "file:" should result in an inline autocomplete offset of just
408  // after "file:", not just after "file://".
409  const string16 input_1(ASCIIToUTF16("file:"));
410  const std::string fixup_1[] = {"file:///C:/foo.txt"};
411  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, string16(), false, fixup_1,
412                                  arraysize(fixup_1)));
413  EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset);
414
415  // Fixing up "http:/" should result in an inline autocomplete offset of just
416  // after "http:/", not just after "http:".
417  const string16 input_2(ASCIIToUTF16("http:/"));
418  const std::string fixup_2[] = {
419    "http://bogussite.com/a",
420    "http://bogussite.com/b",
421    "http://bogussite.com/c",
422  };
423  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, string16(), false, fixup_2,
424                                  arraysize(fixup_2)));
425  EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset);
426
427  // Adding a TLD to a small number like "56" should result in "www.56.com"
428  // rather than "0.0.0.56.com".
429  const std::string fixup_3[] = {"http://www.56.com/"};
430  RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
431          arraysize(fixup_3));
432
433  // An input looks like a IP address like "127.0.0.1" should result in
434  // "http://127.0.0.1/".
435  const std::string fixup_4[] = {"http://127.0.0.1/"};
436  RunTest(ASCIIToUTF16("127.0.0.1"), string16(), false, fixup_4,
437          arraysize(fixup_4));
438
439  // An number "17173" should result in "http://www.17173.com/" in db.
440  const std::string fixup_5[] = {"http://www.17173.com/"};
441  RunTest(ASCIIToUTF16("17173"), string16(), false, fixup_5,
442          arraysize(fixup_5));
443}
444
445TEST_F(HistoryURLProviderTest, AdjustOffset) {
446  RunAdjustOffsetTest(WideToUTF16(L"http://www.\uAD50\uC721"), 13);
447  RunAdjustOffsetTest(ASCIIToUTF16("http://spaces.com/path%20with%20spa"), 31);
448  RunAdjustOffsetTest(ASCIIToUTF16("http://ms/c++ s"), 15);
449}
450
451TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
452  // Ensure that we will still produce matches for navigation when there is no
453  // database.
454  std::string navigation_1[] = {"http://test.com/"};
455  RunTest(ASCIIToUTF16("test.com"), string16(), false, navigation_1,
456          arraysize(navigation_1));
457
458  std::string navigation_2[] = {"http://slash/"};
459  RunTest(ASCIIToUTF16("slash"), string16(), false, navigation_2,
460          arraysize(navigation_2));
461
462  RunTest(ASCIIToUTF16("this is a query"), string16(), false, NULL, 0);
463}
464