1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/top_sites_cache.h"
6
7#include <set>
8
9#include "base/basictypes.h"
10#include "base/logging.h"
11#include "base/strings/string16.h"
12#include "base/strings/string_number_conversions.h"
13#include "base/strings/utf_string_conversions.h"
14#include "testing/gtest/include/gtest/gtest.h"
15
16namespace history {
17
18namespace {
19
20class TopSitesCacheTest : public testing::Test {
21 public:
22  TopSitesCacheTest() {
23  }
24
25 protected:
26  // Initializes |top_sites_| on |spec|, which is a list of URL strings with
27  // optional indents: indentated URLs redirect to the last non-indented URL.
28  // Titles are assigned as "Title 1", "Title 2", etc., in the order of
29  // appearance. See |kTopSitesSpecBasic| for an example. This function does not
30  // update |cache_| so you can manipulate |top_sites_| before you update it.
31  void BuildTopSites(const char** spec, size_t size);
32
33  // Initializes |top_sites_| and |cache_| based on |spec|.
34  void InitTopSiteCache(const char** spec, size_t size);
35
36  MostVisitedURLList top_sites_;
37  TopSitesCache cache_;
38
39 private:
40  DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest);
41};
42
43void TopSitesCacheTest::BuildTopSites(const char** spec, size_t size) {
44  std::set<std::string> urls_seen;
45  for (size_t i = 0; i < size; ++i) {
46    const char* spec_item = spec[i];
47    while (*spec_item && *spec_item == ' ')  // Eat indent.
48      ++spec_item;
49    if (urls_seen.find(spec_item) != urls_seen.end())
50      NOTREACHED() << "Duplicate URL found: " << spec_item;
51    urls_seen.insert(spec_item);
52    if (spec_item == spec[i]) {  // No indent: add new MostVisitedURL.
53      base::string16 title(base::ASCIIToUTF16("Title ") +
54                     base::Uint64ToString16(top_sites_.size() + 1));
55      top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
56    }
57    ASSERT_TRUE(!top_sites_.empty());
58    // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
59    top_sites_.back().redirects.push_back(GURL(spec_item));
60  }
61}
62
63void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) {
64  BuildTopSites(spec, size);
65  cache_.SetTopSites(top_sites_);
66}
67
// Basic fixture spec: four canonical URLs (no indent). Each indented entry is
// a redirect source for the canonical URL listed above it.
const char* kTopSitesSpecBasic[] = {
  // First site, with two redirecting URLs.
  "http://www.google.com",
  "  http://www.gogle.com",
  "  http://www.gooogle.com",
  // Second site, with one redirecting URL that differs only by its query.
  "http://www.youtube.com/a/b",
  "  http://www.youtube.com/a/b?test=1",
  // Third site: HTTPS, with one redirecting URL.
  "https://www.google.com/",
  "  https://www.gogle.com",
  // Fourth site: explicit port, no redirects.
  "http://www.example.com:3141/",
};
78
79TEST_F(TopSitesCacheTest, GetCanonicalURL) {
80  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
81  struct {
82    const char* expected;
83    const char* query;
84  } test_cases[] = {
85    // Already is canonical: redirects.
86    {"http://www.google.com/", "http://www.google.com"},
87    // Exact match with stored URL: redirects.
88    {"http://www.google.com/", "http://www.gooogle.com"},
89    // Recognizes despite trailing "/": redirects
90    {"http://www.google.com/", "http://www.gooogle.com/"},
91    // Exact match with URL with query: redirects.
92    {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"},
93    // No match with URL with query: as-is.
94    {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"},
95    // Never-seen-before URL: as-is.
96    {"http://maps.google.com/", "http://maps.google.com/"},
97    // Changing port number, does not match: as-is.
98    {"http://www.example.com:1234/", "http://www.example.com:1234"},
99    // Smart enough to know that port 80 is HTTP: redirects.
100    {"http://www.google.com/", "http://www.gooogle.com:80"},
101    // Prefix should not work: as-is.
102    {"http://www.youtube.com/a", "http://www.youtube.com/a"},
103  };
104  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
105    std::string expected(test_cases[i].expected);
106    std::string query(test_cases[i].query);
107    EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec())
108      << " for test_case[" << i << "]";
109  }
110}
111
112TEST_F(TopSitesCacheTest, IsKnownUrl) {
113  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
114  // Matches.
115  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com")));
116  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com")));
117  EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/")));
118
119  // Non-matches.
120  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?")));
121  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net")));
122  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff")));
123  EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com")));
124  EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a")));
125}
126
// Fixture spec for the generalized (URL-prefix) lookup tests: six canonical
// URLs (no indent). Each indented entry is a redirect source for the
// canonical URL listed above it.
const char* kTopSitesSpecPrefix[] = {
  // First site, with three redirecting URLs, one of them on another host.
  "http://www.google.com/",
  "  http://www.google.com/test?q=3",
  "  http://www.google.com/test/y?d",
  "  http://www.chromium.org/a/b",
  // Second site, with two redirecting URLs.
  "http://www.google.com/2",
  "  http://www.google.com/test/q",
  "  http://www.google.com/test/y?b",
  // Third site, with one redirecting URL.
  "http://www.google.com/3",
  "  http://www.google.com/testing",
  // Fourth site, no redirects.
  "http://www.google.com/test-hyphen",
  // Fifth site, with one redirecting URL nested deeper under its own path.
  "http://www.google.com/sh",
  "  http://www.google.com/sh/1/2/3",
  // Sixth site, no redirects.
  "http://www.google.com/sh/1",
};
142
143TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) {
144  InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
145  for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) {
146    // Go through each entry in kTopSitesSpecPrefix, trimming space.
147    const char* s = kTopSitesSpecPrefix[i];
148    while (*s && *s == ' ')
149      ++s;
150    // Get the answer from direct lookup.
151    GURL stored_url(s);
152    GURL expected(cache_.GetCanonicalURL(stored_url));
153    // Test generalization.
154    GURL result(cache_.GetGeneralizedCanonicalURL(stored_url));
155    EXPECT_EQ(expected, result) << " for kTopSitesSpecPrefix[" << i << "]";
156  }
157}
158
159TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) {
160  InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
161  struct {
162    const char* expected;
163    const char* query;
164  } test_cases[] = {
165    // Exact match after trimming "?query": redirects.
166    {"http://www.google.com/", "http://www.google.com/test"},
167    // Same, but different code path: redirects.
168    {"http://www.google.com/", "http://www.google.com/test/y?e"},
169    {"http://www.google.com/", "http://www.google.com/test/y?c"},
170    // Same, but code path leads to different result: redirects.
171    {"http://www.google.com/2", "http://www.google.com/test/y?a"},
172    // Generalized match: redirects.
173    {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"},
174    // Generalized match with trailing "/": redirects.
175    {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"},
176    // Unique generalization match: redirects.
177    {"http://www.google.com/", "http://www.chromium.org/a/b/c"},
178    // Multiple exact matches after trimming: redirects to first.
179    {"http://www.google.com/2", "http://www.google.com/test/y"},
180    // Multiple generalized matches: redirects to least general.
181    {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
182    // Multiple generalized matches: redirects to least general.
183    {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"},
184    // Competing generalized match: take the most specilized.
185    {"http://www.google.com/2", "http://www.google.com/test/q"},
186    // No generalized match, early element: fails.
187    {"", "http://www.a.com/"},
188    // No generalized match, intermediate element: fails.
189    {"", "http://www.e-is-between-chromium-and-google.com/"},
190    // No generalized match, late element: fails.
191    {"", "http://www.zzzzzzz.com/"},
192    // String prefix match but not URL-prefix match: fails.
193    {"", "http://www.chromium.org/a/beeswax"},
194    // String prefix match and URL-prefix match: redirects.
195    {"http://www.google.com/", "http://www.google.com/shhhhhh"},
196    // Different protocol: fails.
197    {"", "https://www.google.com/test"},
198    // Smart enough to know that port 80 is HTTP: redirects.
199    {"http://www.google.com/", "http://www.google.com:80/test"},
200    // Specialized match only: fails.
201    {"", "http://www.chromium.org/a"},
202  };
203  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
204    std::string expected(test_cases[i].expected);
205    std::string query(test_cases[i].query);
206    GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
207    EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
208  }
209}
210
211// This tests a special case where there are 2 generalized matches, and both
212// should be checked to find the correct match.
213TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) {
214  const char* top_sites_spec[] = {
215    "http://www.dest.com/1",
216    "  http://www.source.com/a?m=5",  // Redirects.
217    "http://www.dest.com/2",
218    "  http://www.source.com/a/t?q=3",  // Redirects.
219  };
220  InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec));
221
222  struct {
223    const char* expected;
224    const char* query;
225  } test_cases[] = {
226    // Slightly before "http://www.source.com/a?m=5".
227    {"http://www.dest.com/1", "http://www.source.com/a?l=5"},
228    // Slightly after "http://www.source.com/a?m=5".
229    {"http://www.dest.com/1", "http://www.source.com/a?n=5"},
230    // Slightly before "http://www.source.com/a/t?q=3".
231    {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"},
232    // Slightly after "http://www.source.com/a/t?q=3".
233    {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"},
234  };
235
236  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
237    std::string expected(test_cases[i].expected);
238    std::string query(test_cases[i].query);
239    GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query)));
240    EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]";
241  }
242}
243
244// This test ensures forced URLs behave in the expected way.
245TEST_F(TopSitesCacheTest, CacheForcedURLs) {
246  // Forced URLs must always appear at the beginning of the list.
247  BuildTopSites(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
248  top_sites_[0].last_forced_time = base::Time::FromJsTime(1000);
249  top_sites_[1].last_forced_time =  base::Time::FromJsTime(2000);
250  cache_.SetTopSites(top_sites_);
251
252  EXPECT_EQ(2u, cache_.GetNumForcedURLs());
253  EXPECT_EQ(2u, cache_.GetNumNonForcedURLs());
254}
255
256}  // namespace
257
258}  // namespace history
259