1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/history/top_sites_cache.h" 6 7#include <set> 8 9#include "base/basictypes.h" 10#include "base/logging.h" 11#include "base/strings/string16.h" 12#include "base/strings/string_number_conversions.h" 13#include "base/strings/utf_string_conversions.h" 14#include "testing/gtest/include/gtest/gtest.h" 15 16namespace history { 17 18namespace { 19 20class TopSitesCacheTest : public testing::Test { 21 public: 22 TopSitesCacheTest() { 23 } 24 25 protected: 26 // Initializes |top_sites_| on |spec|, which is a list of URL strings with 27 // optional indents: indentated URLs redirect to the last non-indented URL. 28 // Titles are assigned as "Title 1", "Title 2", etc., in the order of 29 // appearance. See |kTopSitesSpecBasic| for an example. This function does not 30 // update |cache_| so you can manipulate |top_sites_| before you update it. 31 void BuildTopSites(const char** spec, size_t size); 32 33 // Initializes |top_sites_| and |cache_| based on |spec|. 34 void InitTopSiteCache(const char** spec, size_t size); 35 36 MostVisitedURLList top_sites_; 37 TopSitesCache cache_; 38 39 private: 40 DISALLOW_COPY_AND_ASSIGN(TopSitesCacheTest); 41}; 42 43void TopSitesCacheTest::BuildTopSites(const char** spec, size_t size) { 44 std::set<std::string> urls_seen; 45 for (size_t i = 0; i < size; ++i) { 46 const char* spec_item = spec[i]; 47 while (*spec_item && *spec_item == ' ') // Eat indent. 48 ++spec_item; 49 if (urls_seen.find(spec_item) != urls_seen.end()) 50 NOTREACHED() << "Duplicate URL found: " << spec_item; 51 urls_seen.insert(spec_item); 52 if (spec_item == spec[i]) { // No indent: add new MostVisitedURL. 53 base::string16 title(base::ASCIIToUTF16("Title ") + 54 base::Uint64ToString16(top_sites_.size() + 1)); 55 top_sites_.push_back(MostVisitedURL(GURL(spec_item), title)); 56 } 57 ASSERT_TRUE(!top_sites_.empty()); 58 // Set up redirect to canonical URL. Canonical URL redirects to itself, too. 59 top_sites_.back().redirects.push_back(GURL(spec_item)); 60 } 61} 62 63void TopSitesCacheTest::InitTopSiteCache(const char** spec, size_t size) { 64 BuildTopSites(spec, size); 65 cache_.SetTopSites(top_sites_); 66} 67 68const char* kTopSitesSpecBasic[] = { 69 "http://www.google.com", 70 " http://www.gogle.com", // Redirects. 71 " http://www.gooogle.com", // Redirects. 72 "http://www.youtube.com/a/b", 73 " http://www.youtube.com/a/b?test=1", // Redirects. 74 "https://www.google.com/", 75 " https://www.gogle.com", // Redirects. 76 "http://www.example.com:3141/", 77}; 78 79TEST_F(TopSitesCacheTest, GetCanonicalURL) { 80 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); 81 struct { 82 const char* expected; 83 const char* query; 84 } test_cases[] = { 85 // Already is canonical: redirects. 86 {"http://www.google.com/", "http://www.google.com"}, 87 // Exact match with stored URL: redirects. 88 {"http://www.google.com/", "http://www.gooogle.com"}, 89 // Recognizes despite trailing "/": redirects 90 {"http://www.google.com/", "http://www.gooogle.com/"}, 91 // Exact match with URL with query: redirects. 92 {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"}, 93 // No match with URL with query: as-is. 94 {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"}, 95 // Never-seen-before URL: as-is. 96 {"http://maps.google.com/", "http://maps.google.com/"}, 97 // Changing port number, does not match: as-is. 98 {"http://www.example.com:1234/", "http://www.example.com:1234"}, 99 // Smart enough to know that port 80 is HTTP: redirects. 100 {"http://www.google.com/", "http://www.gooogle.com:80"}, 101 // Prefix should not work: as-is. 102 {"http://www.youtube.com/a", "http://www.youtube.com/a"}, 103 }; 104 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 105 std::string expected(test_cases[i].expected); 106 std::string query(test_cases[i].query); 107 EXPECT_EQ(expected, cache_.GetCanonicalURL(GURL(query)).spec()) 108 << " for test_case[" << i << "]"; 109 } 110} 111 112TEST_F(TopSitesCacheTest, IsKnownUrl) { 113 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); 114 // Matches. 115 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com"))); 116 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com"))); 117 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/"))); 118 119 // Non-matches. 120 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?"))); 121 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net"))); 122 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff"))); 123 EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com"))); 124 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a"))); 125} 126 127const char* kTopSitesSpecPrefix[] = { 128 "http://www.google.com/", 129 " http://www.google.com/test?q=3", // Redirects. 130 " http://www.google.com/test/y?d", // Redirects. 131 " http://www.chromium.org/a/b", // Redirects. 132 "http://www.google.com/2", 133 " http://www.google.com/test/q", // Redirects. 134 " http://www.google.com/test/y?b", // Redirects. 135 "http://www.google.com/3", 136 " http://www.google.com/testing", // Redirects. 137 "http://www.google.com/test-hyphen", 138 "http://www.google.com/sh", 139 " http://www.google.com/sh/1/2/3", // Redirects. 140 "http://www.google.com/sh/1", 141}; 142 143TEST_F(TopSitesCacheTest, GetCanonicalURLExactMatch) { 144 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix)); 145 for (size_t i = 0; i < arraysize(kTopSitesSpecPrefix); ++i) { 146 // Go through each entry in kTopSitesSpecPrefix, trimming space. 147 const char* s = kTopSitesSpecPrefix[i]; 148 while (*s && *s == ' ') 149 ++s; 150 // Get the answer from direct lookup. 151 GURL stored_url(s); 152 GURL expected(cache_.GetCanonicalURL(stored_url)); 153 // Test generalization. 154 GURL result(cache_.GetGeneralizedCanonicalURL(stored_url)); 155 EXPECT_EQ(expected, result) << " for kTopSitesSpecPrefix[" << i << "]"; 156 } 157} 158 159TEST_F(TopSitesCacheTest, GetGeneralizedCanonicalURL) { 160 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix)); 161 struct { 162 const char* expected; 163 const char* query; 164 } test_cases[] = { 165 // Exact match after trimming "?query": redirects. 166 {"http://www.google.com/", "http://www.google.com/test"}, 167 // Same, but different code path: redirects. 168 {"http://www.google.com/", "http://www.google.com/test/y?e"}, 169 {"http://www.google.com/", "http://www.google.com/test/y?c"}, 170 // Same, but code path leads to different result: redirects. 171 {"http://www.google.com/2", "http://www.google.com/test/y?a"}, 172 // Generalized match: redirects. 173 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9"}, 174 // Generalized match with trailing "/": redirects. 175 {"http://www.google.com/3", "http://www.google.com/3/1/4/1/5/9/"}, 176 // Unique generalization match: redirects. 177 {"http://www.google.com/", "http://www.chromium.org/a/b/c"}, 178 // Multiple exact matches after trimming: redirects to first. 179 {"http://www.google.com/2", "http://www.google.com/test/y"}, 180 // Multiple generalized matches: redirects to least general. 181 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"}, 182 // Multiple generalized matches: redirects to least general. 183 {"http://www.google.com/sh", "http://www.google.com/sh/1/2/3/4/"}, 184 // Competing generalized match: take the most specilized. 185 {"http://www.google.com/2", "http://www.google.com/test/q"}, 186 // No generalized match, early element: fails. 187 {"", "http://www.a.com/"}, 188 // No generalized match, intermediate element: fails. 189 {"", "http://www.e-is-between-chromium-and-google.com/"}, 190 // No generalized match, late element: fails. 191 {"", "http://www.zzzzzzz.com/"}, 192 // String prefix match but not URL-prefix match: fails. 193 {"", "http://www.chromium.org/a/beeswax"}, 194 // String prefix match and URL-prefix match: redirects. 195 {"http://www.google.com/", "http://www.google.com/shhhhhh"}, 196 // Different protocol: fails. 197 {"", "https://www.google.com/test"}, 198 // Smart enough to know that port 80 is HTTP: redirects. 199 {"http://www.google.com/", "http://www.google.com:80/test"}, 200 // Specialized match only: fails. 201 {"", "http://www.chromium.org/a"}, 202 }; 203 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 204 std::string expected(test_cases[i].expected); 205 std::string query(test_cases[i].query); 206 GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query))); 207 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]"; 208 } 209} 210 211// This tests a special case where there are 2 generalized matches, and both 212// should be checked to find the correct match. 213TEST_F(TopSitesCacheTest, GetPrefixCanonicalURLDiffByQuery) { 214 const char* top_sites_spec[] = { 215 "http://www.dest.com/1", 216 " http://www.source.com/a?m=5", // Redirects. 217 "http://www.dest.com/2", 218 " http://www.source.com/a/t?q=3", // Redirects. 219 }; 220 InitTopSiteCache(top_sites_spec, arraysize(top_sites_spec)); 221 222 struct { 223 const char* expected; 224 const char* query; 225 } test_cases[] = { 226 // Slightly before "http://www.source.com/a?m=5". 227 {"http://www.dest.com/1", "http://www.source.com/a?l=5"}, 228 // Slightly after "http://www.source.com/a?m=5". 229 {"http://www.dest.com/1", "http://www.source.com/a?n=5"}, 230 // Slightly before "http://www.source.com/a/t?q=3". 231 {"http://www.dest.com/2", "http://www.source.com/a/t?q=2"}, 232 // Slightly after "http://www.source.com/a/t?q=3". 233 {"http://www.dest.com/2", "http://www.source.com/a/t?q=4"}, 234 }; 235 236 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 237 std::string expected(test_cases[i].expected); 238 std::string query(test_cases[i].query); 239 GURL result(cache_.GetGeneralizedCanonicalURL(GURL(query))); 240 EXPECT_EQ(expected, result.spec()) << " for test_case[" << i << "]"; 241 } 242} 243 244// This test ensures forced URLs behave in the expected way. 245TEST_F(TopSitesCacheTest, CacheForcedURLs) { 246 // Forced URLs must always appear at the beginning of the list. 247 BuildTopSites(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic)); 248 top_sites_[0].last_forced_time = base::Time::FromJsTime(1000); 249 top_sites_[1].last_forced_time = base::Time::FromJsTime(2000); 250 cache_.SetTopSites(top_sites_); 251 252 EXPECT_EQ(2u, cache_.GetNumForcedURLs()); 253 EXPECT_EQ(2u, cache_.GetNumNonForcedURLs()); 254} 255 256} // namespace 257 258} // namespace history 259