15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <utility> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/ref_counted.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_types.h" 13d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)#include "chrome/browser/history/url_utils.h" 144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)#include "url/gurl.h" 154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)class GURL; 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace history { 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 20d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from 21d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// a given input URL is a two-stage process: 22d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// 23d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// input URL --(map 1)--> canonical URL --(map 2)--> image. 24d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// 254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// (map 1) searches for an URL in |canonical_urls_| that "matches" (see below) 264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// input URL. If found, canonical URL assigned to the result. Otherwise the 274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// input URL is considered to already be a canonical URL. 28d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// 29d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// (map 2) simply looks up canonical URL in |images_|. 30d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// 314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// The rule to "match" URL in |canonical_urls_| always favors exact match. 324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// - In GetCanonicalURL(), exact match is the only case examined. 334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches, 344e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// i.e., stored URLs in |canonical_urls_| that are prefixes of input URL, 354e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// ignoring "?query#ref". 364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// For the latter two "URL prefix matches", we prefer the match that is closest 374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// to input URL, w.r.t. path hierarchy. 38d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TopSitesCache caches the top sites and thumbnails for TopSites. 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TopSitesCache { 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TopSitesCache(); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~TopSitesCache(); 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Set the top sites. In |top_sites| all forced URLs must appear before 46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // non-forced URLs. This is only checked in debug. 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void SetTopSites(const MostVisitedURLList& top_sites); 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MostVisitedURLList& top_sites() const { return top_sites_; } 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The thumbnails. 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void SetThumbnails(const URLToImagesMap& images); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLToImagesMap& images() const { return images_; } 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the thumbnail as an Image for the specified url. This adds an entry 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // for |url| if one has not yet been added. 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Images* GetImage(const GURL& url); 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fetches the thumbnail for the specified url. Returns true if there is a 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // thumbnail for the specified url. It is possible for a URL to be in TopSites 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but not have an thumbnail. 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool GetPageThumbnail(const GURL& url, 624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) scoped_refptr<base::RefCountedMemory>* bytes) const; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fetches the thumbnail score for the specified url. Returns true if 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // there is a thumbnail score for the specified url. 664e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the canonical URL for |url|. 694e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) const GURL& GetCanonicalURL(const GURL& url) const; 704e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|. 72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Prefers an exact match if it exists, or the least generalized match while 73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // ignoring "?query#ref". Returns the resulting canonical URL if match is 74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // found, otherwise returns an empty GURL. 754e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) GURL GetGeneralizedCanonicalURL(const GURL& url) const; 76d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if |url| is known. 784e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) bool IsKnownURL(const GURL& url) const; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the index into |top_sites_| for |url|. 814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) size_t GetURLIndex(const GURL& url) const; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Returns the number of non-forced URLs in the cache. 84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) size_t GetNumNonForcedURLs() const; 85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Returns the number of forced URLs in the cache. 87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) size_t GetNumForcedURLs() const; 88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The entries in CanonicalURLs, see CanonicalURLs for details. The second 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // argument gives the index of the URL into MostVisitedURLs redirects. 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry; 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Comparator used for CanonicalURLs. 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class CanonicalURLComparator { 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool operator()(const CanonicalURLEntry& e1, 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const CanonicalURLEntry& e2) const { 99d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), 100d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) e2.first->redirects[e2.second].spec()); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1044e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // Creates the object needed to form std::map queries into |canonical_urls_|, 1054e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // wrapping all required temporary data to allow inlining. 1064e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) class CanonicalURLQuery { 1074e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) public: 1084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) explicit CanonicalURLQuery(const GURL& url); 1094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) ~CanonicalURLQuery(); 1104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) const CanonicalURLEntry& entry() { return entry_; } 1114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) private: 1134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) MostVisitedURL most_visited_url_; 1144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) CanonicalURLEntry entry_; 1154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) }; 1164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This is used to map from redirect url to the MostVisitedURL the redirect is 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // from. Ideally this would be map<GURL, size_t> (second param indexing into 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // top_sites_), but this results in duplicating all redirect urls. As some 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // sites have a lot of redirects, we instead use the MostVisitedURL* and the 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // index of the redirect as the key, and the index into top_sites_ as the 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // value. This way we aren't duplicating GURLs. CanonicalURLComparator 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // enforces the ordering as if we were using GURLs. 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef std::map<CanonicalURLEntry, size_t, 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CanonicalURLComparator> CanonicalURLs; 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Count the number of forced URLs. 128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void CountForcedURLs(); 129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Generates the set of canonical urls from |top_sites_|. 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void GenerateCanonicalURLs(); 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Stores a set of redirects. This is used by GenerateCanonicalURLs. 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void StoreRedirectChain(const RedirectList& redirects, size_t destination); 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 136d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) // Returns the iterator into |canonical_urls_| for the |url|. 1374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const; 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // Returns the GURL corresponding to an iterator in |canonical_urls_|. 1404e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const; 141d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // The number of top sites with forced URLs. 143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) size_t num_forced_urls_; 144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // The top sites. This list must always contain the forced URLs first followed 146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // by the non-forced URLs. This is not strictly enforced but is checked in 147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // debug. 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MostVisitedURLList top_sites_; 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The images. These map from canonical url to image. 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URLToImagesMap images_; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Generated from the redirects to and from the most visited pages. See 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // description above typedef for details. 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CanonicalURLs canonical_urls_; 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1571e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // Helper to clear "?query#ref" from any GURL. This is set in the constructor 1584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // and never modified after. 1594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) GURL::Replacements clear_query_ref_; 1604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // Helper to clear "/path?query#ref" from any GURL. This is set in the 1621e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) // constructor and never modified after. 1634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) GURL::Replacements clear_path_query_ref_; 1644e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(TopSitesCache); 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace history 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 171