15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <utility>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/ref_counted.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_types.h"
13d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)#include "chrome/browser/history/url_utils.h"
144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)#include "url/gurl.h"
154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)class GURL;
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace history {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
20d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from
21d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// a given input URL is a two-stage process:
22d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)//
23d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)//   input URL --(map 1)--> canonical URL --(map 2)--> image.
24d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)//
254e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// (map 1) searches for an URL in |canonical_urls_| that "matches" (see below)
264e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// input URL. If found, canonical URL assigned to the result. Otherwise the
274e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// input URL is considered to already be a canonical URL.
28d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)//
29d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)// (map 2) simply looks up canonical URL in |images_|.
30d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)//
314e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// The rule to "match" URL in |canonical_urls_| always favors exact match.
324e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// - In GetCanonicalURL(), exact match is the only case examined.
334e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
344e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)//   i.e., stored URLs in |canonical_urls_| that are prefixes of input URL,
354e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)//   ignoring "?query#ref".
364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// For the latter two "URL prefix matches", we prefer the match that is closest
374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)// to input URL, w.r.t. path hierarchy.
38d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TopSitesCache caches the top sites and thumbnails for TopSites.
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TopSitesCache {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TopSitesCache();
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~TopSitesCache();
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Set the top sites. In |top_sites| all forced URLs must appear before
46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // non-forced URLs. This is only checked in debug.
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SetTopSites(const MostVisitedURLList& top_sites);
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const MostVisitedURLList& top_sites() const { return top_sites_; }
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The thumbnails.
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SetThumbnails(const URLToImagesMap& images);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const URLToImagesMap& images() const { return images_; }
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the thumbnail as an Image for the specified url. This adds an entry
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // for |url| if one has not yet been added.
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Images* GetImage(const GURL& url);
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fetches the thumbnail for the specified url. Returns true if there is a
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // thumbnail for the specified url. It is possible for a URL to be in TopSites
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // but not have an thumbnail.
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool GetPageThumbnail(const GURL& url,
624e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)                        scoped_refptr<base::RefCountedMemory>* bytes) const;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fetches the thumbnail score for the specified url. Returns true if
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // there is a thumbnail score for the specified url.
664e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the canonical URL for |url|.
694e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  const GURL& GetCanonicalURL(const GURL& url) const;
704e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Prefers an exact match if it exists, or the least generalized match while
73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // ignoring "?query#ref". Returns the resulting canonical URL if match is
74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // found, otherwise returns an empty GURL.
754e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL GetGeneralizedCanonicalURL(const GURL& url) const;
76d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if |url| is known.
784e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  bool IsKnownURL(const GURL& url) const;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the index into |top_sites_| for |url|.
814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  size_t GetURLIndex(const GURL& url) const;
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Returns the number of non-forced URLs in the cache.
84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  size_t GetNumNonForcedURLs() const;
85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Returns the number of forced URLs in the cache.
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  size_t GetNumForcedURLs() const;
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The entries in CanonicalURLs, see CanonicalURLs for details. The second
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // argument gives the index of the URL into MostVisitedURLs redirects.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Comparator used for CanonicalURLs.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class CanonicalURLComparator {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   public:
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const CanonicalURLEntry& e1,
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    const CanonicalURLEntry& e2) const {
99d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)      return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
100d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)                                       e2.first->redirects[e2.second].spec());
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1044e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Creates the object needed to form std::map queries into |canonical_urls_|,
1054e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // wrapping all required temporary data to allow inlining.
1064e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  class CanonicalURLQuery {
1074e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)   public:
1084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    explicit CanonicalURLQuery(const GURL& url);
1094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    ~CanonicalURLQuery();
1104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    const CanonicalURLEntry& entry() { return entry_; }
1114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)   private:
1134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    MostVisitedURL most_visited_url_;
1144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    CanonicalURLEntry entry_;
1154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  };
1164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is used to map from redirect url to the MostVisitedURL the redirect is
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // from. Ideally this would be map<GURL, size_t> (second param indexing into
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // top_sites_), but this results in duplicating all redirect urls. As some
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sites have a lot of redirects, we instead use the MostVisitedURL* and the
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // index of the redirect as the key, and the index into top_sites_ as the
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // value. This way we aren't duplicating GURLs. CanonicalURLComparator
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // enforces the ordering as if we were using GURLs.
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::map<CanonicalURLEntry, size_t,
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   CanonicalURLComparator> CanonicalURLs;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Count the number of forced URLs.
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void CountForcedURLs();
129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates the set of canonical urls from |top_sites_|.
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void GenerateCanonicalURLs();
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Stores a set of redirects. This is used by GenerateCanonicalURLs.
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StoreRedirectChain(const RedirectList& redirects, size_t destination);
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
136d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)  // Returns the iterator into |canonical_urls_| for the |url|.
1374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Returns the GURL corresponding to an iterator in |canonical_urls_|.
1404e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
141d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // The number of top sites with forced URLs.
143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  size_t num_forced_urls_;
144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // The top sites. This list must always contain the forced URLs first followed
146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // by the non-forced URLs. This is not strictly enforced but is checked in
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // debug.
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MostVisitedURLList top_sites_;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The images. These map from canonical url to image.
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  URLToImagesMap images_;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generated from the redirects to and from the most visited pages. See
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // description above typedef for details.
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CanonicalURLs canonical_urls_;
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1571e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Helper to clear "?query#ref" from any GURL. This is set in the constructor
1584e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // and never modified after.
1594e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL::Replacements clear_query_ref_;
1604e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1614e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Helper to clear "/path?query#ref" from any GURL. This is set in the
1621e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // constructor and never modified after.
1634e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL::Replacements clear_path_query_ref_;
1644e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace history
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
171