top_sites_cache.h revision 1e9bf3e0803691d0a228da41fc608347b6db4340
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <utility>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/ref_counted.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/history_types.h"
13d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)#include "chrome/browser/history/url_utils.h"
144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)#include "url/gurl.h"
154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)class GURL;
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace history {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// TopSitesCache caches thumbnails for visited pages. Retrieving thumbnails
// from a given input URL is a two-stage process:
//
//   input URL --(map 1)--> canonical URL --(map 2)--> image.
//
// (map 1) searches for a URL in |canonical_urls_| that "matches" (see below)
// the input URL. If one is found, the corresponding canonical URL is the
// result. Otherwise the input URL is considered to already be a canonical URL.
//
// (map 2) simply looks up the canonical URL in |images_|.
//
// The rule to "match" a URL in |canonical_urls_| always favors an exact match.
// - In GetCanonicalURL(), exact match is the only case examined.
// - In GetSpecializedCanonicalURL(), we also perform "specialized" URL matches,
//   i.e., stored URLs in |canonical_urls_| of which the input URL is a
//   URL prefix, ignoring "?query#ref".
// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
//   i.e., stored URLs in |canonical_urls_| that are prefixes of the input URL,
//   ignoring "?query#ref".
// For the latter two "URL prefix matches", we prefer the match that is closest
// to the input URL, w.r.t. path hierarchy.
41d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TopSitesCache caches the top sites and thumbnails for TopSites.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class TopSitesCache {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TopSitesCache();
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~TopSitesCache();
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The top sites.
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SetTopSites(const MostVisitedURLList& top_sites);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const MostVisitedURLList& top_sites() const { return top_sites_; }
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The thumbnails.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void SetThumbnails(const URLToImagesMap& images);
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const URLToImagesMap& images() const { return images_; }
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the thumbnail as an Image for the specified url. This adds an entry
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // for |url| if one has not yet been added.
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Images* GetImage(const GURL& url);
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fetches the thumbnail for the specified url. Returns true if there is a
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // thumbnail for the specified url. It is possible for a URL to be in TopSites
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // but not have an thumbnail.
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool GetPageThumbnail(const GURL& url,
644e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)                        scoped_refptr<base::RefCountedMemory>* bytes) const;
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fetches the thumbnail score for the specified url. Returns true if
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // there is a thumbnail score for the specified url.
684e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the canonical URL for |url|.
714e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  const GURL& GetCanonicalURL(const GURL& url) const;
724e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
734e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Searches for a URL in |canonical_urls_| that has |url| as a URL prefix.
744e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Prefers an exact match if it exists, or the least specialized match while
754e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // ignoring "?query#ref". Returns the result to if match is found, otherwise
764e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // returns an empty GURL.
774e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL GetSpecializedCanonicalURL(const GURL& url) const;
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
794e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Similar to GetSpecializedCanonicalURL(), but searches for a URL in
804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // |canonical_urls_| that is a URL prefix of |url|, and leaset generalized.
814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL GetGeneralizedCanonicalURL(const GURL& url) const;
82d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if |url| is known.
844e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  bool IsKnownURL(const GURL& url) const;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the index into |top_sites_| for |url|.
874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  size_t GetURLIndex(const GURL& url) const;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The entries in CanonicalURLs, see CanonicalURLs for details. The second
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // argument gives the index of the URL into MostVisitedURLs redirects.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Comparator used for CanonicalURLs.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class CanonicalURLComparator {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   public:
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool operator()(const CanonicalURLEntry& e1,
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    const CanonicalURLEntry& e2) const {
99d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)      return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
100d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)                                       e2.first->redirects[e2.second].spec());
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1044e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Creates the object needed to form std::map queries into |canonical_urls_|,
1054e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // wrapping all required temporary data to allow inlining.
1064e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  class CanonicalURLQuery {
1074e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)   public:
1084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    explicit CanonicalURLQuery(const GURL& url);
1094e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    ~CanonicalURLQuery();
1104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    const CanonicalURLEntry& entry() { return entry_; }
1114e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1124e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)   private:
1134e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    MostVisitedURL most_visited_url_;
1144e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    CanonicalURLEntry entry_;
1154e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  };
1164e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is used to map from redirect url to the MostVisitedURL the redirect is
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // from. Ideally this would be map<GURL, size_t> (second param indexing into
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // top_sites_), but this results in duplicating all redirect urls. As some
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sites have a lot of redirects, we instead use the MostVisitedURL* and the
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // index of the redirect as the key, and the index into top_sites_ as the
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // value. This way we aren't duplicating GURLs. CanonicalURLComparator
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // enforces the ordering as if we were using GURLs.
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::map<CanonicalURLEntry, size_t,
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   CanonicalURLComparator> CanonicalURLs;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generates the set of canonical urls from |top_sites_|.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void GenerateCanonicalURLs();
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Stores a set of redirects. This is used by GenerateCanonicalURLs.
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StoreRedirectChain(const RedirectList& redirects, size_t destination);
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
133d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)  // Returns the iterator into |canonical_urls_| for the |url|.
1344e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Returns the GURL corresponding to an iterator in |canonical_urls_|.
1374e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
138d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The top sites.
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MostVisitedURLList top_sites_;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The images. These map from canonical url to image.
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  URLToImagesMap images_;
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generated from the redirects to and from the most visited pages. See
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // description above typedef for details.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CanonicalURLs canonical_urls_;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1491e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // Helper to clear "?query#ref" from any GURL. This is set in the constructor
1504e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // and never modified after.
1514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL::Replacements clear_query_ref_;
1524e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // Helper to clear "/path?query#ref" from any GURL. This is set in the
1541e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)  // constructor and never modified after.
1554e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  GURL::Replacements clear_path_query_ref_;
1564e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace history
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
163