1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
6#define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
7
8#include <map>
9#include <utility>
10
11#include "base/memory/ref_counted.h"
12#include "chrome/browser/history/url_utils.h"
13#include "components/history/core/browser/history_types.h"
14#include "url/gurl.h"
15
16class GURL;
17
18namespace history {
19
20// TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from
21// a given input URL is a two-stage process:
22//
23//   input URL --(map 1)--> canonical URL --(map 2)--> image.
24//
25// (map 1) searches for an URL in |canonical_urls_| that "matches" (see below)
26// input URL. If found, canonical URL assigned to the result. Otherwise the
27// input URL is considered to already be a canonical URL.
28//
29// (map 2) simply looks up canonical URL in |images_|.
30//
31// The rule to "match" URL in |canonical_urls_| always favors exact match.
32// - In GetCanonicalURL(), exact match is the only case examined.
33// - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
34//   i.e., stored URLs in |canonical_urls_| that are prefixes of input URL,
35//   ignoring "?query#ref".
36// For the latter two "URL prefix matches", we prefer the match that is closest
37// to input URL, w.r.t. path hierarchy.
38
39// TopSitesCache caches the top sites and thumbnails for TopSites.
40class TopSitesCache {
41 public:
42  TopSitesCache();
43  ~TopSitesCache();
44
45  // Set the top sites. In |top_sites| all forced URLs must appear before
46  // non-forced URLs. This is only checked in debug.
47  void SetTopSites(const MostVisitedURLList& top_sites);
48  const MostVisitedURLList& top_sites() const { return top_sites_; }
49
50  // The thumbnails.
51  void SetThumbnails(const URLToImagesMap& images);
52  const URLToImagesMap& images() const { return images_; }
53
54  // Returns the thumbnail as an Image for the specified url. This adds an entry
55  // for |url| if one has not yet been added.
56  Images* GetImage(const GURL& url);
57
58  // Fetches the thumbnail for the specified url. Returns true if there is a
59  // thumbnail for the specified url. It is possible for a URL to be in TopSites
60  // but not have an thumbnail.
61  bool GetPageThumbnail(const GURL& url,
62                        scoped_refptr<base::RefCountedMemory>* bytes) const;
63
64  // Fetches the thumbnail score for the specified url. Returns true if
65  // there is a thumbnail score for the specified url.
66  bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
67
68  // Returns the canonical URL for |url|.
69  const GURL& GetCanonicalURL(const GURL& url) const;
70
71  // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
72  // Prefers an exact match if it exists, or the least generalized match while
73  // ignoring "?query#ref". Returns the resulting canonical URL if match is
74  // found, otherwise returns an empty GURL.
75  GURL GetGeneralizedCanonicalURL(const GURL& url) const;
76
77  // Returns true if |url| is known.
78  bool IsKnownURL(const GURL& url) const;
79
80  // Returns the index into |top_sites_| for |url|.
81  size_t GetURLIndex(const GURL& url) const;
82
83  // Returns the number of non-forced URLs in the cache.
84  size_t GetNumNonForcedURLs() const;
85
86  // Returns the number of forced URLs in the cache.
87  size_t GetNumForcedURLs() const;
88
89 private:
90  // The entries in CanonicalURLs, see CanonicalURLs for details. The second
91  // argument gives the index of the URL into MostVisitedURLs redirects.
92  typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
93
94  // Comparator used for CanonicalURLs.
95  class CanonicalURLComparator {
96   public:
97    bool operator()(const CanonicalURLEntry& e1,
98                    const CanonicalURLEntry& e2) const {
99      return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
100                                       e2.first->redirects[e2.second].spec());
101    }
102  };
103
104  // Creates the object needed to form std::map queries into |canonical_urls_|,
105  // wrapping all required temporary data to allow inlining.
106  class CanonicalURLQuery {
107   public:
108    explicit CanonicalURLQuery(const GURL& url);
109    ~CanonicalURLQuery();
110    const CanonicalURLEntry& entry() { return entry_; }
111
112   private:
113    MostVisitedURL most_visited_url_;
114    CanonicalURLEntry entry_;
115  };
116
117  // This is used to map from redirect url to the MostVisitedURL the redirect is
118  // from. Ideally this would be map<GURL, size_t> (second param indexing into
119  // top_sites_), but this results in duplicating all redirect urls. As some
120  // sites have a lot of redirects, we instead use the MostVisitedURL* and the
121  // index of the redirect as the key, and the index into top_sites_ as the
122  // value. This way we aren't duplicating GURLs. CanonicalURLComparator
123  // enforces the ordering as if we were using GURLs.
124  typedef std::map<CanonicalURLEntry, size_t,
125                   CanonicalURLComparator> CanonicalURLs;
126
127  // Count the number of forced URLs.
128  void CountForcedURLs();
129
130  // Generates the set of canonical urls from |top_sites_|.
131  void GenerateCanonicalURLs();
132
133  // Stores a set of redirects. This is used by GenerateCanonicalURLs.
134  void StoreRedirectChain(const RedirectList& redirects, size_t destination);
135
136  // Returns the iterator into |canonical_urls_| for the |url|.
137  CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
138
139  // Returns the GURL corresponding to an iterator in |canonical_urls_|.
140  const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
141
142  // The number of top sites with forced URLs.
143  size_t num_forced_urls_;
144
145  // The top sites. This list must always contain the forced URLs first followed
146  // by the non-forced URLs. This is not strictly enforced but is checked in
147  // debug.
148  MostVisitedURLList top_sites_;
149
150  // The images. These map from canonical url to image.
151  URLToImagesMap images_;
152
153  // Generated from the redirects to and from the most visited pages. See
154  // description above typedef for details.
155  CanonicalURLs canonical_urls_;
156
157  // Helper to clear "?query#ref" from any GURL. This is set in the constructor
158  // and never modified after.
159  GURL::Replacements clear_query_ref_;
160
161  // Helper to clear "/path?query#ref" from any GURL. This is set in the
162  // constructor and never modified after.
163  GURL::Replacements clear_path_query_ref_;
164
165  DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
166};
167
168}  // namespace history
169
170#endif  // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
171