predictor.h revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// A Predictor object is instantiated once in the browser process, and manages
6// both preresolution of hostnames, as well as TCP/IP preconnection to expected
7// subresources.
8// Most hostname lists are provided by the renderer processes, and include URLs
9// that *might* be used in the near future by the browsing user.  One goal of
10// this class is to cause the underlying DNS structure to look up a hostname
11// before it is really needed, and hence reduce latency in the standard lookup
12// paths.
13// Subresource relationships are usually acquired from the referrer field in a
14// navigation.  A subresource URL may be associated with a referrer URL.  Later
15// navigations may, if the likelihood of needing the subresource is high enough,
16// cause this module to speculatively create a TCP/IP connection. If there is
17// only a low likelihood, then a DNS pre-resolution operation may be performed.
18
19#ifndef CHROME_BROWSER_NET_PREDICTOR_H_
20#define CHROME_BROWSER_NET_PREDICTOR_H_
21#pragma once
22
23#include <map>
24#include <queue>
25#include <set>
26#include <string>
27
28#include "base/gtest_prod_util.h"
29#include "base/ref_counted.h"
30#include "chrome/browser/net/url_info.h"
31#include "chrome/browser/net/referrer.h"
32#include "chrome/common/net/predictor_common.h"
33#include "net/base/host_port_pair.h"
34
// Forward declaration: this header only refers to ListValue through pointers
// and references (see SerializeReferrers() / DeserializeReferrers()).
35class ListValue;
36
37namespace net {
// Forward declaration: in this header HostResolver is only passed and stored
// as a pointer (Predictor's constructor and host_resolver_).
38class HostResolver;
39}  // namespace net
40
41namespace chrome_browser_net {
42
// Make the chrome_common_net list types available in this namespace under the
// same names.
43typedef chrome_common_net::UrlList UrlList;
44typedef chrome_common_net::NameList NameList;
// Maps a URL to the UrlInfo kept for it; this is the type of
// Predictor::results_.
45typedef std::map<GURL, UrlInfo> Results;
46
47// Note that Predictor is not thread safe, and must only be called from
48// the IO thread. Failure to do so will result in a DCHECK at runtime.
49class Predictor : public base::RefCountedThreadSafe<Predictor> {
50 public:
51  // A version number for prefs that are saved. This should be incremented when
52  // we change the format so that we discard old data.
53  enum { PREDICTOR_REFERRER_VERSION = 2 };
54
55  // Depending on the expected_subresource_use_, we may either make a TCP/IP
56  // preconnection, or merely pre-resolve the hostname via DNS (or even do
57  // nothing).  The following are the thresholds for taking those actions.
58  static const double kPreconnectWorthyExpectedValue;
59  static const double kDNSPreresolutionWorthyExpectedValue;
60  // Values of expected_subresource_use_ that are less than the following
61  // threshold will be discarded when we Trim() the values, such as is done when
62  // the process ends, and some values are persisted.
63  static const double kPersistWorthyExpectedValue;
64
65  // |max_concurrent| specifies how many concurrent (parallel) prefetches will
66  // be performed. Host lookups will be issued through |host_resolver|.
  // Note that |max_queue_delay_ms| is a base::TimeDelta, not a raw millisecond
  // count, despite the "_ms" suffix in its name.
67  Predictor(net::HostResolver* host_resolver,
68            base::TimeDelta max_queue_delay_ms, size_t max_concurrent,
69            bool preconnect_enabled);
70
71  // Cancel pending requests and prevent new ones from being made.
72  void Shutdown();
73
74  // In some circumstances, for privacy reasons, all results should be
75  // discarded.  This method gracefully handles that activity.
76  // Destroy all our internal state, which shows what names we've looked up, and
77  // how long each has taken, etc. etc.  We also destroy records of successes
78  // (cache hits etc.).
79  void DiscardAllResults();
80
81  // Add hostname(s) to the queue for processing.
82  void ResolveList(const UrlList& urls,
83                   UrlInfo::ResolutionMotivation motivation);
84  void Resolve(const GURL& url,
85               UrlInfo::ResolutionMotivation motivation);
86
87  // Instigate pre-connection to any URLs, or pre-resolution of related host,
88  // that we predict will be needed after this navigation (typically
89  // more-embedded resources on a page).  This method will actually post a task
90  // to do the actual work, so as not to jump ahead of the frame navigation that
91  // instigated this activity.
92  void PredictFrameSubresources(const GURL& url);
93
94  // The Omnibox has proposed a given url to the user, and if it is a search
95  // URL, then it also indicates that this is preconnectable (i.e., we could
96  // preconnect to the search server).
97  void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
98
99  // Preconnect a URL and all of its subresource domains.
100  void PreconnectUrlAndSubresources(const GURL& url);
101
102  // Record details of a navigation so that we can preresolve the host name
103  // ahead of time the next time the users navigates to the indicated host.
104  // Should only be called when urls are distinct, and they should already be
105  // canonicalized to not have a path.
106  void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
107
108  // Dump HTML table containing list of referrers for about:dns.
109  void GetHtmlReferrerLists(std::string* output);
110
111  // Dump the list of currently known referrer domains and related prefetchable
112  // domains.
113  void GetHtmlInfo(std::string* output);
114
115  // Discard any referrer for which all the suggested host names are currently
116  // annotated with no user latency reduction.  Also scale down (diminish) the
117  // total benefit of those that did help, so that their reported contribution
118  // will go down by a factor of 2 each time we trim (moving the referrer closer
119  // to being discarded at a future Trim).
120  void TrimReferrers();
121
122  // Construct a ListValue object that contains all the data in the referrers_
123  // so that it can be persisted in a pref.
124  void SerializeReferrers(ListValue* referral_list);
125
126  // Process a ListValue that contains all the data from a previous reference
127  // list, as constructed by SerializeReferrers(), and add all the identified
128  // values into the current referrer list.
129  void DeserializeReferrers(const ListValue& referral_list);
130
  // NOTE(review): judging by the name, this processes |referral_list| as
  // DeserializeReferrers() does and then deletes it (i.e. takes ownership of
  // the pointer) -- confirm against the definition.
131  void DeserializeReferrersThenDelete(ListValue* referral_list);
132
133  // For unit test code only.
134  size_t max_concurrent_dns_lookups() const {
135    return max_concurrent_dns_lookups_;
136  }
137
138  // Flag setting to use preconnection instead of just DNS pre-fetching.
139  bool preconnect_enabled() const { return preconnect_enabled_; }
140
141  // Put URL in canonical form, including a scheme, host, and port.
142  // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
143  // cannot be otherwise canonicalized.
144  static GURL CanonicalizeUrl(const GURL& url);
145
146 private:
147  friend class base::RefCountedThreadSafe<Predictor>;
148  FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
149  FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
150  FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
151  FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
152  FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
153  FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
154  FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
155  friend class WaitForResolutionHelper;  // For testing.
156
157  class LookupRequest;
158
159  // A simple priority queue for handling host names.
160  // Some names that are queued up have |motivation| that requires very rapid
161  // handling.  For example, a sub-resource name lookup MUST be done before the
162  // actual sub-resource is fetched.  In contrast, a name that was speculatively
163  // noted in a page has to be resolved before the user "gets around to"
164  // clicking on a link.  By tagging (with a motivation) each push we make into
165  // this FIFO queue, the queue can re-order the more important names to service
166  // them sooner (relative to some low priority background resolutions).
167  class HostNameQueue {
168   public:
169    HostNameQueue();
170    ~HostNameQueue();
    // Queues |url|, tagged with the |motivation| used for prioritization (see
    // the class comment).
171    void Push(const GURL& url,
172              UrlInfo::ResolutionMotivation motivation);
173    bool IsEmpty() const;
    // NOTE(review): presumably removes and returns the next URL to service,
    // preferring rush_queue_ over background_queue_ -- confirm in the .cc file.
174    GURL Pop();
175
176   private:
177    // The names in the queue that should be serviced (popped) ASAP.
178    std::queue<GURL> rush_queue_;
179    // The names in the queue that should only be serviced when rush_queue is
180    // empty.
181    std::queue<GURL> background_queue_;
182
183    DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
184  };
185
186  // A map that is keyed with the host/port that we've learned were the cause
187  // of loading additional URLs.  The list of additional targets is held
188  // in a Referrer instance, which is a value in this map.
189  typedef std::map<GURL, Referrer> Referrers;
190
  // Private; the RefCountedThreadSafe friend declared above is what is allowed
  // to destroy a Predictor.
191  ~Predictor();
192
193  // Perform actual resolution or preconnection to subresources now.  This is
194  // an internal worker method that is reached via a post task from
195  // PredictFrameSubresources().
196  void PrepareFrameSubresources(const GURL& url);
197
198  // Only for testing. Returns true if hostname has been successfully resolved
199  // (name found).
200  bool WasFound(const GURL& url) const {
201    Results::const_iterator it(results_.find(url));
202    return (it != results_.end()) &&
203           it->second.was_found();
204  }
205
206  // Only for testing. Returns the resolve duration recorded for |url|, or
207  // UrlInfo::kNullDuration if results_ has no entry for |url|.
208  base::TimeDelta GetResolutionDuration(const GURL& url) {
    // A single find() is enough; indexing with operator[] afterwards would
    // search the map a second time.
209    Results::iterator it(results_.find(url));
210    if (it == results_.end())
211      return UrlInfo::kNullDuration;
    return it->second.resolve_duration();
212  }
213
214  // Only for testing.
215  size_t peak_pending_lookups() const { return peak_pending_lookups_; }
216
217  // Access method for use by async lookup request to pass resolution result.
218  void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
219
220  // Underlying method for both async and synchronous lookup to update state.
221  void LookupFinished(LookupRequest* request,
222                      const GURL& url, bool found);
223
224  // Queue hostname for resolution.  If queueing was done, return the pointer
225  // to the queued instance, otherwise return NULL.
226  UrlInfo* AppendToResolutionQueue(const GURL& url,
227      UrlInfo::ResolutionMotivation motivation);
228
229  // Check to see if too much queuing delay has been noted for the given info,
230  // which indicates that there is "congestion" or growing delay in handling the
231  // resolution of names.  Rather than letting this congestion potentially grow
232  // without bounds, we abandon our queued efforts at pre-resolutions in such a
233  // case.
234  // To do this, we will recycle |info|, as well as all queued items, back to
235  // the state they had before they were queued up.  We can't do anything about
236  // the resolutions we've already sent off for processing on another thread, so
237  // we just let them complete.  On a slow system, subject to congestion, this
238  // will greatly reduce the number of resolutions done, but it will assure that
239  // any resolutions that are done, are in a timely and hence potentially
240  // helpful manner.
241  bool CongestionControlPerformed(UrlInfo* info);
242
243  // Take lookup requests from work_queue_ and tell HostResolver to look them up
244  // asynchronously, provided we don't exceed concurrent resolution limit.
245  void StartSomeQueuedResolutions();
246
247  // work_queue_ holds a list of names we need to look up.
248  HostNameQueue work_queue_;
249
250  // results_ contains information for existing/prior prefetches.
251  Results results_;
252
253  // For each URL that we might navigate to (that we've "learned about")
254  // we have a Referrer list. Each Referrer list has all hostnames we need to
255  // pre-resolve when there is a navigation to the original hostname.
256  Referrers referrers_;
257
  // NOTE(review): presumably the LookupRequest objects that have been started
  // but have not finished yet (see peak_pending_lookups_) -- confirm in the
  // .cc file.
258  std::set<LookupRequest*> pending_lookups_;
259
260  // For testing, to verify that we don't exceed the limit.
261  size_t peak_pending_lookups_;
262
263  // When true, we don't make new lookup requests.
264  bool shutdown_;
265
266  // The number of concurrent speculative lookups currently allowed to be sent
267  // to the resolver.  Any additional lookups will be queued to avoid exceeding
268  // this value.  The queue is a priority queue that will accelerate
269  // sub-resource speculation, and retard resolutions suggested by page scans.
270  const size_t max_concurrent_dns_lookups_;
271
272  // The maximum queueing delay that is acceptable before we enter congestion
273  // reduction mode, and discard all queued (but not yet assigned) resolutions.
274  const base::TimeDelta max_dns_queue_delay_;
275
276  // The host resolver we warm DNS entries for.
277  net::HostResolver* const host_resolver_;
278
279  // Are we currently using preconnection, rather than just DNS resolution, for
280  // subresources and omni-box search URLs.
281  bool preconnect_enabled_;
282
283  // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
284  std::string last_omnibox_host_;
285
286  // The time when the last preresolve was done for last_omnibox_host_.
287  base::TimeTicks last_omnibox_preresolve_;
288
289  // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
290  // preconnecting (because it was to a search service).
291  int consecutive_omnibox_preconnect_count_;
292
293  // The time when the last preconnection was requested to a search service.
294  base::TimeTicks last_omnibox_preconnect_;
295
296  DISALLOW_COPY_AND_ASSIGN(Predictor);
297};
298
299}  // namespace chrome_browser_net
300
301#endif  // CHROME_BROWSER_NET_PREDICTOR_H_
302