// predictor.h revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// A Predictor object is instantiated once in the browser process, and manages 6// both preresolution of hostnames, as well as TCP/IP preconnection to expected 7// subresources. 8// Most hostname lists are provided by the renderer processes, and include URLs 9// that *might* be used in the near future by the browsing user. One goal of 10// this class is to cause the underlying DNS structure to lookup a hostname 11// before it is really needed, and hence reduce latency in the standard lookup 12// paths. 13// Subresource relationships are usually acquired from the referrer field in a 14// navigation. A subresource URL may be associated with a referrer URL. Later 15// navigations may, if the likelihood of needing the subresource is high enough, 16// cause this module to speculatively create a TCP/IP connection. If there is 17// only a low likelihood, then a DNS pre-resolution operation may be performed. 18 19#ifndef CHROME_BROWSER_NET_PREDICTOR_H_ 20#define CHROME_BROWSER_NET_PREDICTOR_H_ 21#pragma once 22 23#include <map> 24#include <queue> 25#include <set> 26#include <string> 27 28#include "base/gtest_prod_util.h" 29#include "base/ref_counted.h" 30#include "chrome/browser/net/url_info.h" 31#include "chrome/browser/net/referrer.h" 32#include "chrome/common/net/predictor_common.h" 33#include "net/base/host_port_pair.h" 34 35class ListValue; 36 37namespace net { 38class HostResolver; 39} // namespace net 40 41namespace chrome_browser_net { 42 43typedef chrome_common_net::UrlList UrlList; 44typedef chrome_common_net::NameList NameList; 45typedef std::map<GURL, UrlInfo> Results; 46 47// Note that Predictor is not thread safe, and must only be called from 48// the IO thread. Failure to do so will result in a DCHECK at runtime. 
49class Predictor : public base::RefCountedThreadSafe<Predictor> { 50 public: 51 // A version number for prefs that are saved. This should be incremented when 52 // we change the format so that we discard old data. 53 enum { PREDICTOR_REFERRER_VERSION = 2 }; 54 55 // Depending on the expected_subresource_use_, we may either make a TCP/IP 56 // preconnection, or merely pre-resolve the hostname via DNS (or even do 57 // nothing). The following are the threasholds for taking those actions. 58 static const double kPreconnectWorthyExpectedValue; 59 static const double kDNSPreresolutionWorthyExpectedValue; 60 // Values of expected_subresource_use_ that are less than the following 61 // threshold will be discarded when we Trim() the values, such as is done when 62 // the process ends, and some values are persisted. 63 static const double kPersistWorthyExpectedValue; 64 65 // |max_concurrent| specifies how many concurrent (parallel) prefetches will 66 // be performed. Host lookups will be issued through |host_resolver|. 67 Predictor(net::HostResolver* host_resolver, 68 base::TimeDelta max_queue_delay_ms, size_t max_concurrent, 69 bool preconnect_enabled); 70 71 // Cancel pending requests and prevent new ones from being made. 72 void Shutdown(); 73 74 // In some circumstances, for privacy reasons, all results should be 75 // discarded. This method gracefully handles that activity. 76 // Destroy all our internal state, which shows what names we've looked up, and 77 // how long each has taken, etc. etc. We also destroy records of suggesses 78 // (cache hits etc.). 79 void DiscardAllResults(); 80 81 // Add hostname(s) to the queue for processing. 
82 void ResolveList(const UrlList& urls, 83 UrlInfo::ResolutionMotivation motivation); 84 void Resolve(const GURL& url, 85 UrlInfo::ResolutionMotivation motivation); 86 87 // Instigate pre-connection to any URLs, or pre-resolution of related host, 88 // that we predict will be needed after this navigation (typically 89 // more-embedded resources on a page). This method will actually post a task 90 // to do the actual work, so as not to jump ahead of the frame navigation that 91 // instigated this activity. 92 void PredictFrameSubresources(const GURL& url); 93 94 // The Omnibox has proposed a given url to the user, and if it is a search 95 // URL, then it also indicates that this is preconnectable (i.e., we could 96 // preconnect to the search server). 97 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable); 98 99 // Preconnect a URL and all of its subresource domains. 100 void PreconnectUrlAndSubresources(const GURL& url); 101 102 // Record details of a navigation so that we can preresolve the host name 103 // ahead of time the next time the users navigates to the indicated host. 104 // Should only be called when urls are distinct, and they should already be 105 // canonicalized to not have a path. 106 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url); 107 108 // Dump HTML table containing list of referrers for about:dns. 109 void GetHtmlReferrerLists(std::string* output); 110 111 // Dump the list of currently known referrer domains and related prefetchable 112 // domains. 113 void GetHtmlInfo(std::string* output); 114 115 // Discard any referrer for which all the suggested host names are currently 116 // annotated with no user latency reduction. Also scale down (diminish) the 117 // total benefit of those that did help, so that their reported contribution 118 // wll go done by a factor of 2 each time we trim (moving the referrer closer 119 // to being discarded at a future Trim). 
120 void TrimReferrers(); 121 122 // Construct a ListValue object that contains all the data in the referrers_ 123 // so that it can be persisted in a pref. 124 void SerializeReferrers(ListValue* referral_list); 125 126 // Process a ListValue that contains all the data from a previous reference 127 // list, as constructed by SerializeReferrers(), and add all the identified 128 // values into the current referrer list. 129 void DeserializeReferrers(const ListValue& referral_list); 130 131 void DeserializeReferrersThenDelete(ListValue* referral_list); 132 133 // For unit test code only. 134 size_t max_concurrent_dns_lookups() const { 135 return max_concurrent_dns_lookups_; 136 } 137 138 // Flag setting to use preconnection instead of just DNS pre-fetching. 139 bool preconnect_enabled() const { return preconnect_enabled_; } 140 141 // Put URL in canonical form, including a scheme, host, and port. 142 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url 143 // cannot be otherwise canonicalized. 144 static GURL CanonicalizeUrl(const GURL& url); 145 146 private: 147 friend class base::RefCountedThreadSafe<Predictor>; 148 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest); 149 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest); 150 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest); 151 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest); 152 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest); 153 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest); 154 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest); 155 friend class WaitForResolutionHelper; // For testing. 156 157 class LookupRequest; 158 159 // A simple priority queue for handling host names. 160 // Some names that are queued up have |motivation| that requires very rapid 161 // handling. For example, a sub-resource name lookup MUST be done before the 162 // actual sub-resource is fetched. 
In contrast, a name that was speculatively 163 // noted in a page has to be resolved before the user "gets around to" 164 // clicking on a link. By tagging (with a motivation) each push we make into 165 // this FIFO queue, the queue can re-order the more important names to service 166 // them sooner (relative to some low priority background resolutions). 167 class HostNameQueue { 168 public: 169 HostNameQueue(); 170 ~HostNameQueue(); 171 void Push(const GURL& url, 172 UrlInfo::ResolutionMotivation motivation); 173 bool IsEmpty() const; 174 GURL Pop(); 175 176 private: 177 // The names in the queue that should be serviced (popped) ASAP. 178 std::queue<GURL> rush_queue_; 179 // The names in the queue that should only be serviced when rush_queue is 180 // empty. 181 std::queue<GURL> background_queue_; 182 183 DISALLOW_COPY_AND_ASSIGN(HostNameQueue); 184 }; 185 186 // A map that is keyed with the host/port that we've learned were the cause 187 // of loading additional URLs. The list of additional targets is held 188 // in a Referrer instance, which is a value in this map. 189 typedef std::map<GURL, Referrer> Referrers; 190 191 ~Predictor(); 192 193 // Perform actual resolution or preconnection to subresources now. This is 194 // an internal worker method that is reached via a post task from 195 // PredictFrameSubresources(). 196 void PrepareFrameSubresources(const GURL& url); 197 198 // Only for testing. Returns true if hostname has been successfully resolved 199 // (name found). 200 bool WasFound(const GURL& url) const { 201 Results::const_iterator it(results_.find(url)); 202 return (it != results_.end()) && 203 it->second.was_found(); 204 } 205 206 // Only for testing. Return how long was the resolution 207 // or UrlInfo::kNullDuration if it hasn't been resolved yet. 
208 base::TimeDelta GetResolutionDuration(const GURL& url) { 209 if (results_.find(url) == results_.end()) 210 return UrlInfo::kNullDuration; 211 return results_[url].resolve_duration(); 212 } 213 214 // Only for testing; 215 size_t peak_pending_lookups() const { return peak_pending_lookups_; } 216 217 // Access method for use by async lookup request to pass resolution result. 218 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found); 219 220 // Underlying method for both async and synchronous lookup to update state. 221 void LookupFinished(LookupRequest* request, 222 const GURL& url, bool found); 223 224 // Queue hostname for resolution. If queueing was done, return the pointer 225 // to the queued instance, otherwise return NULL. 226 UrlInfo* AppendToResolutionQueue(const GURL& url, 227 UrlInfo::ResolutionMotivation motivation); 228 229 // Check to see if too much queuing delay has been noted for the given info, 230 // which indicates that there is "congestion" or growing delay in handling the 231 // resolution of names. Rather than letting this congestion potentially grow 232 // without bounds, we abandon our queued efforts at pre-resolutions in such a 233 // case. 234 // To do this, we will recycle |info|, as well as all queued items, back to 235 // the state they had before they were queued up. We can't do anything about 236 // the resolutions we've already sent off for processing on another thread, so 237 // we just let them complete. On a slow system, subject to congestion, this 238 // will greatly reduce the number of resolutions done, but it will assure that 239 // any resolutions that are done, are in a timely and hence potentially 240 // helpful manner. 241 bool CongestionControlPerformed(UrlInfo* info); 242 243 // Take lookup requests from work_queue_ and tell HostResolver to look them up 244 // asynchronously, provided we don't exceed concurrent resolution limit. 
245 void StartSomeQueuedResolutions(); 246 247 // work_queue_ holds a list of names we need to look up. 248 HostNameQueue work_queue_; 249 250 // results_ contains information for existing/prior prefetches. 251 Results results_; 252 253 // For each URL that we might navigate to (that we've "learned about") 254 // we have a Referrer list. Each Referrer list has all hostnames we need to 255 // pre-resolve when there is a navigation to the orginial hostname. 256 Referrers referrers_; 257 258 std::set<LookupRequest*> pending_lookups_; 259 260 // For testing, to verify that we don't exceed the limit. 261 size_t peak_pending_lookups_; 262 263 // When true, we don't make new lookup requests. 264 bool shutdown_; 265 266 // The number of concurrent speculative lookups currently allowed to be sent 267 // to the resolver. Any additional lookups will be queued to avoid exceeding 268 // this value. The queue is a priority queue that will accelerate 269 // sub-resource speculation, and retard resolutions suggested by page scans. 270 const size_t max_concurrent_dns_lookups_; 271 272 // The maximum queueing delay that is acceptable before we enter congestion 273 // reduction mode, and discard all queued (but not yet assigned) resolutions. 274 const base::TimeDelta max_dns_queue_delay_; 275 276 // The host resolver we warm DNS entries for. 277 net::HostResolver* const host_resolver_; 278 279 // Are we currently using preconnection, rather than just DNS resolution, for 280 // subresources and omni-box search URLs. 281 bool preconnect_enabled_; 282 283 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl(). 284 std::string last_omnibox_host_; 285 286 // The time when the last preresolve was done for last_omnibox_host_. 287 base::TimeTicks last_omnibox_preresolve_; 288 289 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested 290 // preconnecting (because it was to a search service). 
291 int consecutive_omnibox_preconnect_count_; 292 293 // The time when the last preconnection was requested to a search service. 294 base::TimeTicks last_omnibox_preconnect_; 295 296 DISALLOW_COPY_AND_ASSIGN(Predictor); 297}; 298 299} // namespace chrome_browser_net 300 301#endif // CHROME_BROWSER_NET_PREDICTOR_H_ 302