predictor.h revision 4e180b6a0b4720a9b8e9e959a882386f690f08ff
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// A Predictor object is instantiated once in the browser process, and manages 6// both preresolution of hostnames, as well as TCP/IP preconnection to expected 7// subresources. 8// Most hostname lists are provided by the renderer processes, and include URLs 9// that *might* be used in the near future by the browsing user. One goal of 10// this class is to cause the underlying DNS structure to lookup a hostname 11// before it is really needed, and hence reduce latency in the standard lookup 12// paths. 13// Subresource relationships are usually acquired from the referrer field in a 14// navigation. A subresource URL may be associated with a referrer URL. Later 15// navigations may, if the likelihood of needing the subresource is high enough, 16// cause this module to speculatively create a TCP/IP connection. If there is 17// only a low likelihood, then a DNS pre-resolution operation may be performed. 18 19#ifndef CHROME_BROWSER_NET_PREDICTOR_H_ 20#define CHROME_BROWSER_NET_PREDICTOR_H_ 21 22#include <map> 23#include <queue> 24#include <set> 25#include <string> 26#include <vector> 27 28#include "base/gtest_prod_util.h" 29#include "base/memory/scoped_ptr.h" 30#include "base/memory/weak_ptr.h" 31#include "chrome/browser/net/referrer.h" 32#include "chrome/browser/net/timed_cache.h" 33#include "chrome/browser/net/url_info.h" 34#include "chrome/common/net/predictor_common.h" 35#include "net/base/host_port_pair.h" 36 37class IOThread; 38class PrefService; 39class Profile; 40 41namespace base { 42class ListValue; 43class WaitableEvent; 44} 45 46namespace net { 47class HostResolver; 48class URLRequestContextGetter; 49} 50 51namespace user_prefs { 52class PrefRegistrySyncable; 53} 54 55namespace chrome_browser_net { 56 57typedef chrome_common_net::UrlList UrlList; 58typedef chrome_common_net::NameList NameList; 59typedef std::map<GURL, UrlInfo> Results; 60 61// Predictor is constructed during Profile construction (on the UI thread), 62// but it is destroyed on the IO thread when ProfileIOData goes away. All of 63// its core state and functionality happens on the IO thread. The only UI 64// methods are initialization / shutdown related (including preconnect 65// initialization), or convenience methods that internally forward calls to 66// the IO thread. 67class Predictor { 68 public: 69 // A version number for prefs that are saved. This should be incremented when 70 // we change the format so that we discard old data. 71 static const int kPredictorReferrerVersion; 72 73 // Given that the underlying Chromium resolver defaults to a total maximum of 74 // 8 paralell resolutions, we will avoid any chance of starving navigational 75 // resolutions by limiting the number of paralell speculative resolutions. 76 // This is used in the field trials and testing. 77 // TODO(jar): Move this limitation into the resolver. 78 static const size_t kMaxSpeculativeParallelResolves; 79 80 // To control the congestion avoidance system, we need an estimate of how 81 // many speculative requests may arrive at once. Since we currently only 82 // keep 8 subresource names for each frame, we'll use that as our basis. 83 // Note that when scanning search results lists, we might actually get 10 at 84 // a time, and wikipedia can often supply (during a page scan) upwards of 50. 85 // In those odd cases, we may discard some of the later speculative requests 86 // mistakenly assuming that the resolutions took too long. 87 static const int kTypicalSpeculativeGroupSize; 88 89 // The next constant specifies an amount of queueing delay that is 90 // "too large," and indicative of problems with resolutions (perhaps due to 91 // an overloaded router, or such). When we exceed this delay, congestion 92 // avoidance will kick in and all speculations in the queue will be discarded. 93 static const int kMaxSpeculativeResolveQueueDelayMs; 94 95 // We don't bother learning to preconnect via a GET if the original URL 96 // navigation was so long ago, that a preconnection would have been dropped 97 // anyway. We believe most servers will drop the connection in 10 seconds, so 98 // we currently estimate this time-till-drop at 10 seconds. 99 // TODO(jar): We should do a persistent field trial to validate/optimize this. 100 static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet; 101 102 // |max_concurrent| specifies how many concurrent (parallel) prefetches will 103 // be performed. Host lookups will be issued through |host_resolver|. 104 explicit Predictor(bool preconnect_enabled); 105 106 virtual ~Predictor(); 107 108 // This function is used to create a predictor. For testing, we can create 109 // a version which does a simpler shutdown. 110 static Predictor* CreatePredictor(bool preconnect_enabled, 111 bool simple_shutdown); 112 113 static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry); 114 115 // ------------- Start UI thread methods. 116 117 virtual void InitNetworkPredictor(PrefService* user_prefs, 118 PrefService* local_state, 119 IOThread* io_thread, 120 net::URLRequestContextGetter* getter); 121 122 // The Omnibox has proposed a given url to the user, and if it is a search 123 // URL, then it also indicates that this is preconnectable (i.e., we could 124 // preconnect to the search server). 125 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable); 126 127 // Preconnect a URL and all of its subresource domains. 128 void PreconnectUrlAndSubresources(const GURL& url, 129 const GURL& first_party_for_cookies); 130 131 static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs, 132 PrefService* local_state); 133 134 static void set_max_queueing_delay(int max_queueing_delay_ms); 135 136 static void set_max_parallel_resolves(size_t max_parallel_resolves); 137 138 virtual void ShutdownOnUIThread(); 139 140 // ------------- End UI thread methods. 141 142 // ------------- Start IO thread methods. 143 144 // Cancel pending requests and prevent new ones from being made. 145 void Shutdown(); 146 147 // In some circumstances, for privacy reasons, all results should be 148 // discarded. This method gracefully handles that activity. 149 // Destroy all our internal state, which shows what names we've looked up, and 150 // how long each has taken, etc. etc. We also destroy records of suggesses 151 // (cache hits etc.). 152 void DiscardAllResults(); 153 154 // Add hostname(s) to the queue for processing. 155 void ResolveList(const UrlList& urls, 156 UrlInfo::ResolutionMotivation motivation); 157 158 void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation); 159 160 // Record details of a navigation so that we can preresolve the host name 161 // ahead of time the next time the users navigates to the indicated host. 162 // Should only be called when urls are distinct, and they should already be 163 // canonicalized to not have a path. 164 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url); 165 166 // When displaying info in about:dns, the following API is called. 167 static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output); 168 169 // Dump HTML table containing list of referrers for about:dns. 170 void GetHtmlReferrerLists(std::string* output); 171 172 // Dump the list of currently known referrer domains and related prefetchable 173 // domains for about:dns. 174 void GetHtmlInfo(std::string* output); 175 176 // Discards any referrer for which all the suggested host names are currently 177 // annotated with negligible expected-use. Scales down (diminishes) the 178 // expected-use of those that remain, so that their use will go down by a 179 // factor each time we trim (moving the referrer closer to being discarded in 180 // a future call). 181 // The task is performed synchronously and completes before returing. 182 void TrimReferrersNow(); 183 184 // Construct a ListValue object that contains all the data in the referrers_ 185 // so that it can be persisted in a pref. 186 void SerializeReferrers(base::ListValue* referral_list); 187 188 // Process a ListValue that contains all the data from a previous reference 189 // list, as constructed by SerializeReferrers(), and add all the identified 190 // values into the current referrer list. 191 void DeserializeReferrers(const base::ListValue& referral_list); 192 193 void DeserializeReferrersThenDelete(base::ListValue* referral_list); 194 195 void DiscardInitialNavigationHistory(); 196 197 void FinalizeInitializationOnIOThread( 198 const std::vector<GURL>& urls_to_prefetch, 199 base::ListValue* referral_list, 200 IOThread* io_thread, 201 bool predictor_enabled); 202 203 // During startup, we learn what the first N urls visited are, and then 204 // resolve the associated hosts ASAP during our next startup. 205 void LearnAboutInitialNavigation(const GURL& url); 206 207 // Renderer bundles up list and sends to this browser API via IPC. 208 // TODO(jar): Use UrlList instead to include port and scheme. 209 void DnsPrefetchList(const NameList& hostnames); 210 211 // May be called from either the IO or UI thread and will PostTask 212 // to the IO thread if necessary. 213 void DnsPrefetchMotivatedList(const UrlList& urls, 214 UrlInfo::ResolutionMotivation motivation); 215 216 // May be called from either the IO or UI thread and will PostTask 217 // to the IO thread if necessary. 218 void SaveStateForNextStartupAndTrim(PrefService* prefs); 219 220 void SaveDnsPrefetchStateForNextStartupAndTrim( 221 base::ListValue* startup_list, 222 base::ListValue* referral_list, 223 base::WaitableEvent* completion); 224 225 // May be called from either the IO or UI thread and will PostTask 226 // to the IO thread if necessary. 227 void EnablePredictor(bool enable); 228 229 void EnablePredictorOnIOThread(bool enable); 230 231 // May be called from either the IO or UI thread and will PostTask 232 // to the IO thread if necessary. 233 void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies, 234 UrlInfo::ResolutionMotivation motivation, int count); 235 236 void PreconnectUrlOnIOThread(const GURL& url, 237 const GURL& first_party_for_cookies, 238 UrlInfo::ResolutionMotivation motivation, 239 int count); 240 241 void RecordPreconnectTrigger(const GURL& url); 242 243 void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain, 244 bool is_subresource); 245 246 void RecordLinkNavigation(const GURL& url); 247 248 // ------------- End IO thread methods. 249 250 // The following methods may be called on either the IO or UI threads. 251 252 // Instigate pre-connection to any URLs, or pre-resolution of related host, 253 // that we predict will be needed after this navigation (typically 254 // more-embedded resources on a page). This method will actually post a task 255 // to do the actual work, so as not to jump ahead of the frame navigation that 256 // instigated this activity. 257 void PredictFrameSubresources(const GURL& url, 258 const GURL& first_party_for_cookies); 259 260 // Put URL in canonical form, including a scheme, host, and port. 261 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url 262 // cannot be otherwise canonicalized. 263 static GURL CanonicalizeUrl(const GURL& url); 264 265 // Used for testing. 266 void SetHostResolver(net::HostResolver* host_resolver) { 267 host_resolver_ = host_resolver; 268 } 269 // Used for testing. 270 size_t max_concurrent_dns_lookups() const { 271 return max_concurrent_dns_lookups_; 272 } 273 // Used for testing. 274 void SetShutdown(bool shutdown) { 275 shutdown_ = shutdown; 276 } 277 278 // Flag setting to use preconnection instead of just DNS pre-fetching. 279 bool preconnect_enabled() const { 280 return preconnect_enabled_; 281 } 282 283 // Flag setting for whether we are prefetching dns lookups. 284 bool predictor_enabled() const { 285 return predictor_enabled_; 286 } 287 288 289 private: 290 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest); 291 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest); 292 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest); 293 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest); 294 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest); 295 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest); 296 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest); 297 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest); 298 friend class WaitForResolutionHelper; // For testing. 299 300 class LookupRequest; 301 302 // A simple priority queue for handling host names. 303 // Some names that are queued up have |motivation| that requires very rapid 304 // handling. For example, a sub-resource name lookup MUST be done before the 305 // actual sub-resource is fetched. In contrast, a name that was speculatively 306 // noted in a page has to be resolved before the user "gets around to" 307 // clicking on a link. By tagging (with a motivation) each push we make into 308 // this FIFO queue, the queue can re-order the more important names to service 309 // them sooner (relative to some low priority background resolutions). 310 class HostNameQueue { 311 public: 312 HostNameQueue(); 313 ~HostNameQueue(); 314 void Push(const GURL& url, 315 UrlInfo::ResolutionMotivation motivation); 316 bool IsEmpty() const; 317 GURL Pop(); 318 319 private: 320 // The names in the queue that should be serviced (popped) ASAP. 321 std::queue<GURL> rush_queue_; 322 // The names in the queue that should only be serviced when rush_queue is 323 // empty. 324 std::queue<GURL> background_queue_; 325 326 DISALLOW_COPY_AND_ASSIGN(HostNameQueue); 327 }; 328 329 // The InitialObserver monitors navigations made by the network stack. This 330 // is only used to identify startup time resolutions (for re-resolution 331 // during our next process startup). 332 // TODO(jar): Consider preconnecting at startup, which may be faster than 333 // waiting for render process to start and request a connection. 334 class InitialObserver { 335 public: 336 InitialObserver(); 337 ~InitialObserver(); 338 // Recording of when we observed each navigation. 339 typedef std::map<GURL, base::TimeTicks> FirstNavigations; 340 341 // Potentially add a new URL to our startup list. 342 void Append(const GURL& url, Predictor* predictor); 343 344 // Get an HTML version of our current planned first_navigations_. 345 void GetFirstResolutionsHtml(std::string* output); 346 347 // Persist the current first_navigations_ for storage in a list. 348 void GetInitialDnsResolutionList(base::ListValue* startup_list); 349 350 // Discards all initial loading history. 351 void DiscardInitialNavigationHistory() { first_navigations_.clear(); } 352 353 private: 354 // List of the first N URL resolutions observed in this run. 355 FirstNavigations first_navigations_; 356 357 // The number of URLs we'll save for pre-resolving at next startup. 358 static const size_t kStartupResolutionCount = 10; 359 }; 360 361 // A map that is keyed with the host/port that we've learned were the cause 362 // of loading additional URLs. The list of additional targets is held 363 // in a Referrer instance, which is a value in this map. 364 typedef std::map<GURL, Referrer> Referrers; 365 366 // Depending on the expected_subresource_use_, we may either make a TCP/IP 367 // preconnection, or merely pre-resolve the hostname via DNS (or even do 368 // nothing). The following are the threasholds for taking those actions. 369 static const double kPreconnectWorthyExpectedValue; 370 static const double kDNSPreresolutionWorthyExpectedValue; 371 // Referred hosts with a subresource_use_rate_ that are less than the 372 // following threshold will be discarded when we Trim() the list. 373 static const double kDiscardableExpectedValue; 374 // During trimming operation to discard hosts for which we don't have likely 375 // subresources, we multiply the expected_subresource_use_ value by the 376 // following ratio until that value is less than kDiscardableExpectedValue. 377 // This number should always be less than 1, an more than 0. 378 static const double kReferrerTrimRatio; 379 380 // Interval between periodic trimming of our whole referrer list. 381 // We only do a major trimming about once an hour, and then only when the user 382 // is actively browsing. 383 static const int64 kDurationBetweenTrimmingsHours; 384 // Interval between incremental trimmings (to avoid inducing Jank). 385 static const int64 kDurationBetweenTrimmingIncrementsSeconds; 386 // Number of referring URLs processed in an incremental trimming. 387 static const size_t kUrlsTrimmedPerIncrement; 388 389 // Only for testing. Returns true if hostname has been successfully resolved 390 // (name found). 391 bool WasFound(const GURL& url) const { 392 Results::const_iterator it(results_.find(url)); 393 return (it != results_.end()) && 394 it->second.was_found(); 395 } 396 397 // Only for testing. Return how long was the resolution 398 // or UrlInfo::NullDuration() if it hasn't been resolved yet. 399 base::TimeDelta GetResolutionDuration(const GURL& url) { 400 if (results_.find(url) == results_.end()) 401 return UrlInfo::NullDuration(); 402 return results_[url].resolve_duration(); 403 } 404 405 // Only for testing; 406 size_t peak_pending_lookups() const { return peak_pending_lookups_; } 407 408 // ------------- Start IO thread methods. 409 410 // Perform actual resolution or preconnection to subresources now. This is 411 // an internal worker method that is reached via a post task from 412 // PredictFrameSubresources(). 413 void PrepareFrameSubresources(const GURL& url, 414 const GURL& first_party_for_cookies); 415 416 // Access method for use by async lookup request to pass resolution result. 417 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found); 418 419 // Underlying method for both async and synchronous lookup to update state. 420 void LookupFinished(LookupRequest* request, 421 const GURL& url, bool found); 422 423 // Queue hostname for resolution. If queueing was done, return the pointer 424 // to the queued instance, otherwise return NULL. 425 UrlInfo* AppendToResolutionQueue(const GURL& url, 426 UrlInfo::ResolutionMotivation motivation); 427 428 // Check to see if too much queuing delay has been noted for the given info, 429 // which indicates that there is "congestion" or growing delay in handling the 430 // resolution of names. Rather than letting this congestion potentially grow 431 // without bounds, we abandon our queued efforts at pre-resolutions in such a 432 // case. 433 // To do this, we will recycle |info|, as well as all queued items, back to 434 // the state they had before they were queued up. We can't do anything about 435 // the resolutions we've already sent off for processing on another thread, so 436 // we just let them complete. On a slow system, subject to congestion, this 437 // will greatly reduce the number of resolutions done, but it will assure that 438 // any resolutions that are done, are in a timely and hence potentially 439 // helpful manner. 440 bool CongestionControlPerformed(UrlInfo* info); 441 442 // Take lookup requests from work_queue_ and tell HostResolver to look them up 443 // asynchronously, provided we don't exceed concurrent resolution limit. 444 void StartSomeQueuedResolutions(); 445 446 // Performs trimming similar to TrimReferrersNow(), except it does it as a 447 // series of short tasks by posting continuations again an again until done. 448 void TrimReferrers(); 449 450 // Loads urls_being_trimmed_ from keys of current referrers_. 451 void LoadUrlsForTrimming(); 452 453 // Posts a task to do additional incremental trimming of referrers_. 454 void PostIncrementalTrimTask(); 455 456 // Calls Trim() on some or all of urls_being_trimmed_. 457 // If it does not process all the URLs in that vector, it posts a task to 458 // continue with them shortly (i.e., it yeilds and continues). 459 void IncrementalTrimReferrers(bool trim_all_now); 460 461 // ------------- End IO thread methods. 462 463 scoped_ptr<InitialObserver> initial_observer_; 464 465 // Reference to URLRequestContextGetter from the Profile which owns the 466 // predictor. Used by Preconnect. 467 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_; 468 469 // Status of speculative DNS resolution and speculative TCP/IP connection 470 // feature. 471 bool predictor_enabled_; 472 473 // work_queue_ holds a list of names we need to look up. 474 HostNameQueue work_queue_; 475 476 // results_ contains information for existing/prior prefetches. 477 Results results_; 478 479 std::set<LookupRequest*> pending_lookups_; 480 481 // For testing, to verify that we don't exceed the limit. 482 size_t peak_pending_lookups_; 483 484 // When true, we don't make new lookup requests. 485 bool shutdown_; 486 487 // The number of concurrent speculative lookups currently allowed to be sent 488 // to the resolver. Any additional lookups will be queued to avoid exceeding 489 // this value. The queue is a priority queue that will accelerate 490 // sub-resource speculation, and retard resolutions suggested by page scans. 491 const size_t max_concurrent_dns_lookups_; 492 493 // The maximum queueing delay that is acceptable before we enter congestion 494 // reduction mode, and discard all queued (but not yet assigned) resolutions. 495 const base::TimeDelta max_dns_queue_delay_; 496 497 // The host resolver we warm DNS entries for. 498 net::HostResolver* host_resolver_; 499 500 // Are we currently using preconnection, rather than just DNS resolution, for 501 // subresources and omni-box search URLs. 502 bool preconnect_enabled_; 503 504 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl(). 505 std::string last_omnibox_host_; 506 507 // The time when the last preresolve was done for last_omnibox_host_. 508 base::TimeTicks last_omnibox_preresolve_; 509 510 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested 511 // preconnecting (because it was to a search service). 512 int consecutive_omnibox_preconnect_count_; 513 514 // The time when the last preconnection was requested to a search service. 515 base::TimeTicks last_omnibox_preconnect_; 516 517 class PreconnectUsage; 518 scoped_ptr<PreconnectUsage> preconnect_usage_; 519 520 // For each URL that we might navigate to (that we've "learned about") 521 // we have a Referrer list. Each Referrer list has all hostnames we might 522 // need to pre-resolve or pre-connect to when there is a navigation to the 523 // orginial hostname. 524 Referrers referrers_; 525 526 // List of URLs in referrers_ currently being trimmed (scaled down to 527 // eventually be aged out of use). 528 std::vector<GURL> urls_being_trimmed_; 529 530 // A time after which we need to do more trimming of referrers. 531 base::TimeTicks next_trim_time_; 532 533 scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_; 534 535 DISALLOW_COPY_AND_ASSIGN(Predictor); 536}; 537 538// This version of the predictor is used for testing. 539class SimplePredictor : public Predictor { 540 public: 541 explicit SimplePredictor(bool preconnect_enabled) 542 : Predictor(preconnect_enabled) {} 543 virtual ~SimplePredictor() {} 544 virtual void InitNetworkPredictor( 545 PrefService* user_prefs, 546 PrefService* local_state, 547 IOThread* io_thread, 548 net::URLRequestContextGetter* getter) OVERRIDE; 549 virtual void ShutdownOnUIThread() OVERRIDE; 550}; 551 552} // namespace chrome_browser_net 553 554#endif // CHROME_BROWSER_NET_PREDICTOR_H_ 555