predictor.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/net/predictor.h" 6 7#include <algorithm> 8#include <set> 9#include <sstream> 10 11#include "base/compiler_specific.h" 12#include "base/histogram.h" 13#include "base/stats_counters.h" 14#include "base/string_util.h" 15#include "base/time.h" 16#include "chrome/browser/chrome_thread.h" 17#include "chrome/browser/net/preconnect.h" 18#include "net/base/address_list.h" 19#include "net/base/completion_callback.h" 20#include "net/base/host_port_pair.h" 21#include "net/base/host_resolver.h" 22#include "net/base/net_errors.h" 23#include "net/base/net_log.h" 24 25using base::TimeDelta; 26 27namespace chrome_browser_net { 28 29class Predictor::LookupRequest { 30 public: 31 LookupRequest(Predictor* predictor, 32 net::HostResolver* host_resolver, 33 const GURL& url) 34 : ALLOW_THIS_IN_INITIALIZER_LIST( 35 net_callback_(this, &LookupRequest::OnLookupFinished)), 36 predictor_(predictor), 37 url_(url), 38 resolver_(host_resolver) { 39 } 40 41 // Return underlying network resolver status. 42 // net::OK ==> Host was found synchronously. 43 // net:ERR_IO_PENDING ==> Network will callback later with result. 44 // anything else ==> Host was not found synchronously. 45 int Start() { 46 net::HostResolver::RequestInfo resolve_info(url_.host(), 47 url_.EffectiveIntPort()); 48 49 // Make a note that this is a speculative resolve request. This allows us 50 // to separate it from real navigations in the observer's callback, and 51 // lets the HostResolver know it can de-prioritize it. 52 resolve_info.set_is_speculative(true); 53 return resolver_.Resolve( 54 resolve_info, &addresses_, &net_callback_, net::BoundNetLog()); 55 } 56 57 private: 58 void OnLookupFinished(int result) { 59 predictor_->OnLookupFinished(this, url_, result == net::OK); 60 } 61 62 // HostResolver will call us using this callback when resolution is complete. 63 net::CompletionCallbackImpl<LookupRequest> net_callback_; 64 65 Predictor* predictor_; // The predictor which started us. 66 67 const GURL url_; // Hostname to resolve. 68 net::SingleRequestHostResolver resolver_; 69 net::AddressList addresses_; 70 71 DISALLOW_COPY_AND_ASSIGN(LookupRequest); 72}; 73 74Predictor::Predictor(net::HostResolver* host_resolver, 75 base::TimeDelta max_dns_queue_delay, 76 size_t max_concurrent, 77 bool preconnect_enabled) 78 : peak_pending_lookups_(0), 79 shutdown_(false), 80 max_concurrent_dns_lookups_(max_concurrent), 81 max_dns_queue_delay_(max_dns_queue_delay), 82 host_resolver_(host_resolver), 83 preconnect_enabled_(preconnect_enabled) { 84 Referrer::SetUsePreconnectValuations(preconnect_enabled); 85} 86 87Predictor::~Predictor() { 88 DCHECK(shutdown_); 89} 90 91void Predictor::Shutdown() { 92 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 93 DCHECK(!shutdown_); 94 shutdown_ = true; 95 96 std::set<LookupRequest*>::iterator it; 97 for (it = pending_lookups_.begin(); it != pending_lookups_.end(); ++it) 98 delete *it; 99} 100 101// Overloaded Resolve() to take a vector of names. 102void Predictor::ResolveList(const UrlList& urls, 103 UrlInfo::ResolutionMotivation motivation) { 104 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 105 106 for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) { 107 AppendToResolutionQueue(*it, motivation); 108 } 109} 110 111// Basic Resolve() takes an invidual name, and adds it 112// to the queue. 113void Predictor::Resolve(const GURL& url, 114 UrlInfo::ResolutionMotivation motivation) { 115 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 116 if (!url.has_host()) 117 return; 118 AppendToResolutionQueue(url, motivation); 119} 120 121bool Predictor::AccruePrefetchBenefits(const GURL& referrer, 122 UrlInfo* navigation_info) { 123 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 124 GURL url = navigation_info->url(); 125 Results::iterator it = results_.find(url); 126 if (it == results_.end()) { 127 // Use UMA histogram to quantify potential future gains here. 128 UMA_HISTOGRAM_LONG_TIMES("DNS.UnexpectedResolutionL", 129 navigation_info->resolve_duration()); 130 navigation_info->DLogResultsStats("DNS UnexpectedResolution"); 131 132 LearnFromNavigation(referrer, navigation_info->url()); 133 return false; 134 } 135 UrlInfo& prefetched_host_info(it->second); 136 137 // Sometimes a host is used as a subresource by several referrers, so it is 138 // in our list, but was never motivated by a page-link-scan. In that case, it 139 // really is an "unexpected" navigation, and we should tally it, and augment 140 // our referrers_. 141 bool referrer_based_prefetch = !prefetched_host_info.was_linked(); 142 if (referrer_based_prefetch) { 143 // This wasn't the first time this host refered to *some* referrer. 144 LearnFromNavigation(referrer, navigation_info->url()); 145 } 146 147 DnsBenefit benefit = prefetched_host_info.AccruePrefetchBenefits( 148 navigation_info); 149 switch (benefit) { 150 case PREFETCH_NAME_FOUND: 151 case PREFETCH_NAME_NONEXISTANT: 152 dns_cache_hits_.push_back(*navigation_info); 153 if (referrer_based_prefetch) { 154 if (referrer.has_host()) { 155 referrers_[referrer].AccrueValue( 156 navigation_info->benefits_remaining(), url); 157 } 158 } 159 return true; 160 161 case PREFETCH_CACHE_EVICTION: 162 cache_eviction_map_[url] = *navigation_info; 163 return false; 164 165 case PREFETCH_NO_BENEFIT: 166 // Prefetch never hit the network. Name was pre-cached. 167 return false; 168 169 default: 170 NOTREACHED(); 171 return false; 172 } 173} 174 175void Predictor::LearnFromNavigation(const GURL& referring_url, 176 const GURL& target_url) { 177 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 178 if (referring_url.has_host() && 179 referring_url != target_url) { 180 DCHECK(referring_url == referring_url.GetWithEmptyPath()); 181 referrers_[referring_url].SuggestHost(target_url); 182 } 183} 184 185void Predictor::PredictSubresources(const GURL& url) { 186 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 187 Referrers::iterator it = referrers_.find(url); 188 if (referrers_.end() == it) 189 return; 190 Referrer* referrer = &(it->second); 191 referrer->IncrementUseCount(); 192 for (Referrer::iterator future_url = referrer->begin(); 193 future_url != referrer->end(); ++future_url) { 194 UrlInfo* queued_info = AppendToResolutionQueue( 195 future_url->first, 196 UrlInfo::LEARNED_REFERAL_MOTIVATED); 197 if (queued_info) 198 queued_info->SetReferringHostname(url); 199 } 200} 201 202void Predictor::PredictFrameSubresources(const GURL& url) { 203 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 204 DCHECK(url.GetWithEmptyPath() == url); 205 Referrers::iterator it = referrers_.find(url); 206 if (referrers_.end() == it) 207 return; 208 Referrer* referrer = &(it->second); 209 referrer->IncrementUseCount(); 210 for (Referrer::iterator future_url = referrer->begin(); 211 future_url != referrer->end(); ++future_url) { 212 if (future_url->second.IsPreconnectWorthDoing()) 213 Preconnect::PreconnectOnIOThread(future_url->first); 214 } 215} 216 217// Provide sort order so all .com's are together, etc. 218struct RightToLeftStringSorter { 219 bool operator()(const net::HostPortPair& left, 220 const net::HostPortPair& right) const { 221 return string_compare(left.host, right.host); 222 } 223 bool operator()(const GURL& left, 224 const GURL& right) const { 225 return string_compare(left.host(), right.host()); 226 } 227 228 static bool string_compare(const std::string& left_host, 229 const std::string right_host) { 230 if (left_host == right_host) return true; 231 size_t left_already_matched = left_host.size(); 232 size_t right_already_matched = right_host.size(); 233 234 // Ensure both strings have characters. 235 if (!left_already_matched) return true; 236 if (!right_already_matched) return false; 237 238 // Watch for trailing dot, so we'll always be safe to go one beyond dot. 239 if ('.' == left_host[left_already_matched - 1]) { 240 if ('.' != right_host[right_already_matched - 1]) 241 return true; 242 // Both have dots at end of string. 243 --left_already_matched; 244 --right_already_matched; 245 } else { 246 if ('.' == right_host[right_already_matched - 1]) 247 return false; 248 } 249 250 while (1) { 251 if (!left_already_matched) return true; 252 if (!right_already_matched) return false; 253 254 size_t left_length, right_length; 255 size_t left_start = left_host.find_last_of('.', left_already_matched - 1); 256 if (std::string::npos == left_start) { 257 left_length = left_already_matched; 258 left_already_matched = left_start = 0; 259 } else { 260 left_length = left_already_matched - left_start; 261 left_already_matched = left_start; 262 ++left_start; // Don't compare the dot. 263 } 264 size_t right_start = right_host.find_last_of('.', 265 right_already_matched - 1); 266 if (std::string::npos == right_start) { 267 right_length = right_already_matched; 268 right_already_matched = right_start = 0; 269 } else { 270 right_length = right_already_matched - right_start; 271 right_already_matched = right_start; 272 ++right_start; // Don't compare the dot. 273 } 274 275 int diff = left_host.compare(left_start, left_host.size(), 276 right_host, right_start, right_host.size()); 277 if (diff > 0) return false; 278 if (diff < 0) return true; 279 } 280 } 281}; 282 283void Predictor::GetHtmlReferrerLists(std::string* output) { 284 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 285 if (referrers_.empty()) 286 return; 287 288 // TODO(jar): Remove any plausible JavaScript from names before displaying. 289 290 typedef std::set<GURL, struct RightToLeftStringSorter> 291 SortedNames; 292 SortedNames sorted_names; 293 294 for (Referrers::iterator it = referrers_.begin(); 295 referrers_.end() != it; ++it) 296 sorted_names.insert(it->first); 297 298 output->append("<br><table border>"); 299 output->append( 300 "<tr><th>Host for Page</th>" 301 "<th>Page Load<br>Count</th>" 302 "<th>Subresource<br>Navigations</th>" 303 "<th>Subresource<br>PreConnects</th>" 304 "<th>Expected<br>Connects</th>" 305 "<th>DNS<br>Savings</th>" 306 "<th>Subresource Spec</th></tr>"); 307 308 for (SortedNames::iterator it = sorted_names.begin(); 309 sorted_names.end() != it; ++it) { 310 Referrer* referrer = &(referrers_[*it]); 311 bool first_set_of_futures = true; 312 for (Referrer::iterator future_url = referrer->begin(); 313 future_url != referrer->end(); ++future_url) { 314 output->append("<tr align=right>"); 315 if (first_set_of_futures) 316 StringAppendF(output, "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>", 317 static_cast<int>(referrer->size()), 318 it->spec().c_str(), 319 static_cast<int>(referrer->size()), 320 static_cast<int>(referrer->use_count())); 321 first_set_of_futures = false; 322 StringAppendF(output, 323 "<td>%d</td><td>%d</td><td>%2.3f</td><td>%dms</td><td>%s</td></tr>", 324 static_cast<int>(future_url->second.navigation_count()), 325 static_cast<int>(future_url->second.preconnection_count()), 326 static_cast<double>(future_url->second.subresource_use_rate()), 327 static_cast<int>(future_url->second.latency().InMilliseconds()), 328 future_url->first.spec().c_str()); 329 } 330 } 331 output->append("</table>"); 332} 333 334void Predictor::GetHtmlInfo(std::string* output) { 335 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 336 // Local lists for calling UrlInfo 337 UrlInfo::DnsInfoTable cache_hits; 338 UrlInfo::DnsInfoTable cache_evictions; 339 UrlInfo::DnsInfoTable name_not_found; 340 UrlInfo::DnsInfoTable network_hits; 341 UrlInfo::DnsInfoTable already_cached; 342 343 // Get copies of all useful data. 344 typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> 345 Snapshot; 346 Snapshot snapshot; 347 { 348 // UrlInfo supports value semantics, so we can do a shallow copy. 349 for (Results::iterator it(results_.begin()); it != results_.end(); it++) { 350 snapshot[it->first] = it->second; 351 } 352 for (Results::iterator it(cache_eviction_map_.begin()); 353 it != cache_eviction_map_.end(); 354 it++) { 355 cache_evictions.push_back(it->second); 356 } 357 // Reverse list as we copy cache hits, so that new hits are at the top. 358 size_t index = dns_cache_hits_.size(); 359 while (index > 0) { 360 index--; 361 cache_hits.push_back(dns_cache_hits_[index]); 362 } 363 } 364 365 // Partition the UrlInfo's into categories. 366 for (Snapshot::iterator it(snapshot.begin()); it != snapshot.end(); it++) { 367 if (it->second.was_nonexistant()) { 368 name_not_found.push_back(it->second); 369 continue; 370 } 371 if (!it->second.was_found()) 372 continue; // Still being processed. 373 if (TimeDelta() != it->second.benefits_remaining()) { 374 network_hits.push_back(it->second); // With no benefit yet. 375 continue; 376 } 377 if (UrlInfo::kMaxNonNetworkDnsLookupDuration > 378 it->second.resolve_duration()) { 379 already_cached.push_back(it->second); 380 continue; 381 } 382 // Remaining case is where prefetch benefit was significant, and was used. 383 // Since we shot those cases as historical hits, we won't bother here. 384 } 385 386 bool brief = false; 387#ifdef NDEBUG 388 brief = true; 389#endif // NDEBUG 390 391 // Call for display of each table, along with title. 392 UrlInfo::GetHtmlTable(cache_hits, 393 "Prefetching DNS records produced benefits for ", false, output); 394 UrlInfo::GetHtmlTable(cache_evictions, 395 "Cache evictions negated DNS prefetching benefits for ", brief, output); 396 UrlInfo::GetHtmlTable(network_hits, 397 "Prefetching DNS records was not yet beneficial for ", brief, output); 398 UrlInfo::GetHtmlTable(already_cached, 399 "Previously cached resolutions were found for ", brief, output); 400 UrlInfo::GetHtmlTable(name_not_found, 401 "Prefetching DNS records revealed non-existance for ", brief, output); 402} 403 404UrlInfo* Predictor::AppendToResolutionQueue( 405 const GURL& url, 406 UrlInfo::ResolutionMotivation motivation) { 407 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 408 DCHECK(url.has_host()); 409 410 if (shutdown_) 411 return NULL; 412 413 UrlInfo* info = &results_[url]; 414 info->SetUrl(url); // Initialize or DCHECK. 415 // TODO(jar): I need to discard names that have long since expired. 416 // Currently we only add to the domain map :-/ 417 418 DCHECK(info->HasUrl(url)); 419 420 if (!info->NeedsDnsUpdate()) { 421 info->DLogResultsStats("DNS PrefetchNotUpdated"); 422 return NULL; 423 } 424 425 info->SetQueuedState(motivation); 426 work_queue_.Push(url, motivation); 427 StartSomeQueuedResolutions(); 428 return info; 429} 430 431void Predictor::StartSomeQueuedResolutions() { 432 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 433 434 while (!work_queue_.IsEmpty() && 435 pending_lookups_.size() < max_concurrent_dns_lookups_) { 436 const GURL url(work_queue_.Pop()); 437 UrlInfo* info = &results_[url]; 438 DCHECK(info->HasUrl(url)); 439 info->SetAssignedState(); 440 441 if (CongestionControlPerformed(info)) { 442 DCHECK(work_queue_.IsEmpty()); 443 return; 444 } 445 446 LookupRequest* request = new LookupRequest(this, host_resolver_, url); 447 int status = request->Start(); 448 if (status == net::ERR_IO_PENDING) { 449 // Will complete asynchronously. 450 pending_lookups_.insert(request); 451 peak_pending_lookups_ = std::max(peak_pending_lookups_, 452 pending_lookups_.size()); 453 } else { 454 // Completed synchronously (was already cached by HostResolver), or else 455 // there was (equivalently) some network error that prevents us from 456 // finding the name. Status net::OK means it was "found." 457 LookupFinished(request, url, status == net::OK); 458 delete request; 459 } 460 } 461} 462 463bool Predictor::CongestionControlPerformed(UrlInfo* info) { 464 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 465 // Note: queue_duration is ONLY valid after we go to assigned state. 466 if (info->queue_duration() < max_dns_queue_delay_) 467 return false; 468 // We need to discard all entries in our queue, as we're keeping them waiting 469 // too long. By doing this, we'll have a chance to quickly service urgent 470 // resolutions, and not have a bogged down system. 471 while (true) { 472 info->RemoveFromQueue(); 473 if (work_queue_.IsEmpty()) 474 break; 475 info = &results_[work_queue_.Pop()]; 476 info->SetAssignedState(); 477 } 478 return true; 479} 480 481void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url, 482 bool found) { 483 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 484 485 LookupFinished(request, url, found); 486 pending_lookups_.erase(request); 487 delete request; 488 489 StartSomeQueuedResolutions(); 490} 491 492void Predictor::LookupFinished(LookupRequest* request, const GURL& url, 493 bool found) { 494 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 495 UrlInfo* info = &results_[url]; 496 DCHECK(info->HasUrl(url)); 497 if (info->is_marked_to_delete()) { 498 results_.erase(url); 499 } else { 500 if (found) 501 info->SetFoundState(); 502 else 503 info->SetNoSuchNameState(); 504 } 505} 506 507void Predictor::DiscardAllResults() { 508 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 509 // Delete anything listed so far in this session that shows in about:dns. 510 cache_eviction_map_.clear(); 511 dns_cache_hits_.clear(); 512 referrers_.clear(); 513 514 515 // Try to delete anything in our work queue. 516 while (!work_queue_.IsEmpty()) { 517 // Emulate processing cycle as though host was not found. 518 GURL url = work_queue_.Pop(); 519 UrlInfo* info = &results_[url]; 520 DCHECK(info->HasUrl(url)); 521 info->SetAssignedState(); 522 info->SetNoSuchNameState(); 523 } 524 // Now every result_ is either resolved, or is being resolved 525 // (see LookupRequest). 526 527 // Step through result_, recording names of all hosts that can't be erased. 528 // We can't erase anything being worked on. 529 Results assignees; 530 for (Results::iterator it = results_.begin(); results_.end() != it; ++it) { 531 GURL url(it->first); 532 UrlInfo* info = &it->second; 533 DCHECK(info->HasUrl(url)); 534 if (info->is_assigned()) { 535 info->SetPendingDeleteState(); 536 assignees[url] = *info; 537 } 538 } 539 DCHECK(assignees.size() <= max_concurrent_dns_lookups_); 540 results_.clear(); 541 // Put back in the names being worked on. 542 for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) { 543 DCHECK(it->second.is_marked_to_delete()); 544 results_[it->first] = it->second; 545 } 546} 547 548void Predictor::TrimReferrers() { 549 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 550 std::vector<GURL> urls; 551 for (Referrers::const_iterator it = referrers_.begin(); 552 it != referrers_.end(); ++it) 553 urls.push_back(it->first); 554 for (size_t i = 0; i < urls.size(); ++i) 555 if (!referrers_[urls[i]].Trim()) 556 referrers_.erase(urls[i]); 557} 558 559void Predictor::SerializeReferrers(ListValue* referral_list) { 560 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 561 referral_list->Clear(); 562 referral_list->Append(new FundamentalValue(DNS_REFERRER_VERSION)); 563 for (Referrers::const_iterator it = referrers_.begin(); 564 it != referrers_.end(); ++it) { 565 // Serialize the list of subresource names. 566 Value* subresource_list(it->second.Serialize()); 567 568 // Create a list for each referer. 569 ListValue* motivator(new ListValue); 570 motivator->Append(new StringValue(it->first.spec())); 571 motivator->Append(subresource_list); 572 573 referral_list->Append(motivator); 574 } 575} 576 577void Predictor::DeserializeReferrers(const ListValue& referral_list) { 578 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO)); 579 int format_version = -1; 580 if (referral_list.GetSize() > 0 && 581 referral_list.GetInteger(0, &format_version) && 582 format_version == DNS_REFERRER_VERSION) { 583 for (size_t i = 1; i < referral_list.GetSize(); ++i) { 584 ListValue* motivator; 585 if (!referral_list.GetList(i, &motivator)) { 586 NOTREACHED(); 587 return; 588 } 589 std::string motivating_url_spec; 590 if (!motivator->GetString(0, &motivating_url_spec)) { 591 NOTREACHED(); 592 return; 593 } 594 595 Value* subresource_list; 596 if (!motivator->Get(1, &subresource_list)) { 597 NOTREACHED(); 598 return; 599 } 600 601 referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list); 602 } 603 } 604} 605 606 607//------------------------------------------------------------------------------ 608 609Predictor::HostNameQueue::HostNameQueue() { 610} 611 612Predictor::HostNameQueue::~HostNameQueue() { 613} 614 615void Predictor::HostNameQueue::Push(const GURL& url, 616 UrlInfo::ResolutionMotivation motivation) { 617 switch (motivation) { 618 case UrlInfo::STATIC_REFERAL_MOTIVATED: 619 case UrlInfo::LEARNED_REFERAL_MOTIVATED: 620 case UrlInfo::MOUSE_OVER_MOTIVATED: 621 rush_queue_.push(url); 622 break; 623 624 default: 625 background_queue_.push(url); 626 break; 627 } 628} 629 630bool Predictor::HostNameQueue::IsEmpty() const { 631 return rush_queue_.empty() && background_queue_.empty(); 632} 633 634GURL Predictor::HostNameQueue::Pop() { 635 DCHECK(!IsEmpty()); 636 std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_ 637 : &rush_queue_); 638 GURL url(queue->front()); 639 queue->pop(); 640 return url; 641} 642 643} // namespace chrome_browser_net 644