predictor.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/net/predictor.h"
6
7#include <algorithm>
8#include <set>
9#include <sstream>
10
11#include "base/compiler_specific.h"
12#include "base/histogram.h"
13#include "base/stats_counters.h"
14#include "base/string_util.h"
15#include "base/time.h"
16#include "chrome/browser/chrome_thread.h"
17#include "chrome/browser/net/preconnect.h"
18#include "net/base/address_list.h"
19#include "net/base/completion_callback.h"
20#include "net/base/host_port_pair.h"
21#include "net/base/host_resolver.h"
22#include "net/base/net_errors.h"
23#include "net/base/net_log.h"
24
25using base::TimeDelta;
26
27namespace chrome_browser_net {
28
29class Predictor::LookupRequest {
30 public:
31  LookupRequest(Predictor* predictor,
32                net::HostResolver* host_resolver,
33                const GURL& url)
34      : ALLOW_THIS_IN_INITIALIZER_LIST(
35          net_callback_(this, &LookupRequest::OnLookupFinished)),
36        predictor_(predictor),
37        url_(url),
38        resolver_(host_resolver) {
39  }
40
41  // Return underlying network resolver status.
42  // net::OK ==> Host was found synchronously.
43  // net:ERR_IO_PENDING ==> Network will callback later with result.
44  // anything else ==> Host was not found synchronously.
45  int Start() {
46    net::HostResolver::RequestInfo resolve_info(url_.host(),
47                                                url_.EffectiveIntPort());
48
49    // Make a note that this is a speculative resolve request. This allows us
50    // to separate it from real navigations in the observer's callback, and
51    // lets the HostResolver know it can de-prioritize it.
52    resolve_info.set_is_speculative(true);
53    return resolver_.Resolve(
54        resolve_info, &addresses_, &net_callback_, net::BoundNetLog());
55  }
56
57 private:
58  void OnLookupFinished(int result) {
59    predictor_->OnLookupFinished(this, url_, result == net::OK);
60  }
61
62  // HostResolver will call us using this callback when resolution is complete.
63  net::CompletionCallbackImpl<LookupRequest> net_callback_;
64
65  Predictor* predictor_;  // The predictor which started us.
66
67  const GURL url_;  // Hostname to resolve.
68  net::SingleRequestHostResolver resolver_;
69  net::AddressList addresses_;
70
71  DISALLOW_COPY_AND_ASSIGN(LookupRequest);
72};
73
74Predictor::Predictor(net::HostResolver* host_resolver,
75                     base::TimeDelta max_dns_queue_delay,
76                     size_t max_concurrent,
77                     bool preconnect_enabled)
78    : peak_pending_lookups_(0),
79      shutdown_(false),
80      max_concurrent_dns_lookups_(max_concurrent),
81      max_dns_queue_delay_(max_dns_queue_delay),
82      host_resolver_(host_resolver),
83      preconnect_enabled_(preconnect_enabled) {
84  Referrer::SetUsePreconnectValuations(preconnect_enabled);
85}
86
87Predictor::~Predictor() {
88  DCHECK(shutdown_);
89}
90
91void Predictor::Shutdown() {
92  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
93  DCHECK(!shutdown_);
94  shutdown_ = true;
95
96  std::set<LookupRequest*>::iterator it;
97  for (it = pending_lookups_.begin(); it != pending_lookups_.end(); ++it)
98    delete *it;
99}
100
101// Overloaded Resolve() to take a vector of names.
102void Predictor::ResolveList(const UrlList& urls,
103                            UrlInfo::ResolutionMotivation motivation) {
104  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
105
106  for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
107    AppendToResolutionQueue(*it, motivation);
108  }
109}
110
111// Basic Resolve() takes an invidual name, and adds it
112// to the queue.
113void Predictor::Resolve(const GURL& url,
114                        UrlInfo::ResolutionMotivation motivation) {
115  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
116  if (!url.has_host())
117    return;
118  AppendToResolutionQueue(url, motivation);
119}
120
121bool Predictor::AccruePrefetchBenefits(const GURL& referrer,
122                                       UrlInfo* navigation_info) {
123  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
124  GURL url = navigation_info->url();
125  Results::iterator it = results_.find(url);
126  if (it == results_.end()) {
127    // Use UMA histogram to quantify potential future gains here.
128    UMA_HISTOGRAM_LONG_TIMES("DNS.UnexpectedResolutionL",
129                             navigation_info->resolve_duration());
130    navigation_info->DLogResultsStats("DNS UnexpectedResolution");
131
132    LearnFromNavigation(referrer, navigation_info->url());
133    return false;
134  }
135  UrlInfo& prefetched_host_info(it->second);
136
137  // Sometimes a host is used as a subresource by several referrers, so it is
138  // in our list, but was never motivated by a page-link-scan.  In that case, it
139  // really is an "unexpected" navigation, and we should tally it, and augment
140  // our referrers_.
141  bool referrer_based_prefetch = !prefetched_host_info.was_linked();
142  if (referrer_based_prefetch) {
143    // This wasn't the first time this host refered to *some* referrer.
144    LearnFromNavigation(referrer, navigation_info->url());
145  }
146
147  DnsBenefit benefit = prefetched_host_info.AccruePrefetchBenefits(
148      navigation_info);
149  switch (benefit) {
150    case PREFETCH_NAME_FOUND:
151    case PREFETCH_NAME_NONEXISTANT:
152      dns_cache_hits_.push_back(*navigation_info);
153      if (referrer_based_prefetch) {
154        if (referrer.has_host()) {
155          referrers_[referrer].AccrueValue(
156              navigation_info->benefits_remaining(), url);
157        }
158      }
159      return true;
160
161    case PREFETCH_CACHE_EVICTION:
162      cache_eviction_map_[url] = *navigation_info;
163      return false;
164
165    case PREFETCH_NO_BENEFIT:
166      // Prefetch never hit the network. Name was pre-cached.
167      return false;
168
169    default:
170      NOTREACHED();
171      return false;
172  }
173}
174
175void Predictor::LearnFromNavigation(const GURL& referring_url,
176                                    const GURL& target_url) {
177  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
178  if (referring_url.has_host() &&
179      referring_url != target_url) {
180    DCHECK(referring_url == referring_url.GetWithEmptyPath());
181    referrers_[referring_url].SuggestHost(target_url);
182  }
183}
184
185void Predictor::PredictSubresources(const GURL& url) {
186  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
187  Referrers::iterator it = referrers_.find(url);
188  if (referrers_.end() == it)
189    return;
190  Referrer* referrer = &(it->second);
191  referrer->IncrementUseCount();
192  for (Referrer::iterator future_url = referrer->begin();
193       future_url != referrer->end(); ++future_url) {
194    UrlInfo* queued_info = AppendToResolutionQueue(
195        future_url->first,
196        UrlInfo::LEARNED_REFERAL_MOTIVATED);
197    if (queued_info)
198      queued_info->SetReferringHostname(url);
199  }
200}
201
202void Predictor::PredictFrameSubresources(const GURL& url) {
203  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
204  DCHECK(url.GetWithEmptyPath() == url);
205  Referrers::iterator it = referrers_.find(url);
206  if (referrers_.end() == it)
207    return;
208  Referrer* referrer = &(it->second);
209  referrer->IncrementUseCount();
210  for (Referrer::iterator future_url = referrer->begin();
211       future_url != referrer->end(); ++future_url) {
212    if (future_url->second.IsPreconnectWorthDoing())
213      Preconnect::PreconnectOnIOThread(future_url->first);
214  }
215}
216
217// Provide sort order so all .com's are together, etc.
218struct RightToLeftStringSorter {
219  bool operator()(const net::HostPortPair& left,
220                  const net::HostPortPair& right) const {
221    return string_compare(left.host, right.host);
222  }
223  bool operator()(const GURL& left,
224                  const GURL& right) const {
225    return string_compare(left.host(), right.host());
226  }
227
228  static bool string_compare(const std::string& left_host,
229                             const std::string right_host) {
230    if (left_host == right_host) return true;
231    size_t left_already_matched = left_host.size();
232    size_t right_already_matched = right_host.size();
233
234    // Ensure both strings have characters.
235    if (!left_already_matched) return true;
236    if (!right_already_matched) return false;
237
238    // Watch for trailing dot, so we'll always be safe to go one beyond dot.
239    if ('.' == left_host[left_already_matched - 1]) {
240      if ('.' != right_host[right_already_matched - 1])
241        return true;
242      // Both have dots at end of string.
243      --left_already_matched;
244      --right_already_matched;
245    } else {
246      if ('.' == right_host[right_already_matched - 1])
247        return false;
248    }
249
250    while (1) {
251      if (!left_already_matched) return true;
252      if (!right_already_matched) return false;
253
254      size_t left_length, right_length;
255      size_t left_start = left_host.find_last_of('.', left_already_matched - 1);
256      if (std::string::npos == left_start) {
257        left_length = left_already_matched;
258        left_already_matched = left_start = 0;
259      } else {
260        left_length = left_already_matched - left_start;
261        left_already_matched = left_start;
262        ++left_start;  // Don't compare the dot.
263      }
264      size_t right_start = right_host.find_last_of('.',
265                                                   right_already_matched - 1);
266      if (std::string::npos == right_start) {
267        right_length = right_already_matched;
268        right_already_matched = right_start = 0;
269      } else {
270        right_length = right_already_matched - right_start;
271        right_already_matched = right_start;
272        ++right_start;  // Don't compare the dot.
273      }
274
275      int diff = left_host.compare(left_start, left_host.size(),
276                                   right_host, right_start, right_host.size());
277      if (diff > 0) return false;
278      if (diff < 0) return true;
279    }
280  }
281};
282
283void Predictor::GetHtmlReferrerLists(std::string* output) {
284  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
285  if (referrers_.empty())
286    return;
287
288  // TODO(jar): Remove any plausible JavaScript from names before displaying.
289
290  typedef std::set<GURL, struct RightToLeftStringSorter>
291      SortedNames;
292  SortedNames sorted_names;
293
294  for (Referrers::iterator it = referrers_.begin();
295       referrers_.end() != it; ++it)
296    sorted_names.insert(it->first);
297
298  output->append("<br><table border>");
299  output->append(
300      "<tr><th>Host for Page</th>"
301      "<th>Page Load<br>Count</th>"
302      "<th>Subresource<br>Navigations</th>"
303      "<th>Subresource<br>PreConnects</th>"
304      "<th>Expected<br>Connects</th>"
305      "<th>DNS<br>Savings</th>"
306      "<th>Subresource Spec</th></tr>");
307
308  for (SortedNames::iterator it = sorted_names.begin();
309       sorted_names.end() != it; ++it) {
310    Referrer* referrer = &(referrers_[*it]);
311    bool first_set_of_futures = true;
312    for (Referrer::iterator future_url = referrer->begin();
313         future_url != referrer->end(); ++future_url) {
314      output->append("<tr align=right>");
315      if (first_set_of_futures)
316        StringAppendF(output, "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>",
317                      static_cast<int>(referrer->size()),
318                      it->spec().c_str(),
319                      static_cast<int>(referrer->size()),
320                      static_cast<int>(referrer->use_count()));
321      first_set_of_futures = false;
322      StringAppendF(output,
323          "<td>%d</td><td>%d</td><td>%2.3f</td><td>%dms</td><td>%s</td></tr>",
324          static_cast<int>(future_url->second.navigation_count()),
325          static_cast<int>(future_url->second.preconnection_count()),
326          static_cast<double>(future_url->second.subresource_use_rate()),
327          static_cast<int>(future_url->second.latency().InMilliseconds()),
328          future_url->first.spec().c_str());
329    }
330  }
331  output->append("</table>");
332}
333
334void Predictor::GetHtmlInfo(std::string* output) {
335  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
336  // Local lists for calling UrlInfo
337  UrlInfo::DnsInfoTable cache_hits;
338  UrlInfo::DnsInfoTable cache_evictions;
339  UrlInfo::DnsInfoTable name_not_found;
340  UrlInfo::DnsInfoTable network_hits;
341  UrlInfo::DnsInfoTable already_cached;
342
343  // Get copies of all useful data.
344  typedef std::map<GURL, UrlInfo, RightToLeftStringSorter>
345      Snapshot;
346  Snapshot snapshot;
347  {
348    // UrlInfo supports value semantics, so we can do a shallow copy.
349    for (Results::iterator it(results_.begin()); it != results_.end(); it++) {
350      snapshot[it->first] = it->second;
351    }
352    for (Results::iterator it(cache_eviction_map_.begin());
353         it != cache_eviction_map_.end();
354         it++) {
355      cache_evictions.push_back(it->second);
356    }
357    // Reverse list as we copy cache hits, so that new hits are at the top.
358    size_t index = dns_cache_hits_.size();
359    while (index > 0) {
360      index--;
361      cache_hits.push_back(dns_cache_hits_[index]);
362    }
363  }
364
365  // Partition the UrlInfo's into categories.
366  for (Snapshot::iterator it(snapshot.begin()); it != snapshot.end(); it++) {
367    if (it->second.was_nonexistant()) {
368      name_not_found.push_back(it->second);
369      continue;
370    }
371    if (!it->second.was_found())
372      continue;  // Still being processed.
373    if (TimeDelta() != it->second.benefits_remaining()) {
374      network_hits.push_back(it->second);  // With no benefit yet.
375      continue;
376    }
377    if (UrlInfo::kMaxNonNetworkDnsLookupDuration >
378      it->second.resolve_duration()) {
379      already_cached.push_back(it->second);
380      continue;
381    }
382    // Remaining case is where prefetch benefit was significant, and was used.
383    // Since we shot those cases as historical hits, we won't bother here.
384  }
385
386  bool brief = false;
387#ifdef NDEBUG
388  brief = true;
389#endif  // NDEBUG
390
391  // Call for display of each table, along with title.
392  UrlInfo::GetHtmlTable(cache_hits,
393      "Prefetching DNS records produced benefits for ", false, output);
394  UrlInfo::GetHtmlTable(cache_evictions,
395      "Cache evictions negated DNS prefetching benefits for ", brief, output);
396  UrlInfo::GetHtmlTable(network_hits,
397      "Prefetching DNS records was not yet beneficial for ", brief, output);
398  UrlInfo::GetHtmlTable(already_cached,
399      "Previously cached resolutions were found for ", brief, output);
400  UrlInfo::GetHtmlTable(name_not_found,
401      "Prefetching DNS records revealed non-existance for ", brief, output);
402}
403
404UrlInfo* Predictor::AppendToResolutionQueue(
405    const GURL& url,
406    UrlInfo::ResolutionMotivation motivation) {
407  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
408  DCHECK(url.has_host());
409
410  if (shutdown_)
411    return NULL;
412
413  UrlInfo* info = &results_[url];
414  info->SetUrl(url);  // Initialize or DCHECK.
415  // TODO(jar):  I need to discard names that have long since expired.
416  // Currently we only add to the domain map :-/
417
418  DCHECK(info->HasUrl(url));
419
420  if (!info->NeedsDnsUpdate()) {
421    info->DLogResultsStats("DNS PrefetchNotUpdated");
422    return NULL;
423  }
424
425  info->SetQueuedState(motivation);
426  work_queue_.Push(url, motivation);
427  StartSomeQueuedResolutions();
428  return info;
429}
430
431void Predictor::StartSomeQueuedResolutions() {
432  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
433
434  while (!work_queue_.IsEmpty() &&
435         pending_lookups_.size() < max_concurrent_dns_lookups_) {
436    const GURL url(work_queue_.Pop());
437    UrlInfo* info = &results_[url];
438    DCHECK(info->HasUrl(url));
439    info->SetAssignedState();
440
441    if (CongestionControlPerformed(info)) {
442      DCHECK(work_queue_.IsEmpty());
443      return;
444    }
445
446    LookupRequest* request = new LookupRequest(this, host_resolver_, url);
447    int status = request->Start();
448    if (status == net::ERR_IO_PENDING) {
449      // Will complete asynchronously.
450      pending_lookups_.insert(request);
451      peak_pending_lookups_ = std::max(peak_pending_lookups_,
452                                       pending_lookups_.size());
453    } else {
454      // Completed synchronously (was already cached by HostResolver), or else
455      // there was (equivalently) some network error that prevents us from
456      // finding the name.  Status net::OK means it was "found."
457      LookupFinished(request, url, status == net::OK);
458      delete request;
459    }
460  }
461}
462
463bool Predictor::CongestionControlPerformed(UrlInfo* info) {
464  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
465  // Note: queue_duration is ONLY valid after we go to assigned state.
466  if (info->queue_duration() < max_dns_queue_delay_)
467    return false;
468  // We need to discard all entries in our queue, as we're keeping them waiting
469  // too long.  By doing this, we'll have a chance to quickly service urgent
470  // resolutions, and not have a bogged down system.
471  while (true) {
472    info->RemoveFromQueue();
473    if (work_queue_.IsEmpty())
474      break;
475    info = &results_[work_queue_.Pop()];
476    info->SetAssignedState();
477  }
478  return true;
479}
480
481void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url,
482                                 bool found) {
483  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
484
485  LookupFinished(request, url, found);
486  pending_lookups_.erase(request);
487  delete request;
488
489  StartSomeQueuedResolutions();
490}
491
492void Predictor::LookupFinished(LookupRequest* request, const GURL& url,
493                               bool found) {
494  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
495  UrlInfo* info = &results_[url];
496  DCHECK(info->HasUrl(url));
497  if (info->is_marked_to_delete()) {
498    results_.erase(url);
499  } else {
500    if (found)
501      info->SetFoundState();
502    else
503      info->SetNoSuchNameState();
504  }
505}
506
507void Predictor::DiscardAllResults() {
508  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
509  // Delete anything listed so far in this session that shows in about:dns.
510  cache_eviction_map_.clear();
511  dns_cache_hits_.clear();
512  referrers_.clear();
513
514
515  // Try to delete anything in our work queue.
516  while (!work_queue_.IsEmpty()) {
517    // Emulate processing cycle as though host was not found.
518    GURL url = work_queue_.Pop();
519    UrlInfo* info = &results_[url];
520    DCHECK(info->HasUrl(url));
521    info->SetAssignedState();
522    info->SetNoSuchNameState();
523  }
524  // Now every result_ is either resolved, or is being resolved
525  // (see LookupRequest).
526
527  // Step through result_, recording names of all hosts that can't be erased.
528  // We can't erase anything being worked on.
529  Results assignees;
530  for (Results::iterator it = results_.begin(); results_.end() != it; ++it) {
531    GURL url(it->first);
532    UrlInfo* info = &it->second;
533    DCHECK(info->HasUrl(url));
534    if (info->is_assigned()) {
535      info->SetPendingDeleteState();
536      assignees[url] = *info;
537    }
538  }
539  DCHECK(assignees.size() <= max_concurrent_dns_lookups_);
540  results_.clear();
541  // Put back in the names being worked on.
542  for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) {
543    DCHECK(it->second.is_marked_to_delete());
544    results_[it->first] = it->second;
545  }
546}
547
548void Predictor::TrimReferrers() {
549  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
550  std::vector<GURL> urls;
551  for (Referrers::const_iterator it = referrers_.begin();
552       it != referrers_.end(); ++it)
553    urls.push_back(it->first);
554  for (size_t i = 0; i < urls.size(); ++i)
555    if (!referrers_[urls[i]].Trim())
556      referrers_.erase(urls[i]);
557}
558
559void Predictor::SerializeReferrers(ListValue* referral_list) {
560  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
561  referral_list->Clear();
562  referral_list->Append(new FundamentalValue(DNS_REFERRER_VERSION));
563  for (Referrers::const_iterator it = referrers_.begin();
564       it != referrers_.end(); ++it) {
565    // Serialize the list of subresource names.
566    Value* subresource_list(it->second.Serialize());
567
568    // Create a list for each referer.
569    ListValue* motivator(new ListValue);
570    motivator->Append(new StringValue(it->first.spec()));
571    motivator->Append(subresource_list);
572
573    referral_list->Append(motivator);
574  }
575}
576
577void Predictor::DeserializeReferrers(const ListValue& referral_list) {
578  DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
579  int format_version = -1;
580  if (referral_list.GetSize() > 0 &&
581      referral_list.GetInteger(0, &format_version) &&
582      format_version == DNS_REFERRER_VERSION) {
583    for (size_t i = 1; i < referral_list.GetSize(); ++i) {
584      ListValue* motivator;
585      if (!referral_list.GetList(i, &motivator)) {
586        NOTREACHED();
587        return;
588      }
589      std::string motivating_url_spec;
590      if (!motivator->GetString(0, &motivating_url_spec)) {
591        NOTREACHED();
592        return;
593      }
594
595      Value* subresource_list;
596      if (!motivator->Get(1, &subresource_list)) {
597        NOTREACHED();
598        return;
599      }
600
601      referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list);
602    }
603  }
604}
605
606
607//------------------------------------------------------------------------------
608
609Predictor::HostNameQueue::HostNameQueue() {
610}
611
612Predictor::HostNameQueue::~HostNameQueue() {
613}
614
615void Predictor::HostNameQueue::Push(const GURL& url,
616    UrlInfo::ResolutionMotivation motivation) {
617  switch (motivation) {
618    case UrlInfo::STATIC_REFERAL_MOTIVATED:
619    case UrlInfo::LEARNED_REFERAL_MOTIVATED:
620    case UrlInfo::MOUSE_OVER_MOTIVATED:
621      rush_queue_.push(url);
622      break;
623
624    default:
625      background_queue_.push(url);
626      break;
627  }
628}
629
630bool Predictor::HostNameQueue::IsEmpty() const {
631  return rush_queue_.empty() && background_queue_.empty();
632}
633
634GURL Predictor::HostNameQueue::Pop() {
635  DCHECK(!IsEmpty());
636  std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_
637                                              : &rush_queue_);
638  GURL url(queue->front());
639  queue->pop();
640  return url;
641}
642
643}  // namespace chrome_browser_net
644