url_info.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/net/url_info.h"
6
7#include <math.h>
8
9#include <algorithm>
10#include <string>
11
12#include "base/field_trial.h"
13#include "base/format_macros.h"
14#include "base/histogram.h"
15#include "base/logging.h"
16#include "base/string_util.h"
17
18using base::Time;
19using base::TimeDelta;
20using base::TimeTicks;
21
22namespace chrome_browser_net {
23
24static bool detailed_logging_enabled = false;
25
26// Use command line switch to enable detailed logging.
27void EnablePredictorDetailedLog(bool enable) {
28  detailed_logging_enabled = enable;
29}
30
31// static
32int UrlInfo::sequence_counter = 1;
33
34
35bool UrlInfo::NeedsDnsUpdate() {
36  switch (state_) {
37    case PENDING:  // Just now created info.
38      return true;
39
40    case QUEUED:  // In queue.
41    case ASSIGNED:  // It's being resolved.
42    case ASSIGNED_BUT_MARKED:  // It's being resolved.
43      return false;  // We're already working on it
44
45    case NO_SUCH_NAME:  // Lookup failed.
46    case FOUND:  // Lookup succeeded.
47      return !IsStillCached();  // See if DNS cache expired.
48
49    default:
50      NOTREACHED();
51      return false;
52  }
53}
54
55const TimeDelta UrlInfo::kNullDuration(TimeDelta::FromMilliseconds(-1));
56
57// Common low end TTL for sites is 5 minutes.  However, DNS servers give us
58// the remaining time, not the original 5 minutes.  Hence it doesn't much matter
59// whether we found something in the local cache, or an ISP cache, it will
60// on average be 2.5 minutes before it expires.  We could try to model this with
61// 180 seconds, but simpler is just to do the lookups all the time (wasting
62// OS calls(?)), and let that OS cache decide what to do (with TTL in hand).
63// We use a small time to help get some duplicate suppression, in case a page
64// has a TON of copies of the same domain name, so that we don't thrash the OS
65// to death.  Hopefully it is small enough that we're not hurting our cache hit
66// rate (i.e., we could always ask the OS).
67TimeDelta UrlInfo::kCacheExpirationDuration(TimeDelta::FromSeconds(5));
68
69const TimeDelta UrlInfo::kMaxNonNetworkDnsLookupDuration(
70    TimeDelta::FromMilliseconds(15));
71
72// Used by test ONLY.  The value is otherwise constant.
73void UrlInfo::set_cache_expiration(TimeDelta time) {
74  kCacheExpirationDuration = time;
75}
76
77void UrlInfo::SetQueuedState(ResolutionMotivation motivation) {
78  DCHECK(PENDING == state_ || FOUND == state_ || NO_SUCH_NAME == state_);
79  old_prequeue_state_ = state_;
80  state_ = QUEUED;
81  queue_duration_ = resolve_duration_ = kNullDuration;
82  SetMotivation(motivation);
83  GetDuration();  // Set time_
84  DLogResultsStats("DNS Prefetch in queue");
85}
86
87void UrlInfo::SetAssignedState() {
88  DCHECK(QUEUED == state_);
89  state_ = ASSIGNED;
90  queue_duration_ = GetDuration();
91  DLogResultsStats("DNS Prefetch assigned");
92  UMA_HISTOGRAM_TIMES("DNS.PrefetchQueue", queue_duration_);
93}
94
95void UrlInfo::RemoveFromQueue() {
96  DCHECK(ASSIGNED == state_);
97  state_ = old_prequeue_state_;
98  DLogResultsStats("DNS Prefetch reset to prequeue");
99  static const TimeDelta kBoundary = TimeDelta::FromSeconds(2);
100  if (queue_duration_ > kBoundary) {
101    UMA_HISTOGRAM_MEDIUM_TIMES("DNS.QueueRecycledDeltaOver2",
102                               queue_duration_ - kBoundary);
103    return;
104  }
105  // Make a custom linear histogram for the region from 0 to boundary.
106  const size_t kBucketCount = 52;
107  static scoped_refptr<Histogram> histogram = LinearHistogram::FactoryTimeGet(
108      "DNS.QueueRecycledUnder2", TimeDelta(), kBoundary, kBucketCount,
109      Histogram::kUmaTargetedHistogramFlag);
110  histogram->AddTime(queue_duration_);
111}
112
113void UrlInfo::SetPendingDeleteState() {
114  DCHECK(ASSIGNED == state_  || ASSIGNED_BUT_MARKED == state_);
115  state_ = ASSIGNED_BUT_MARKED;
116}
117
118void UrlInfo::SetFoundState() {
119  DCHECK(ASSIGNED == state_);
120  state_ = FOUND;
121  resolve_duration_ = GetDuration();
122  if (kMaxNonNetworkDnsLookupDuration <= resolve_duration_) {
123    UMA_HISTOGRAM_CUSTOM_TIMES("DNS.PrefetchResolution", resolve_duration_,
124        kMaxNonNetworkDnsLookupDuration, TimeDelta::FromMinutes(15), 100);
125
126    static bool use_ipv6_histogram(FieldTrialList::Find("IPv6_Probe") &&
127        !FieldTrialList::Find("IPv6_Probe")->group_name().empty());
128    if (use_ipv6_histogram) {
129      UMA_HISTOGRAM_CUSTOM_TIMES(
130          FieldTrial::MakeName("DNS.PrefetchResolution", "IPv6_Probe"),
131          resolve_duration_, kMaxNonNetworkDnsLookupDuration,
132          TimeDelta::FromMinutes(15), 100);
133    }
134
135    // Record potential beneficial time, and maybe we'll get a cache hit.
136    // We keep the maximum, as the warming we did earlier may still be
137    // helping with a cache upstream in DNS resolution.
138    benefits_remaining_ = std::max(resolve_duration_, benefits_remaining_);
139  }
140  sequence_number_ = sequence_counter++;
141  DLogResultsStats("DNS PrefetchFound");
142}
143
144void UrlInfo::SetNoSuchNameState() {
145  DCHECK(ASSIGNED == state_);
146  state_ = NO_SUCH_NAME;
147  resolve_duration_ = GetDuration();
148  if (kMaxNonNetworkDnsLookupDuration <= resolve_duration_) {
149    DHISTOGRAM_TIMES("DNS.PrefetchNotFoundName", resolve_duration_);
150    // Record potential beneficial time, and maybe we'll get a cache hit.
151    benefits_remaining_ = std::max(resolve_duration_, benefits_remaining_);
152  }
153  sequence_number_ = sequence_counter++;
154  DLogResultsStats("DNS PrefetchNotFound");
155}
156
157void UrlInfo::SetStartedState() {
158  DCHECK(PENDING == state_);
159  state_ = STARTED;
160  queue_duration_ = resolve_duration_ = TimeDelta();  // 0ms.
161  SetMotivation(NO_PREFETCH_MOTIVATION);
162  GetDuration();  // Set time.
163}
164
165void UrlInfo::SetFinishedState(bool was_resolved) {
166  DCHECK(STARTED == state_);
167  state_ = was_resolved ? FINISHED : FINISHED_UNRESOLVED;
168  resolve_duration_ = GetDuration();
169  // TODO(jar): Sequence number should be incremented in prefetched HostInfo.
170  DLogResultsStats("DNS HTTP Finished");
171}
172
173void UrlInfo::SetUrl(const GURL& url) {
174  if (url_.is_empty())  // Not yet initialized.
175    url_ = url;
176  else
177    DCHECK_EQ(url_, url);
178}
179
180// IsStillCached() guesses if the DNS cache still has IP data,
181// or at least remembers results about "not finding host."
182bool UrlInfo::IsStillCached() const {
183  DCHECK(FOUND == state_ || NO_SUCH_NAME == state_);
184
185  // Default MS OS does not cache failures. Hence we could return false almost
186  // all the time for that case.  However, we'd never try again to prefetch
187  // the value if we returned false that way.  Hence we'll just let the lookup
188  // time out the same way as FOUND case.
189
190  if (sequence_counter - sequence_number_ > kMaxGuaranteedDnsCacheSize)
191    return false;
192
193  TimeDelta time_since_resolution = TimeTicks::Now() - time_;
194
195  return time_since_resolution < kCacheExpirationDuration;
196}
197
198// Compare the actual navigation DNS latency found in navigation_info, to the
199// previously prefetched info.
200DnsBenefit UrlInfo::AccruePrefetchBenefits(UrlInfo* navigation_info) {
201  DCHECK(FINISHED == navigation_info->state_ ||
202         FINISHED_UNRESOLVED == navigation_info->state_);
203  DCHECK(navigation_info->url() == url_);
204
205  if ((0 == benefits_remaining_.InMilliseconds()) ||
206      (FOUND != state_ && NO_SUCH_NAME != state_)) {
207    if (FINISHED == navigation_info->state_)
208      UMA_HISTOGRAM_LONG_TIMES("DNS.IndependentNavigation",
209                               navigation_info->resolve_duration_);
210    else
211      UMA_HISTOGRAM_LONG_TIMES("DNS.IndependentFailedNavigation",
212                               navigation_info->resolve_duration_);
213    return PREFETCH_NO_BENEFIT;
214  }
215
216  TimeDelta benefit = benefits_remaining_ - navigation_info->resolve_duration_;
217  navigation_info->benefits_remaining_ = benefits_remaining_;
218  benefits_remaining_ = TimeDelta();  // We used up all our benefits here.
219
220  navigation_info->motivation_ = motivation_;
221  if (LEARNED_REFERAL_MOTIVATED == motivation_ ||
222      STATIC_REFERAL_MOTIVATED == motivation_)
223    navigation_info->referring_url_ = referring_url_;
224
225  if (navigation_info->resolve_duration_ > kMaxNonNetworkDnsLookupDuration) {
226    // Our precache effort didn't help since HTTP stack hit the network.
227    UMA_HISTOGRAM_LONG_TIMES("DNS.PrefetchCacheEvictionL", resolve_duration_);
228    DLogResultsStats("DNS PrefetchCacheEviction");
229    return PREFETCH_CACHE_EVICTION;
230  }
231
232  if (NO_SUCH_NAME == state_) {
233    UMA_HISTOGRAM_LONG_TIMES("DNS.PrefetchNegativeHitL", benefit);
234    DLogResultsStats("DNS PrefetchNegativeHit");
235    return PREFETCH_NAME_NONEXISTANT;
236  }
237
238  DCHECK_EQ(FOUND, state_);
239  if (LEARNED_REFERAL_MOTIVATED == motivation_ ||
240      STATIC_REFERAL_MOTIVATED == motivation_) {
241    UMA_HISTOGRAM_TIMES("DNS.PrefetchReferredPositiveHit", benefit);
242    DLogResultsStats("DNS PrefetchReferredPositiveHit");
243  } else {
244    UMA_HISTOGRAM_LONG_TIMES("DNS.PrefetchPositiveHitL", benefit);
245    DLogResultsStats("DNS PrefetchPositiveHit");
246  }
247  return PREFETCH_NAME_FOUND;
248}
249
250void UrlInfo::DLogResultsStats(const char* message) const {
251  if (!detailed_logging_enabled)
252    return;
253  DLOG(INFO) << "\t" << message <<  "\tq="
254      << queue_duration().InMilliseconds() << "ms,\tr="
255      << resolve_duration().InMilliseconds() << "ms\tp="
256      << benefits_remaining_.InMilliseconds() << "ms\tseq="
257      << sequence_number_
258      << "\t" << url_.spec();
259}
260
261//------------------------------------------------------------------------------
262// This last section supports HTML output, such as seen in about:dns.
263//------------------------------------------------------------------------------
264
265// Preclude any possibility of Java Script or markup in the text, by only
266// allowing alphanumerics, '.', '-', ':', and whitespace.
267static std::string RemoveJs(const std::string& text) {
268  std::string output(text);
269  size_t length = output.length();
270  for (size_t i = 0; i < length; i++) {
271    char next = output[i];
272    if (isalnum(next) || isspace(next) || strchr(".-:/", next) != NULL)
273      continue;
274    output[i] = '?';
275  }
276  return output;
277}
278
279class MinMaxAverage {
280 public:
281  MinMaxAverage()
282    : sum_(0), square_sum_(0), count_(0),
283      minimum_(kint64max), maximum_(kint64min) {
284  }
285
286  // Return values for use in printf formatted as "%d"
287  int sample(int64 value) {
288    sum_ += value;
289    square_sum_ += value * value;
290    count_++;
291    minimum_ = std::min(minimum_, value);
292    maximum_ = std::max(maximum_, value);
293    return static_cast<int>(value);
294  }
295  int minimum() const { return static_cast<int>(minimum_);    }
296  int maximum() const { return static_cast<int>(maximum_);    }
297  int average() const { return static_cast<int>(sum_/count_); }
298  int     sum() const { return static_cast<int>(sum_);        }
299
300  int standard_deviation() const {
301    double average = static_cast<float>(sum_) / count_;
302    double variance = static_cast<float>(square_sum_)/count_
303                      - average * average;
304    return static_cast<int>(floor(sqrt(variance) + .5));
305  }
306
307 private:
308  int64 sum_;
309  int64 square_sum_;
310  int count_;
311  int64 minimum_;
312  int64 maximum_;
313
314  // DISALLOW_COPY_AND_ASSIGN(MinMaxAverage);
315};
316
317static std::string HoursMinutesSeconds(int seconds) {
318  std::string result;
319  int print_seconds = seconds % 60;
320  int minutes = seconds / 60;
321  int print_minutes = minutes % 60;
322  int print_hours = minutes/60;
323  if (print_hours)
324    StringAppendF(&result, "%.2d:",  print_hours);
325  if (print_hours || print_minutes)
326    StringAppendF(&result, "%2.2d:",  print_minutes);
327  StringAppendF(&result, "%2.2d",  print_seconds);
328  return result;
329}
330
331// static
332void UrlInfo::GetHtmlTable(const DnsInfoTable host_infos,
333                               const char* description,
334                               const bool brief,
335                               std::string* output) {
336  if (0 == host_infos.size())
337    return;
338  output->append(description);
339  StringAppendF(output, "%" PRIuS " %s", host_infos.size(),
340                (1 == host_infos.size()) ? "hostname" : "hostnames");
341
342  if (brief) {
343    output->append("<br><br>");
344    return;
345  }
346
347  const char* row_format = "<tr align=right><td>%s</td>"
348                           "<td>%d</td><td>%d</td><td>%s</td><td>%s</td></tr>";
349
350  output->append("<br><table border=1>");
351  StringAppendF(output,
352      "<tr><th>%s</th><th>%s</th><th>%s</th><th>%s</th><th>%s</th></tr>",
353      "Host name", "Applicable Prefetch<br>Time (ms)",
354      "Recent Resolution<br>Time(ms)", "How long ago<br>(HH:MM:SS)",
355      "Motivation");
356
357  // Print bulk of table, and gather stats at same time.
358  MinMaxAverage queue, resolve, preresolve, when;
359  TimeTicks current_time = TimeTicks::Now();
360  for (DnsInfoTable::const_iterator it(host_infos.begin());
361       it != host_infos.end(); it++) {
362    queue.sample((it->queue_duration_.InMilliseconds()));
363    StringAppendF(output, row_format,
364                  RemoveJs(it->url_.spec()).c_str(),
365                  preresolve.sample((it->benefits_remaining_.InMilliseconds())),
366                  resolve.sample((it->resolve_duration_.InMilliseconds())),
367                  HoursMinutesSeconds(when.sample(
368                      (current_time - it->time_).InSeconds())).c_str(),
369                  it->GetAsciiMotivation().c_str());
370  }
371  // Write min, max, and average summary lines.
372  if (host_infos.size() > 2) {
373    output->append("<B>");
374    StringAppendF(output, row_format,
375                  "<b>---minimum---</b>",
376                  preresolve.minimum(), resolve.minimum(),
377                  HoursMinutesSeconds(when.minimum()).c_str(), "");
378    StringAppendF(output, row_format,
379                  "<b>---average---</b>",
380                  preresolve.average(), resolve.average(),
381                  HoursMinutesSeconds(when.average()).c_str(), "");
382    StringAppendF(output, row_format,
383                  "<b>standard deviation</b>",
384                  preresolve.standard_deviation(),
385                  resolve.standard_deviation(), "n/a", "");
386    StringAppendF(output, row_format,
387                  "<b>---maximum---</b>",
388                  preresolve.maximum(), resolve.maximum(),
389                  HoursMinutesSeconds(when.maximum()).c_str(), "");
390    StringAppendF(output, row_format,
391                  "<b>-----SUM-----</b>",
392                  preresolve.sum(), resolve.sum(), "n/a", "");
393  }
394  output->append("</table>");
395
396#ifdef DEBUG
397  StringAppendF(output,
398                "Prefetch Queue Durations: min=%d, avg=%d, max=%d<br><br>",
399                queue.minimum(), queue.average(), queue.maximum());
400#endif
401
402  output->append("<br>");
403}
404
405void UrlInfo::SetMotivation(ResolutionMotivation motivation) {
406  motivation_ = motivation;
407  if (motivation < LINKED_MAX_MOTIVATED)
408    was_linked_ = true;
409}
410
411std::string UrlInfo::GetAsciiMotivation() const {
412  switch (motivation_) {
413    case MOUSE_OVER_MOTIVATED:
414      return "[mouse-over]";
415
416    case PAGE_SCAN_MOTIVATED:
417      return "[page scan]";
418
419    case OMNIBOX_MOTIVATED:
420      return "[omnibox]";
421
422    case STARTUP_LIST_MOTIVATED:
423      return "[startup list]";
424
425    case NO_PREFETCH_MOTIVATION:
426      return "n/a";
427
428    case STATIC_REFERAL_MOTIVATED:
429      return RemoveJs(referring_url_.spec()) + "*";
430
431    case LEARNED_REFERAL_MOTIVATED:
432      return RemoveJs(referring_url_.spec());
433
434    default:
435      return "";
436  }
437}
438
439}  // namespace chrome_browser_net
440