1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/net/referrer.h"
6
7#include <limits.h>
8
9#include "base/compiler_specific.h"
10#include "base/logging.h"
11#include "base/message_loop/message_loop.h"
12#include "base/values.h"
13#include "chrome/browser/net/predictor.h"
14
15namespace chrome_browser_net {
16
17//------------------------------------------------------------------------------
18// Smoothing parameter for updating subresource_use_rate_.
19
20// We always combine our old expected value, weighted by some factor W (we use
21// kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
22// The new "expected value" is the number of actual connections made due to the
23// current navigations.
24// That means that IF we end up needing to connect, we should apply the formula:
25// Eupdated = Eold * W  +  Enew * (1 - W)
26// If we visit the containing url, but don't end up needing a connection, then
27// Enew == 0, so we use the formula:
28// Eupdated = Eold * W
29// To achieve the above updating algorithm, we end up doing the multiplication
30// by W every time we contemplate doing a preconnection (i.e., when we navigate
31// to the containing URL, and consider doing a preconnection), and then IFF we
32// learn that we really needed a connection to the subresource, we complete the
33// above algorithm by adding the (1 - W) for each connection we make.
34
// W in the update formula above: the fraction of the OLD expected value that
// is retained each time the referrer is observed.  Each connection that is
// actually needed then contributes (1 - W).  Must lie in the range 0.0 to 1.0
// (this is DCHECKed in ReferrerValue::SubresourceIsNeeded()).
namespace {
const double kWeightingForOldConnectsExpectedValue = 0.66;
}  // namespace
38
// Initial estimate of the expected number of connections needed for a
// subresource when its referrer is navigated to.
//  - Each time the subresource is (again) needed, the estimate increases.
//  - Each time we navigate to the referrer but never end up needing the
//    subresource, the estimate decreases.
// The most conservative choice would be 0.0, but then we would have to wait a
// while before doing much speculative activity.  Results are persisted, so
// the learned (asymptotic) answer is what gets saved in the long run.
// Some browsers blindly make 2 connections all the time, so that is used as
// the starting point.
namespace {
const double kInitialConnectsExpectedValue = 2.0;
}  // namespace
52
53Referrer::Referrer() : use_count_(1) {}
54
55void Referrer::SuggestHost(const GURL& url) {
56  // Limit how large our list can get, in case we make mistakes about what
57  // hostnames are in sub-resources (example: Some advertisments have a link to
58  // the ad agency, and then provide a "surprising" redirect to the advertised
59  // entity, which then (mistakenly) appears to be a subresource on the page
60  // hosting the ad).
61  // TODO(jar): Do experiments to optimize the max count of suggestions.
62  static const size_t kMaxSuggestions = 10;
63
64  if (!url.has_host())  // TODO(jar): Is this really needed????
65    return;
66  DCHECK(url == url.GetWithEmptyPath());
67  SubresourceMap::iterator it = find(url);
68  if (it != end()) {
69    it->second.SubresourceIsNeeded();
70    return;
71  }
72
73  if (kMaxSuggestions <= size()) {
74    DeleteLeastUseful();
75    DCHECK(kMaxSuggestions > size());
76  }
77  (*this)[url].SubresourceIsNeeded();
78}
79
80void Referrer::DeleteLeastUseful() {
81  // Find the item with the lowest value.  Most important is preconnection_rate,
82  // and least is lifetime (age).
83  GURL least_useful_url;
84  double lowest_rate_seen = 0.0;
85  // We use longs for durations because we will use multiplication on them.
86  int64 least_useful_lifetime = 0;  // Duration in milliseconds.
87
88  const base::Time kNow(base::Time::Now());  // Avoid multiple calls.
89  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
90    int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds();
91    double rate = it->second.subresource_use_rate();
92    if (least_useful_url.has_host()) {
93      if (rate > lowest_rate_seen)
94        continue;
95      if (lifetime <= least_useful_lifetime)
96        continue;
97    }
98    least_useful_url = it->first;
99    lowest_rate_seen = rate;
100    least_useful_lifetime = lifetime;
101  }
102  if (least_useful_url.has_host())
103    erase(least_useful_url);
104}
105
106bool Referrer::Trim(double reduce_rate, double threshold) {
107  std::vector<GURL> discarded_urls;
108  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
109    if (!it->second.Trim(reduce_rate, threshold))
110      discarded_urls.push_back(it->first);
111  }
112  for (size_t i = 0; i < discarded_urls.size(); ++i)
113    erase(discarded_urls[i]);
114  return size() > 0;
115}
116
117bool ReferrerValue::Trim(double reduce_rate, double threshold) {
118  subresource_use_rate_ *= reduce_rate;
119  return subresource_use_rate_ > threshold;
120}
121
122
123void Referrer::Deserialize(const base::Value& value) {
124  if (value.GetType() != base::Value::TYPE_LIST)
125    return;
126  const base::ListValue* subresource_list(
127      static_cast<const base::ListValue*>(&value));
128  size_t index = 0;  // Bounds checking is done by subresource_list->Get*().
129  while (true) {
130    std::string url_spec;
131    if (!subresource_list->GetString(index++, &url_spec))
132      return;
133    double rate;
134    if (!subresource_list->GetDouble(index++, &rate))
135      return;
136
137    GURL url(url_spec);
138    // TODO(jar): We could be more direct, and change birth date or similar to
139    // show that this is a resurrected value we're adding in.  I'm not yet sure
140    // of how best to optimize the learning and pruning (Trim) algorithm at this
141    // level, so for now, we just suggest subresources, which leaves them all
142    // with the same birth date (typically start of process).
143    SuggestHost(url);
144    (*this)[url].SetSubresourceUseRate(rate);
145  }
146}
147
148base::Value* Referrer::Serialize() const {
149  base::ListValue* subresource_list(new base::ListValue);
150  for (const_iterator it = begin(); it != end(); ++it) {
151    base::StringValue* url_spec(new base::StringValue(it->first.spec()));
152    base::FundamentalValue* rate(new base::FundamentalValue(
153        it->second.subresource_use_rate()));
154
155    subresource_list->Append(url_spec);
156    subresource_list->Append(rate);
157  }
158  return subresource_list;
159}
160
161//------------------------------------------------------------------------------
162
163ReferrerValue::ReferrerValue()
164    : birth_time_(base::Time::Now()),
165      navigation_count_(0),
166      preconnection_count_(0),
167      preresolution_count_(0),
168      subresource_use_rate_(kInitialConnectsExpectedValue) {
169}
170
171void ReferrerValue::SubresourceIsNeeded() {
172  DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0);
173  DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0);
174  ++navigation_count_;
175  subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue;
176}
177
178void ReferrerValue::ReferrerWasObserved() {
179  subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue;
180  // Note: the use rate is temporarilly possibly incorect, as we need to find
181  // out if we really end up connecting.  This will happen in a few hundred
182  // milliseconds (when content arrives, etc.).
183  // Value of subresource_use_rate_ should be sampled before this call.
184}
185
186}  // namespace chrome_browser_net
187