1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/net/referrer.h"
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <limits.h>
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
9ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/compiler_specific.h"
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h"
11ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/message_loop.h"
123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/values.h"
133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/net/predictor.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace chrome_browser_net {
16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//------------------------------------------------------------------------------
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Smoothing parameter for updating subresource_use_rate_.
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// We always combine our old expected value, weighted by some factor W (we use
21ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// The new "expected value" is the number of actual connections made due to the
23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// current navigations.
24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// That means that IF we end up needing to connect, we should apply the formula:
253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Eupdated = Eold * W  +  Enew * (1 - W)
263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// If we visit the containing url, but don't end up needing a connection, then
273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Enew == 0, so we use the formula:
283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Eupdated = Eold * W
293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// To achieve the above updating algorithm, we end up doing the multiplication
303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// by W every time we contemplate doing a preconnection (i.e., when we navigate
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// to the containing URL, and consider doing a preconnection), and then IFF we
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// learn that we really needed a connection to the subresource, we complete the
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// above algorithm by adding the (1 - W) for each connection we make.
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// We weight the new expected value by a factor which is in the range of 0.0 to
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 1.0.
37ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic const double kWeightingForOldConnectsExpectedValue = 0.66;
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// To estimate the expected value of the number of connections that we'll need
40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// when a referrer is navigated to, we start with the following low initial
41ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// value.
42ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Each time we do indeed (again) need the subresource, this value will get
43ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// increased.
44ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Each time we navigate to the refererrer but never end up needing this
45ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// subresource, the value will decrease.
463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Very conservative is 0.0, which will mean that we have to wait for a while
47ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// before doing much speculative acvtivity.  We do persist results, so we'll
48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// save the asymptotic (correct?) learned answer in the long run.
49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Some browsers blindly make 2 connections all the time, so we'll use that as
50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// a starting point.
51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic const double kInitialConnectsExpectedValue = 2.0;
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenReferrer::Referrer() : use_count_(1) {}
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::SuggestHost(const GURL& url) {
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Limit how large our list can get, in case we make mistakes about what
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // hostnames are in sub-resources (example: Some advertisments have a link to
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the ad agency, and then provide a "surprising" redirect to the advertised
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // entity, which then (mistakenly) appears to be a subresource on the page
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // hosting the ad).
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // TODO(jar): Do experiments to optimize the max count of suggestions.
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static const size_t kMaxSuggestions = 10;
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!url.has_host())  // TODO(jar): Is this really needed????
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DCHECK(url == url.GetWithEmptyPath());
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  SubresourceMap::iterator it = find(url);
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (it != end()) {
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    it->second.SubresourceIsNeeded();
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (kMaxSuggestions <= size()) {
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    DeleteLeastUseful();
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    DCHECK(kMaxSuggestions > size());
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  (*this)[url].SubresourceIsNeeded();
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::DeleteLeastUseful() {
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Find the item with the lowest value.  Most important is preconnection_rate,
823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // and least is lifetime (age).
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  GURL least_useful_url;
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  double lowest_rate_seen = 0.0;
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // We use longs for durations because we will use multiplication on them.
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int64 least_useful_lifetime = 0;  // Duration in milliseconds.
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const base::Time kNow(base::Time::Now());  // Avoid multiple calls.
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds();
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    double rate = it->second.subresource_use_rate();
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (least_useful_url.has_host()) {
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (rate > lowest_rate_seen)
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        continue;
953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      if (lifetime <= least_useful_lifetime)
963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        continue;
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    least_useful_url = it->first;
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    lowest_rate_seen = rate;
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    least_useful_lifetime = lifetime;
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
1023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (least_useful_url.has_host())
1033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    erase(least_useful_url);
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool Referrer::Trim(double reduce_rate, double threshold) {
1073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::vector<GURL> discarded_urls;
108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (!it->second.Trim(reduce_rate, threshold))
1103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      discarded_urls.push_back(it->first);
111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
1123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (size_t i = 0; i < discarded_urls.size(); ++i)
1133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    erase(discarded_urls[i]);
1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return size() > 0;
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
117ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool ReferrerValue::Trim(double reduce_rate, double threshold) {
118ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  subresource_use_rate_ *= reduce_rate;
119ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return subresource_use_rate_ > threshold;
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::Deserialize(const Value& value) {
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (value.GetType() != Value::TYPE_LIST)
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const ListValue* subresource_list(static_cast<const ListValue*>(&value));
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size_t index = 0;  // Bounds checking is done by subresource_list->Get*().
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  while (true) {
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string url_spec;
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!subresource_list->GetString(index++, &url_spec))
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return;
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    double rate;
13372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    if (!subresource_list->GetDouble(index++, &rate))
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return;
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    GURL url(url_spec);
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // TODO(jar): We could be more direct, and change birth date or similar to
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // show that this is a resurrected value we're adding in.  I'm not yet sure
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // of how best to optimize the learning and pruning (Trim) algorithm at this
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // level, so for now, we just suggest subresources, which leaves them all
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // with the same birth date (typically start of process).
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    SuggestHost(url);
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    (*this)[url].SetSubresourceUseRate(rate);
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochValue* Referrer::Serialize() const {
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ListValue* subresource_list(new ListValue);
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (const_iterator it = begin(); it != end(); ++it) {
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    StringValue* url_spec(new StringValue(it->first.spec()));
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    FundamentalValue* rate(new FundamentalValue(
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        it->second.subresource_use_rate()));
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    subresource_list->Append(url_spec);
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    subresource_list->Append(rate);
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return subresource_list;
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//------------------------------------------------------------------------------
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochReferrerValue::ReferrerValue()
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    : birth_time_(base::Time::Now()),
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      navigation_count_(0),
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      preconnection_count_(0),
1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      preresolution_count_(0),
167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      subresource_use_rate_(kInitialConnectsExpectedValue) {
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid ReferrerValue::SubresourceIsNeeded() {
171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0);
172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0);
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ++navigation_count_;
174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue;
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid ReferrerValue::ReferrerWasObserved() {
178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue;
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Note: the use rate is temporarilly possibly incorect, as we need to find
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // out if we really end up connecting.  This will happen in a few hundred
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // milliseconds (when content arrives, etc.).
1823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Value of subresource_use_rate_ should be sampled before this call.
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace chrome_browser_net
186