15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/net/referrer.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <limits.h>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/compiler_specific.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
119ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch#include "base/message_loop/message_loop.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/values.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/net/predictor.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace chrome_browser_net {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Smoothing parameter for updating subresource_use_rate_.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We always combine our old expected value, weighted by some factor W (we use
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The new "expected value" is the number of actual connections made due to the
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// current navigations.
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// That means that IF we end up needing to connect, we should apply the formula:
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Eupdated = Eold * W  +  Enew * (1 - W)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If we visit the containing url, but don't end up needing a connection, then
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Enew == 0, so we use the formula:
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Eupdated = Eold * W
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To achieve the above updating algorithm, we end up doing the multiplication
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by W every time we contemplate doing a preconnection (i.e., when we navigate
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to the containing URL, and consider doing a preconnection), and then IFF we
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// learn that we really needed a connection to the subresource, we complete the
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// above algorithm by adding the (1 - W) for each connection we make.
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We weight the new expected value by a factor which is in the range of 0.0 to
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1.0.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const double kWeightingForOldConnectsExpectedValue = 0.66;
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To estimate the expected value of the number of connections that we'll need
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// when a referrer is navigated to, we start with the following low initial
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// value.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each time we do indeed (again) need the subresource, this value will get
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// increased.
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each time we navigate to the refererrer but never end up needing this
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// subresource, the value will decrease.
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Very conservative is 0.0, which will mean that we have to wait for a while
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// before doing much speculative acvtivity.  We do persist results, so we'll
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// save the asymptotic (correct?) learned answer in the long run.
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Some browsers blindly make 2 connections all the time, so we'll use that as
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a starting point.
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const double kInitialConnectsExpectedValue = 2.0;
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Referrer::Referrer() : use_count_(1) {}
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void Referrer::SuggestHost(const GURL& url) {
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Limit how large our list can get, in case we make mistakes about what
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // hostnames are in sub-resources (example: Some advertisments have a link to
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the ad agency, and then provide a "surprising" redirect to the advertised
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // entity, which then (mistakenly) appears to be a subresource on the page
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // hosting the ad).
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(jar): Do experiments to optimize the max count of suggestions.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const size_t kMaxSuggestions = 10;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!url.has_host())  // TODO(jar): Is this really needed????
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(url == url.GetWithEmptyPath());
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SubresourceMap::iterator it = find(url);
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (it != end()) {
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    it->second.SubresourceIsNeeded();
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (kMaxSuggestions <= size()) {
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DeleteLeastUseful();
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(kMaxSuggestions > size());
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  (*this)[url].SubresourceIsNeeded();
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void Referrer::DeleteLeastUseful() {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Find the item with the lowest value.  Most important is preconnection_rate,
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and least is lifetime (age).
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL least_useful_url;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  double lowest_rate_seen = 0.0;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We use longs for durations because we will use multiplication on them.
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 least_useful_lifetime = 0;  // Duration in milliseconds.
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const base::Time kNow(base::Time::Now());  // Avoid multiple calls.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds();
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    double rate = it->second.subresource_use_rate();
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (least_useful_url.has_host()) {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (rate > lowest_rate_seen)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (lifetime <= least_useful_lifetime)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    least_useful_url = it->first;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    lowest_rate_seen = rate;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    least_useful_lifetime = lifetime;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (least_useful_url.has_host())
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    erase(least_useful_url);
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Referrer::Trim(double reduce_rate, double threshold) {
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<GURL> discarded_urls;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!it->second.Trim(reduce_rate, threshold))
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      discarded_urls.push_back(it->first);
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < discarded_urls.size(); ++i)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    erase(discarded_urls[i]);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return size() > 0;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ReferrerValue::Trim(double reduce_rate, double threshold) {
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  subresource_use_rate_ *= reduce_rate;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return subresource_use_rate_ > threshold;
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void Referrer::Deserialize(const base::Value& value) {
1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (value.GetType() != base::Value::TYPE_LIST)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  const base::ListValue* subresource_list(
1275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      static_cast<const base::ListValue*>(&value));
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t index = 0;  // Bounds checking is done by subresource_list->Get*().
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (true) {
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string url_spec;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!subresource_list->GetString(index++, &url_spec))
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    double rate;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!subresource_list->GetDouble(index++, &rate))
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    GURL url(url_spec);
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TODO(jar): We could be more direct, and change birth date or similar to
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // show that this is a resurrected value we're adding in.  I'm not yet sure
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // of how best to optimize the learning and pruning (Trim) algorithm at this
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // level, so for now, we just suggest subresources, which leaves them all
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // with the same birth date (typically start of process).
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SuggestHost(url);
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    (*this)[url].SetSubresourceUseRate(rate);
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)base::Value* Referrer::Serialize() const {
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::ListValue* subresource_list(new base::ListValue);
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const_iterator it = begin(); it != end(); ++it) {
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::StringValue* url_spec(new base::StringValue(it->first.spec()));
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::FundamentalValue* rate(new base::FundamentalValue(
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        it->second.subresource_use_rate()));
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    subresource_list->Append(url_spec);
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    subresource_list->Append(rate);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return subresource_list;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ReferrerValue::ReferrerValue()
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : birth_time_(base::Time::Now()),
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      navigation_count_(0),
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      preconnection_count_(0),
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      preresolution_count_(0),
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      subresource_use_rate_(kInitialConnectsExpectedValue) {
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ReferrerValue::SubresourceIsNeeded() {
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0);
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0);
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ++navigation_count_;
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue;
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ReferrerValue::ReferrerWasObserved() {
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note: the use rate is temporarilly possibly incorect, as we need to find
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // out if we really end up connecting.  This will happen in a few hundred
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // milliseconds (when content arrives, etc.).
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Value of subresource_use_rate_ should be sampled before this call.
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace chrome_browser_net
187