1ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/net/referrer.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <limits.h> 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 9ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/compiler_specific.h" 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h" 11ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/message_loop.h" 123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/values.h" 133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/net/predictor.h" 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace chrome_browser_net { 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//------------------------------------------------------------------------------ 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Smoothing parameter for updating subresource_use_rate_. 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// We always combine our old expected value, weighted by some factor W (we use 21ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// kWeightingForOldConnectsExpectedValue), with the new expected value Enew. 22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// The new "expected value" is the number of actual connections made due to the 23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// current navigations. 24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// That means that IF we end up needing to connect, we should apply the formula: 253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Eupdated = Eold * W + Enew * (1 - W) 263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// If we visit the containing url, but don't end up needing a connection, then 273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Enew == 0, so we use the formula: 283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Eupdated = Eold * W 293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// To achieve the above updating algorithm, we end up doing the multiplication 303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// by W every time we contemplate doing a preconnection (i.e., when we navigate 31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// to the containing URL, and consider doing a preconnection), and then IFF we 32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// learn that we really needed a connection to the subresource, we complete the 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// above algorithm by adding the (1 - W) for each connection we make. 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// We weight the new expected value by a factor which is in the range of 0.0 to 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 1.0. 37ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic const double kWeightingForOldConnectsExpectedValue = 0.66; 38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// To estimate the expected value of the number of connections that we'll need 40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// when a referrer is navigated to, we start with the following low initial 41ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// value. 42ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Each time we do indeed (again) need the subresource, this value will get 43ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// increased. 44ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Each time we navigate to the refererrer but never end up needing this 45ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// subresource, the value will decrease. 463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Very conservative is 0.0, which will mean that we have to wait for a while 47ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// before doing much speculative acvtivity. We do persist results, so we'll 48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// save the asymptotic (correct?) learned answer in the long run. 49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Some browsers blindly make 2 connections all the time, so we'll use that as 50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// a starting point. 51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic const double kInitialConnectsExpectedValue = 2.0; 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenReferrer::Referrer() : use_count_(1) {} 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::SuggestHost(const GURL& url) { 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Limit how large our list can get, in case we make mistakes about what 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // hostnames are in sub-resources (example: Some advertisments have a link to 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the ad agency, and then provide a "surprising" redirect to the advertised 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // entity, which then (mistakenly) appears to be a subresource on the page 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // hosting the ad). 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // TODO(jar): Do experiments to optimize the max count of suggestions. 62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static const size_t kMaxSuggestions = 10; 63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!url.has_host()) // TODO(jar): Is this really needed???? 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(url == url.GetWithEmptyPath()); 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SubresourceMap::iterator it = find(url); 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (it != end()) { 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch it->second.SubresourceIsNeeded(); 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (kMaxSuggestions <= size()) { 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DeleteLeastUseful(); 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DCHECK(kMaxSuggestions > size()); 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch (*this)[url].SubresourceIsNeeded(); 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::DeleteLeastUseful() { 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Find the item with the lowest value. Most important is preconnection_rate, 823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // and least is lifetime (age). 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL least_useful_url; 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch double lowest_rate_seen = 0.0; 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We use longs for durations because we will use multiplication on them. 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int64 least_useful_lifetime = 0; // Duration in milliseconds. 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const base::Time kNow(base::Time::Now()); // Avoid multiple calls. 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds(); 91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch double rate = it->second.subresource_use_rate(); 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (least_useful_url.has_host()) { 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (rate > lowest_rate_seen) 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch continue; 953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (lifetime <= least_useful_lifetime) 963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick continue; 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch least_useful_url = it->first; 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch lowest_rate_seen = rate; 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch least_useful_lifetime = lifetime; 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 1023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (least_useful_url.has_host()) 1033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick erase(least_useful_url); 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool Referrer::Trim(double reduce_rate, double threshold) { 1073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::vector<GURL> discarded_urls; 108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!it->second.Trim(reduce_rate, threshold)) 1103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick discarded_urls.push_back(it->first); 111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 1123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (size_t i = 0; i < discarded_urls.size(); ++i) 1133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick erase(discarded_urls[i]); 1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return size() > 0; 115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 117ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool ReferrerValue::Trim(double reduce_rate, double threshold) { 118ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen subresource_use_rate_ *= reduce_rate; 119ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return subresource_use_rate_ > threshold; 120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid Referrer::Deserialize(const Value& value) { 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (value.GetType() != Value::TYPE_LIST) 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const ListValue* subresource_list(static_cast<const ListValue*>(&value)); 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch size_t index = 0; // Bounds checking is done by subresource_list->Get*(). 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (true) { 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string url_spec; 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!subresource_list->GetString(index++, &url_spec)) 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch double rate; 13372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen if (!subresource_list->GetDouble(index++, &rate)) 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL url(url_spec); 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // TODO(jar): We could be more direct, and change birth date or similar to 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // show that this is a resurrected value we're adding in. I'm not yet sure 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // of how best to optimize the learning and pruning (Trim) algorithm at this 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // level, so for now, we just suggest subresources, which leaves them all 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // with the same birth date (typically start of process). 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SuggestHost(url); 143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch (*this)[url].SetSubresourceUseRate(rate); 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochValue* Referrer::Serialize() const { 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ListValue* subresource_list(new ListValue); 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (const_iterator it = begin(); it != end(); ++it) { 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch StringValue* url_spec(new StringValue(it->first.spec())); 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch FundamentalValue* rate(new FundamentalValue( 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch it->second.subresource_use_rate())); 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch subresource_list->Append(url_spec); 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch subresource_list->Append(rate); 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return subresource_list; 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//------------------------------------------------------------------------------ 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochReferrerValue::ReferrerValue() 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch : birth_time_(base::Time::Now()), 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch navigation_count_(0), 165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch preconnection_count_(0), 1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick preresolution_count_(0), 167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen subresource_use_rate_(kInitialConnectsExpectedValue) { 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid ReferrerValue::SubresourceIsNeeded() { 171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0); 172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0); 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++navigation_count_; 174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue; 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid ReferrerValue::ReferrerWasObserved() { 178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue; 179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Note: the use rate is temporarilly possibly incorect, as we need to find 180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // out if we really end up connecting. This will happen in a few hundred 181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // milliseconds (when content arrives, etc.). 1823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Value of subresource_use_rate_ should be sampled before this call. 183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace chrome_browser_net 186