15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/net/referrer.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <limits.h> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/compiler_specific.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h" 119ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch#include "base/message_loop/message_loop.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/values.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/net/predictor.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace chrome_browser_net { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------ 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Smoothing parameter for updating subresource_use_rate_. 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We always combine our old expected value, weighted by some factor W (we use 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// kWeightingForOldConnectsExpectedValue), with the new expected value Enew. 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The new "expected value" is the number of actual connections made due to the 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// current navigations. 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// That means that IF we end up needing to connect, we should apply the formula: 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Eupdated = Eold * W + Enew * (1 - W) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If we visit the containing url, but don't end up needing a connection, then 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Enew == 0, so we use the formula: 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Eupdated = Eold * W 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To achieve the above updating algorithm, we end up doing the multiplication 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by W every time we contemplate doing a preconnection (i.e., when we navigate 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// to the containing URL, and consider doing a preconnection), and then IFF we 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// learn that we really needed a connection to the subresource, we complete the 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// above algorithm by adding the (1 - W) for each connection we make. 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We weight the new expected value by a factor which is in the range of 0.0 to 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1.0. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const double kWeightingForOldConnectsExpectedValue = 0.66; 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To estimate the expected value of the number of connections that we'll need 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// when a referrer is navigated to, we start with the following low initial 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// value. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each time we do indeed (again) need the subresource, this value will get 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// increased. 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each time we navigate to the refererrer but never end up needing this 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// subresource, the value will decrease. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Very conservative is 0.0, which will mean that we have to wait for a while 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// before doing much speculative acvtivity. We do persist results, so we'll 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// save the asymptotic (correct?) learned answer in the long run. 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Some browsers blindly make 2 connections all the time, so we'll use that as 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a starting point. 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const double kInitialConnectsExpectedValue = 2.0; 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Referrer::Referrer() : use_count_(1) {} 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void Referrer::SuggestHost(const GURL& url) { 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Limit how large our list can get, in case we make mistakes about what 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // hostnames are in sub-resources (example: Some advertisments have a link to 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the ad agency, and then provide a "surprising" redirect to the advertised 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // entity, which then (mistakenly) appears to be a subresource on the page 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // hosting the ad). 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(jar): Do experiments to optimize the max count of suggestions. 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const size_t kMaxSuggestions = 10; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!url.has_host()) // TODO(jar): Is this really needed???? 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(url == url.GetWithEmptyPath()); 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SubresourceMap::iterator it = find(url); 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (it != end()) { 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) it->second.SubresourceIsNeeded(); 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kMaxSuggestions <= size()) { 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DeleteLeastUseful(); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(kMaxSuggestions > size()); 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*this)[url].SubresourceIsNeeded(); 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void Referrer::DeleteLeastUseful() { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Find the item with the lowest value. Most important is preconnection_rate, 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and least is lifetime (age). 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL least_useful_url; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) double lowest_rate_seen = 0.0; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We use longs for durations because we will use multiplication on them. 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int64 least_useful_lifetime = 0; // Duration in milliseconds. 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time kNow(base::Time::Now()); // Avoid multiple calls. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds(); 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) double rate = it->second.subresource_use_rate(); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (least_useful_url.has_host()) { 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (rate > lowest_rate_seen) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (lifetime <= least_useful_lifetime) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) least_useful_url = it->first; 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lowest_rate_seen = rate; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) least_useful_lifetime = lifetime; 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (least_useful_url.has_host()) 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) erase(least_useful_url); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Referrer::Trim(double reduce_rate, double threshold) { 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<GURL> discarded_urls; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!it->second.Trim(reduce_rate, threshold)) 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) discarded_urls.push_back(it->first); 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < discarded_urls.size(); ++i) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) erase(discarded_urls[i]); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return size() > 0; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ReferrerValue::Trim(double reduce_rate, double threshold) { 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_use_rate_ *= reduce_rate; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return subresource_use_rate_ > threshold; 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void Referrer::Deserialize(const base::Value& value) { 1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (value.GetType() != base::Value::TYPE_LIST) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const base::ListValue* subresource_list( 1275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) static_cast<const base::ListValue*>(&value)); 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t index = 0; // Bounds checking is done by subresource_list->Get*(). 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (true) { 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string url_spec; 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!subresource_list->GetString(index++, &url_spec)) 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) double rate; 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!subresource_list->GetDouble(index++, &rate)) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL url(url_spec); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(jar): We could be more direct, and change birth date or similar to 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // show that this is a resurrected value we're adding in. I'm not yet sure 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of how best to optimize the learning and pruning (Trim) algorithm at this 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // level, so for now, we just suggest subresources, which leaves them all 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with the same birth date (typically start of process). 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SuggestHost(url); 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*this)[url].SetSubresourceUseRate(rate); 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)base::Value* Referrer::Serialize() const { 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::ListValue* subresource_list(new base::ListValue); 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const_iterator it = begin(); it != end(); ++it) { 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::StringValue* url_spec(new base::StringValue(it->first.spec())); 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::FundamentalValue* rate(new base::FundamentalValue( 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) it->second.subresource_use_rate())); 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_list->Append(url_spec); 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_list->Append(rate); 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return subresource_list; 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------ 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ReferrerValue::ReferrerValue() 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : birth_time_(base::Time::Now()), 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) navigation_count_(0), 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) preconnection_count_(0), 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) preresolution_count_(0), 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_use_rate_(kInitialConnectsExpectedValue) { 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ReferrerValue::SubresourceIsNeeded() { 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0); 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0); 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++navigation_count_; 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue; 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ReferrerValue::ReferrerWasObserved() { 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note: the use rate is temporarilly possibly incorect, as we need to find 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // out if we really end up connecting. This will happen in a few hundred 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // milliseconds (when content arrives, etc.). 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Value of subresource_use_rate_ should be sampled before this call. 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace chrome_browser_net 187