15d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "components/url_matcher/url_matcher.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <iterator> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h" 111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/profiler/scoped_profile.h" 127dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/gurl.h" 137dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/url_canon.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace url_matcher { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This set of classes implement a mapping of URL Component Patterns, such as 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for use in substring comparisons. 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The idea of this mapping is to reduce the problem of comparing many 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URL Component Patterns against one URL to the problem of searching many 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// substrings in one string: 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---------------------- ----------------- 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | URL Query operator | ----translate----> | StringPattern | 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---------------------- ----------------- 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ^ 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// compare 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// v 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---------------------- ----------------- 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | URL to compare | | | 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | to all URL Query | ----translate----> | String | 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | operators | | | 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ---------------------- ----------------- 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The reason for this problem reduction is that there are efficient algorithms 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for searching many substrings in one string (see Aho-Corasick algorithm). 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Additionally, some of the same pieces are reused to implement regular 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// expression comparisons. The FilteredRE2 implementation for matching many 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expressions against one string uses prefiltering, in which a set 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of substrings (derived from the regexes) are first searched for, to reduce 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the number of regular expressions to test; the prefiltering step also 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// uses Aho-Corasick. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 1: {host,path,query}_{prefix,suffix,equals} searches. 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ========================================================== 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For searches in this class, we normalize URLs as follows: 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 1: 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Remove scheme, port and segment from URL: 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:8080/index.html?search=foo#first_match becomes 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// www.example.com/index.html?search=foo 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We remove the scheme and port number because they can be checked later 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in a secondary filter step. We remove the segment (the #... part) because 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this is not guaranteed to be ASCII-7 encoded. 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 2: 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL to String and add the following position markers: 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - BU = Beginning of URL 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - ED = End of Domain 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EP = End of Path 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EU = End of URL 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Furthermore, the hostname is canonicalized to start with a ".". 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Position markers are represented as characters >127, which are therefore 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// guaranteed not to be part of the ASCII-7 encoded URL character set. 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> www.example.com/index.html?search=foo becomes 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU .www.example.com ED /index.html EP ?search=foo EU 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> www.example.com/index.html becomes 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU .www.example.com ED /index.html EP EU 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 3: 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL Component Patterns as follows: 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_prefix(prefix) = BU add_missing_dot_prefix(prefix) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_prefix("www.example") = BU .www.example 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_suffix(suffix) = suffix ED 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_suffix("example.com") = example.com ED 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_suffix(".example.com") = .example.com ED 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_equals(domain) = BU add_missing_dot_prefix(domain) ED 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_equals("www.example.com") = BU .www.example.com ED 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Similarly for path query parameters ({path, query}_{prefix, suffix, equals}). 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// With this, we can search the StringPatterns in the normalized URL. 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 2: url_{prefix,suffix,equals,contains} searches. 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ===================================================== 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 1: as above, except that 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - the scheme is not removed 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - the port is not removed if it is specified and does not match the default 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// port for the given scheme. 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 2: 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL to String and add the following position markers: 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - BU = Beginning of URL 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EU = End of URL 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:8080/index.html?search=foo#first_match becomes 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU http://www.example.com:8080/index.html?search=foo EU 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:80/index.html?search=foo#first_match becomes 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU http://www.example.com/index.html?search=foo EU 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// url_prefix(prefix) = BU prefix 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> url_prefix("http://www.example") = BU http://www.example 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// url_contains(substring) = substring 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> url_contains("index") = index 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 3: {host,path,query}_contains searches. 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ============================================ 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These kinds of searches are not supported directly but can be derived 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by a combination of a url_contains() query followed by an explicit test: 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_contains(str) = url_contains(str) followed by test whether str occurs 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in host component of original URL. 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_contains("example.co") = example.co 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// followed by gurl.host().find("example.co"); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// [similarly for path_contains and query_contains]. 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression matching (url_matches searches) 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ================================================== 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This class also supports matching regular expressions (RE2 syntax) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// against full URLs, which are transformed as in case 2. 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) { 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return criterion == URLMatcherCondition::URL_MATCHES; 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool IsOriginAndPathRegexCriterion(URLMatcherCondition::Criterion criterion) { 150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return criterion == URLMatcherCondition::ORIGIN_AND_PATH_MATCHES; 151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherCondition 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition() 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : criterion_(HOST_PREFIX), 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string_pattern_(NULL) {} 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::~URLMatcherCondition() {} 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition( 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Criterion criterion, 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPattern* string_pattern) 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : criterion_(criterion), 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string_pattern_(string_pattern) {} 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs) 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : criterion_(rhs.criterion_), 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string_pattern_(rhs.string_pattern_) {} 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition& URLMatcherCondition::operator=( 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherCondition& rhs) { 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) criterion_ = rhs.criterion_; 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string_pattern_ = rhs.string_pattern_; 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *this; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const { 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (criterion_ < rhs.criterion_) return true; 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (criterion_ > rhs.criterion_) return false; 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return *string_pattern_ < *rhs.string_pattern_; 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true; 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // or both are NULL. 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsFullURLCondition() const { 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For these criteria the SubstringMatcher needs to be executed on the 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // GURL that is canonicalized with 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLMatcherConditionFactory::CanonicalizeURLForFullSearches. 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (criterion_) { 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case HOST_CONTAINS: 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case PATH_CONTAINS: 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case QUERY_CONTAINS: 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case URL_PREFIX: 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case URL_SUFFIX: 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case URL_CONTAINS: 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case URL_EQUALS: 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsRegexCondition() const { 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return IsRegexCriterion(criterion_); 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool URLMatcherCondition::IsOriginAndPathRegexCondition() const { 217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return IsOriginAndPathRegexCriterion(criterion_); 218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsMatch( 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::set<StringPattern::ID>& matching_patterns, 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const GURL& url) const { 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(string_pattern_); 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ContainsKey(matching_patterns, string_pattern_->id())) 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a substring match on the raw URL. In case of a match, we need to verify 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that the match was found in the correct component of the URL. 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (criterion_) { 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case HOST_CONTAINS: 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return url.host().find(string_pattern_->pattern()) != 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string::npos; 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case PATH_CONTAINS: 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return url.path().find(string_pattern_->pattern()) != 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string::npos; 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case QUERY_CONTAINS: 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return url.query().find(string_pattern_->pattern()) != 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string::npos; 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherConditionFactory 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These are symbols that are not contained in 7-bit ASCII used in GURLs. 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kBeginningOfURL[] = {static_cast<char>(-1), 0}; 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kEndOfDomain[] = {static_cast<char>(-2), 0}; 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kEndOfPath[] = {static_cast<char>(-3), 0}; 254a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kQueryComponentDelimiter[] = {static_cast<char>(-4), 0}; 255a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kEndOfURL[] = {static_cast<char>(-5), 0}; 256a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 257a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// The delimiter for query parameters 258a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kQuerySeparator = '&'; 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {} 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::~URLMatcherConditionFactory() { 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STLDeleteElements(&substring_pattern_singletons_); 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STLDeleteElements(®ex_pattern_singletons_); 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) STLDeleteElements(&origin_and_path_regex_pattern_singletons_); 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches( 2702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const GURL& url) const { 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain + 272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) url.path() + kEndOfPath + 273a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch (url.has_query() ? CanonicalizeQuery(url.query(), true, true) 274a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch : std::string()) + 275a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch kEndOfURL; 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition( 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& prefix) { 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_PREFIX, 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kBeginningOfURL + CanonicalizeHostname(prefix)); 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition( 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& suffix) { 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_SUFFIX, 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) suffix + kEndOfDomain); 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition( 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str); 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition( 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_EQUALS, 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain); 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition( 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& prefix) { 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::PATH_PREFIX, 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kEndOfDomain + prefix); 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition( 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& suffix) { 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath); 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition( 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str); 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition( 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::PATH_EQUALS, 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kEndOfDomain + str + kEndOfPath); 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition( 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& prefix) { 3252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) std::string pattern; 3262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!prefix.empty() && prefix[0] == '?') 327a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch pattern = kEndOfPath + CanonicalizeQuery(prefix.substr(1), true, false); 3282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else 329a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch pattern = kEndOfPath + CanonicalizeQuery(prefix, true, false); 3302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern); 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition( 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& suffix) { 3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!suffix.empty() && suffix[0] == '?') { 3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateQueryEqualsCondition(suffix); 3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } else { 3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateCondition(URLMatcherCondition::QUERY_SUFFIX, 340a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch CanonicalizeQuery(suffix, false, true) + kEndOfURL); 3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition( 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!str.empty() && str[0] == '?') 3472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateQueryPrefixCondition(str); 3482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else 3492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str); 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition( 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) std::string pattern; 3552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!str.empty() && str[0] == '?') 356a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch pattern = 357a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch kEndOfPath + CanonicalizeQuery(str.substr(1), true, true) + kEndOfURL; 3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else 359a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch pattern = kEndOfPath + CanonicalizeQuery(str, true, true) + kEndOfURL; 3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern); 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition( 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& host_suffix, 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& path_prefix) { 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX, 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) host_suffix + kEndOfDomain + path_prefix); 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition( 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& host, 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& path_prefix) { 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX, 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain + 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) path_prefix); 3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches( 3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const GURL& url) const { 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL::Replacements replacements; 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearPassword(); 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearUsername(); 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearRef(); 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Clear port if it is implicit from scheme. 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (url.has_port()) { 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& port = url.scheme(); 390010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (url::DefaultPortForScheme(port.c_str(), port.size()) == 391010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) url.EffectiveIntPort()) { 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearPort(); 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return kBeginningOfURL + url.ReplaceComponents(replacements).spec() + 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kEndOfURL; 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 399c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)static std::string CanonicalizeURLForRegexSearchesHelper( 400c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const GURL& url, 401c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool clear_query) { 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL::Replacements replacements; 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearPassword(); 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearUsername(); 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearRef(); 406c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (clear_query) 407c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) replacements.ClearQuery(); 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Clear port if it is implicit from scheme. 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (url.has_port()) { 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& port = url.scheme(); 411010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (url::DefaultPortForScheme(port.c_str(), port.size()) == 412010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) url.EffectiveIntPort()) { 4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) replacements.ClearPort(); 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return url.ReplaceComponents(replacements).spec(); 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 419c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches( 420c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const GURL& url) const { 421c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return CanonicalizeURLForRegexSearchesHelper(url, false); 422c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 423c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 424c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::string 425c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherConditionFactory::CanonicalizeURLForOriginAndPathRegexSearches( 426c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const GURL& url) const { 427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return CanonicalizeURLForRegexSearchesHelper(url, true); 428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 429c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition( 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& prefix) { 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::URL_PREFIX, 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kBeginningOfURL + prefix); 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition( 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& suffix) { 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL); 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition( 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::URL_CONTAINS, str); 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition( 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& str) { 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::URL_EQUALS, 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kBeginningOfURL + str + kEndOfURL); 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition( 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& regex) { 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return CreateCondition(URLMatcherCondition::URL_MATCHES, regex); 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 457c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherCondition 458c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherConditionFactory::CreateOriginAndPathMatchesCondition( 459c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const std::string& regex) { 460c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return CreateCondition(URLMatcherCondition::ORIGIN_AND_PATH_MATCHES, regex); 461c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)} 462c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcherConditionFactory::ForgetUnusedPatterns( 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::set<StringPattern::ID>& used_patterns) { 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PatternSingletons::iterator i = substring_pattern_singletons_.begin(); 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (i != substring_pattern_singletons_.end()) { 467c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ContainsKey(used_patterns, (*i)->id())) { 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++i; 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete *i; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_pattern_singletons_.erase(i++); 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i = regex_pattern_singletons_.begin(); 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (i != regex_pattern_singletons_.end()) { 476c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ContainsKey(used_patterns, (*i)->id())) { 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++i; 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete *i; 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) regex_pattern_singletons_.erase(i++); 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 483c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) i = origin_and_path_regex_pattern_singletons_.begin(); 484c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) while (i != origin_and_path_regex_pattern_singletons_.end()) { 485c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (ContainsKey(used_patterns, (*i)->id())) { 486c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ++i; 487c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else { 488c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) delete *i; 489c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_pattern_singletons_.erase(i++); 490c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 491c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionFactory::IsEmpty() const { 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return substring_pattern_singletons_.empty() && 496c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) regex_pattern_singletons_.empty() && 497c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_pattern_singletons_.empty(); 4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateCondition( 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URLMatcherCondition::Criterion criterion, 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& pattern) { 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern search_pattern(pattern, 0); 504c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) PatternSingletons* pattern_singletons = NULL; 505c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (IsRegexCriterion(criterion)) 506c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) pattern_singletons = ®ex_pattern_singletons_; 507c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else if (IsOriginAndPathRegexCriterion(criterion)) 508c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) pattern_singletons = &origin_and_path_regex_pattern_singletons_; 509c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else 510c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) pattern_singletons = &substring_pattern_singletons_; 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PatternSingletons::const_iterator iter = 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pattern_singletons->find(&search_pattern); 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (iter != pattern_singletons->end()) { 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return URLMatcherCondition(criterion, *iter); 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern* new_pattern = 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new StringPattern(pattern, id_counter_++); 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pattern_singletons->insert(new_pattern); 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return URLMatcherCondition(criterion, new_pattern); 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeHostname( 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string& hostname) const { 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!hostname.empty() && hostname[0] == '.') 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return hostname; 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return "." + hostname; 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 533a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// This function prepares the query string by replacing query separator with a 534a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// magic value (|kQueryComponentDelimiter|). When the boolean 535a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// |prepend_beginning_of_query_component| is true the function prepends the 536a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// query with the same magic. This is done to locate the start of a key value 537a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// pair in the query string. The parameter |query| is passed by value 538a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// intentionally, since it is locally modified. 539a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochstd::string URLMatcherConditionFactory::CanonicalizeQuery( 540a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch std::string query, 541a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch bool prepend_beginning_of_query_component, 542a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch bool append_end_of_query_component) const { 543a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (std::string::iterator it = query.begin(); it != query.end(); ++it) { 544a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (*it == kQuerySeparator) 545a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch *it = kQueryComponentDelimiter[0]; 546a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 547a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (prepend_beginning_of_query_component) 548a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query = kQueryComponentDelimiter + query; 549a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (append_end_of_query_component) 550a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query += kQueryComponentDelimiter; 551a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return query; 552a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch} 553a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()( 5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern* lhs, 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern* rhs) const { 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (lhs == NULL && rhs != NULL) return true; 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (lhs != NULL && rhs != NULL) 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return lhs->pattern() < rhs->pattern(); 5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Either both are NULL or only rhs is NULL. 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 565a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// URLQueryElementMatcherCondition 566a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// 567a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 568a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLQueryElementMatcherCondition::URLQueryElementMatcherCondition( 569a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const std::string& key, 570a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const std::string& value, 571a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch QueryValueMatchType query_value_match_type, 572a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch QueryElementType query_element_type, 573a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch Type match_type, 574a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch URLMatcherConditionFactory* factory) { 575a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch match_type_ = match_type; 576a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 577a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (query_element_type == ELEMENT_TYPE_KEY_VALUE) { 578a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch key_ = kQueryComponentDelimiter + key + "="; 579a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch value_ = value; 580a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } else { 581a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch key_ = kQueryComponentDelimiter + key; 582a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch value_ = std::string(); 583a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 584a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 585a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (query_value_match_type == QUERY_VALUE_MATCH_EXACT) 586a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch value_ += kQueryComponentDelimiter; 587a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 588a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // If |value_| is empty no need to find the |key_| and verify if the value 589a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // matches. Simply checking the presence of key is sufficient, which is done 590a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // by MATCH_ANY 591a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (value_.empty()) 592a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch match_type_ = MATCH_ANY; 593a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 594a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch URLMatcherCondition condition; 595a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // If |match_type_| is MATCH_ANY, then we could simply look for the 596a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // combination of |key_| + |value_|, which can be efficiently done by 597a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // SubstringMatcher 598a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (match_type_ == MATCH_ANY) 599a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition = factory->CreateQueryContainsCondition(key_ + value_); 600a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch else 601a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition = factory->CreateQueryContainsCondition(key_); 602a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch string_pattern_ = condition.string_pattern(); 603a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 604a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch key_length_ = key_.length(); 605a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch value_length_ = value_.length(); 606a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch} 607a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 608a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLQueryElementMatcherCondition::~URLQueryElementMatcherCondition() {} 609a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 610a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLQueryElementMatcherCondition::operator<( 611a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const URLQueryElementMatcherCondition& rhs) const { 612a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (match_type_ != rhs.match_type_) 613a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return match_type_ < rhs.match_type_; 614a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (string_pattern_ != NULL && rhs.string_pattern_ != NULL) 615a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return *string_pattern_ < *rhs.string_pattern_; 616a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) 617a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return true; 618a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL, 619a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // or both are NULL. 620a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return false; 621a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch} 622a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 623a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLQueryElementMatcherCondition::IsMatch( 624a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const std::string& url_for_component_searches) const { 625a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch switch (match_type_) { 626a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch case MATCH_ANY: { 627a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // For MATCH_ANY, no additional verification step is needed. We can trust 628a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // the SubstringMatcher to do the verification. 629a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return true; 630a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 631a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch case MATCH_ALL: { 632a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch size_t start = 0; 633a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch int found = 0; 634a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch size_t offset; 635a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch while ((offset = url_for_component_searches.find(key_, start)) != 636a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch std::string::npos) { 637a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (url_for_component_searches.compare( 638a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch offset + key_length_, value_length_, value_) != 0) { 639a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return false; 640a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } else { 641a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++found; 642a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 643a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch start = offset + key_length_ + value_length_ - 1; 644a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 645a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return !!found; 646a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 647a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch case MATCH_FIRST: { 648a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch size_t offset = url_for_component_searches.find(key_); 649a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return url_for_component_searches.compare( 650a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch offset + key_length_, value_length_, value_) == 0; 651a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 652a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch case MATCH_LAST: { 653a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch size_t offset = url_for_component_searches.rfind(key_); 654a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return url_for_component_searches.compare( 655a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch offset + key_length_, value_length_, value_) == 0; 656a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 657a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 658a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch NOTREACHED(); 659a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return false; 660a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch} 661a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 662a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// 6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherSchemeFilter 6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter) 6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : filters_(1) { 6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) filters_.push_back(filter); 6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::URLMatcherSchemeFilter( 6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::vector<std::string>& filters) 6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : filters_(filters) {} 6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {} 6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const { 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::find(filters_.begin(), filters_.end(), url.scheme()) != 6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) filters_.end(); 6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherPortFilter 6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::URLMatcherPortFilter( 6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::vector<URLMatcherPortFilter::Range>& ranges) 6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : ranges_(ranges) {} 6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::~URLMatcherPortFilter() {} 6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherPortFilter::IsMatch(const GURL& url) const { 6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int port = url.EffectiveIntPort(); 6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (std::vector<Range>::const_iterator i = ranges_.begin(); 6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i != ranges_.end(); ++i) { 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i->first <= port && port <= i->second) 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from, 7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int to) { 7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Range(from, to); 7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) { 7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Range(port, port); 7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherConditionSet 7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::~URLMatcherConditionSet() {} 7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::URLMatcherConditionSet( 7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ID id, 7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Conditions& conditions) 7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : id_(id), 7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions_(conditions) {} 7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::URLMatcherConditionSet( 7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ID id, 7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const Conditions& conditions, 7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<URLMatcherSchemeFilter> scheme_filter, 7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<URLMatcherPortFilter> port_filter) 7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : id_(id), 7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions_(conditions), 7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scheme_filter_(scheme_filter.Pass()), 7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) port_filter_(port_filter.Pass()) {} 7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 735a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLMatcherConditionSet::URLMatcherConditionSet( 736a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ID id, 737a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const Conditions& conditions, 738a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const QueryConditions& query_conditions, 739a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch scoped_ptr<URLMatcherSchemeFilter> scheme_filter, 740a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch scoped_ptr<URLMatcherPortFilter> port_filter) 741a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch : id_(id), 742a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch conditions_(conditions), 743a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_conditions_(query_conditions), 744a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch scheme_filter_(scheme_filter.Pass()), 745a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch port_filter_(port_filter.Pass()) {} 746a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionSet::IsMatch( 7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::set<StringPattern::ID>& matching_patterns, 7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const GURL& url) const { 750a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return IsMatch(matching_patterns, url, std::string()); 751a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch} 752a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 753a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLMatcherConditionSet::IsMatch( 754a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const std::set<StringPattern::ID>& matching_patterns, 755a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const GURL& url, 756a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const std::string& url_for_component_searches) const { 7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Conditions::const_iterator i = conditions_.begin(); 7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i != conditions_.end(); ++i) { 7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!i->IsMatch(matching_patterns, url)) 7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (scheme_filter_.get() && !scheme_filter_->IsMatch(url)) 7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (port_filter_.get() && !port_filter_->IsMatch(url)) 7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 766a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (query_conditions_.empty()) 767a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return true; 768a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // The loop is duplicated below for performance reasons. If not all query 769a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // elements are found, no need to verify match that is expected to take more 770a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch // cycles. 771a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (QueryConditions::const_iterator i = query_conditions_.begin(); 772a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch i != query_conditions_.end(); 773a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++i) { 774a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (!ContainsKey(matching_patterns, i->string_pattern()->id())) 775a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return false; 776a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 777a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (QueryConditions::const_iterator i = query_conditions_.begin(); 778a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch i != query_conditions_.end(); 779a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++i) { 780a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (!i->IsMatch(url_for_component_searches)) 781a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch return false; 782a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcher 7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcher::URLMatcher() {} 7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcher::~URLMatcher() {} 7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::AddConditionSets( 7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Vector& condition_sets) { 7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSet::Vector::const_iterator i = 7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_sets.begin(); i != condition_sets.end(); ++i) { 7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(url_matcher_condition_sets_.find((*i)->id()) == 7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.end()); 8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_[(*i)->id()] = *i; 8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateInternalDatastructures(); 8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::RemoveConditionSets( 8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) { 8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (std::vector<URLMatcherConditionSet::ID>::const_iterator i = 8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_ids.begin(); i != condition_set_ids.end(); ++i) { 8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(url_matcher_condition_sets_.find(*i) != 8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.end()); 8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.erase(*i); 8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateInternalDatastructures(); 8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::ClearUnusedConditionSets() { 8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateConditionFactory(); 8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL( 8212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const GURL& url) const { 8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Find all IDs of StringPatterns that match |url|. 8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // See URLMatcherConditionFactory for the canonicalization of URLs and the 8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // distinction between full url searches and url component searches. 8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::set<StringPattern::ID> matches; 826a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch std::string url_for_component_searches; 827a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 828c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!full_url_matcher_.IsEmpty()) { 829c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) full_url_matcher_.Match( 830c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) condition_factory_.CanonicalizeURLForFullSearches(url), &matches); 831c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 832c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!url_component_matcher_.IsEmpty()) { 833a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch url_for_component_searches = 834a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition_factory_.CanonicalizeURLForComponentSearches(url); 835a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch url_component_matcher_.Match(url_for_component_searches, &matches); 836c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 837c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!regex_set_matcher_.IsEmpty()) { 838c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) regex_set_matcher_.Match( 839c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) condition_factory_.CanonicalizeURLForRegexSearches(url), &matches); 840c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 841c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!origin_and_path_regex_set_matcher_.IsEmpty()) { 842c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_set_matcher_.Match( 843c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) condition_factory_.CanonicalizeURLForOriginAndPathRegexSearches(url), 844c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) &matches); 845c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Calculate all URLMatcherConditionSets for which all URLMatcherConditions 8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // were fulfilled. 8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::set<URLMatcherConditionSet::ID> result; 8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (std::set<StringPattern::ID>::const_iterator i = matches.begin(); 8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) i != matches.end(); ++i) { 8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For each URLMatcherConditionSet there is exactly one condition 8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // registered in substring_match_triggers_. This means that the following 8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // logic tests each URLMatcherConditionSet exactly once if it can be 8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // completely fulfilled. 8562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) StringPatternTriggers::const_iterator triggered_condition_sets_iter = 8572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) substring_match_triggers_.find(*i); 8582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (triggered_condition_sets_iter == substring_match_triggers_.end()) 8592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) continue; // Not all substring matches are triggers for a condition set. 8602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const std::set<URLMatcherConditionSet::ID>& condition_sets = 8612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) triggered_condition_sets_iter->second; 8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (std::set<URLMatcherConditionSet::ID>::const_iterator j = 8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_sets.begin(); j != condition_sets.end(); ++j) { 8642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) URLMatcherConditionSets::const_iterator condition_set_iter = 8652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) url_matcher_condition_sets_.find(*j); 8662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DCHECK(condition_set_iter != url_matcher_condition_sets_.end()); 867a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (condition_set_iter->second->IsMatch( 868a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch matches, url, url_for_component_searches)) 8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) result.insert(*j); 8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return result; 8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcher::IsEmpty() const { 8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return condition_factory_.IsEmpty() && 8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.empty() && 8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_match_triggers_.empty() && 8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) full_url_matcher_.IsEmpty() && 8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_component_matcher_.IsEmpty() && 882c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) regex_set_matcher_.IsEmpty() && 883c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_set_matcher_.IsEmpty() && 8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) registered_full_url_patterns_.empty() && 8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) registered_url_component_patterns_.empty(); 8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) { 8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The purpose of |full_url_conditions| is just that we need to execute 8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the same logic once for Full URL searches and once for URL Component 8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // searches (see URLMatcherConditionFactory). 8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Determine which patterns need to be registered when this function 8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // terminates. 8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::set<const StringPattern*> new_patterns; 8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSets::const_iterator condition_set_iter = 8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.begin(); 8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter != url_matcher_condition_sets_.end(); 8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_set_iter) { 9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Conditions& conditions = 9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter->second->conditions(); 9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions.begin(); condition_iter != conditions.end(); 9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_iter) { 9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we are called to process Full URL searches, ignore others, and 9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.) 9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!condition_iter->IsRegexCondition() && 908c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) !condition_iter->IsOriginAndPathRegexCondition() && 9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) full_url_conditions == condition_iter->IsFullURLCondition()) 9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_patterns.insert(condition_iter->string_pattern()); 9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 912a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 913a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (full_url_conditions) 914a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch continue; 915a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 916a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const URLMatcherConditionSet::QueryConditions& query_conditions = 917a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition_set_iter->second->query_conditions(); 918a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (URLMatcherConditionSet::QueryConditions::const_iterator 919a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter = query_conditions.begin(); 920a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter != query_conditions.end(); 921a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++query_condition_iter) { 922a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch new_patterns.insert(query_condition_iter->string_pattern()); 923a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This is the set of patterns that were registered before this function 9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is called. 9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::set<const StringPattern*>& registered_patterns = 9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) full_url_conditions ? registered_full_url_patterns_ 9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : registered_url_component_patterns_; 9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add all patterns that are in new_patterns but not in registered_patterns. 9333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) std::vector<const StringPattern*> patterns_to_register = 9343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) base::STLSetDifference<std::vector<const StringPattern*> >( 9353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) new_patterns, registered_patterns); 9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Remove all patterns that are in registered_patterns but not in 9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // new_patterns. 9393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) std::vector<const StringPattern*> patterns_to_unregister = 9403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) base::STLSetDifference<std::vector<const StringPattern*> >( 9413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) registered_patterns, new_patterns); 9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Update the SubstringSetMatcher. 9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SubstringSetMatcher& url_matcher = 9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) full_url_conditions ? full_url_matcher_ : url_component_matcher_; 9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher.RegisterAndUnregisterPatterns(patterns_to_register, 9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) patterns_to_unregister); 9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Update the set of registered_patterns for the next time this function 9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is being called. 9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) registered_patterns.swap(new_patterns); 9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateRegexSetMatcher() { 9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<const StringPattern*> new_patterns; 956c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) std::vector<const StringPattern*> new_origin_and_path_patterns; 9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSets::const_iterator condition_set_iter = 9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.begin(); 9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter != url_matcher_condition_sets_.end(); 9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_set_iter) { 9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Conditions& conditions = 9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter->second->conditions(); 9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions.begin(); condition_iter != conditions.end(); 9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_iter) { 967c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (condition_iter->IsRegexCondition()) { 9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_patterns.push_back(condition_iter->string_pattern()); 969c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } else if (condition_iter->IsOriginAndPathRegexCondition()) { 970c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) new_origin_and_path_patterns.push_back( 971c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) condition_iter->string_pattern()); 972c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Start over from scratch. We can't really do better than this, since the 9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // FilteredRE2 backend doesn't support incremental updates. 9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) regex_set_matcher_.ClearPatterns(); 9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) regex_set_matcher_.AddPatterns(new_patterns); 980c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_set_matcher_.ClearPatterns(); 981c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) origin_and_path_regex_set_matcher_.AddPatterns(new_origin_and_path_patterns); 9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateTriggers() { 9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Count substring pattern frequencies. 9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::map<StringPattern::ID, size_t> substring_pattern_frequencies; 9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSets::const_iterator condition_set_iter = 9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.begin(); 9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter != url_matcher_condition_sets_.end(); 9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_set_iter) { 9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Conditions& conditions = 9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter->second->conditions(); 9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions.begin(); condition_iter != conditions.end(); 9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_iter) { 9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPattern* pattern = condition_iter->string_pattern(); 9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_pattern_frequencies[pattern->id()]++; 9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 999a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 1000a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const URLMatcherConditionSet::QueryConditions& query_conditions = 1001a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition_set_iter->second->query_conditions(); 1002a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (URLMatcherConditionSet::QueryConditions::const_iterator 1003a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter = query_conditions.begin(); 1004a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter != query_conditions.end(); 1005a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++query_condition_iter) { 1006a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const StringPattern* pattern = query_condition_iter->string_pattern(); 1007a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch substring_pattern_frequencies[pattern->id()]++; 1008a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Update trigger conditions: Determine for each URLMatcherConditionSet which 10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLMatcherCondition contains a StringPattern that occurs least 10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // frequently in this URLMatcher. We assume that this condition is very 10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // specific and occurs rarely in URLs. If a match occurs for this 10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLMatcherCondition, we want to test all other URLMatcherCondition in the 10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // respective URLMatcherConditionSet as well to see whether the entire 10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLMatcherConditionSet is considered matching. 10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_match_triggers_.clear(); 10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSets::const_iterator condition_set_iter = 10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.begin(); 10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter != url_matcher_condition_sets_.end(); 10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_set_iter) { 10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Conditions& conditions = 10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter->second->conditions(); 10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (conditions.empty()) 10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) URLMatcherConditionSet::Conditions::const_iterator condition_iter = 10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions.begin(); 10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern::ID trigger = condition_iter->string_pattern()->id(); 10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We skip the first element in the following loop. 10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_iter; 10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (; condition_iter != conditions.end(); ++condition_iter) { 10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPattern::ID current_id = 10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_iter->string_pattern()->id(); 10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (substring_pattern_frequencies[trigger] > 10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_pattern_frequencies[current_id]) { 10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) trigger = current_id; 10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1040a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 1041a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const URLMatcherConditionSet::QueryConditions& query_conditions = 1042a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition_set_iter->second->query_conditions(); 1043a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (URLMatcherConditionSet::QueryConditions::const_iterator 1044a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter = query_conditions.begin(); 1045a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter != query_conditions.end(); 1046a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++query_condition_iter) { 1047a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch StringPattern::ID current_id = 1048a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter->string_pattern()->id(); 1049a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch if (substring_pattern_frequencies[trigger] > 1050a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch substring_pattern_frequencies[current_id]) { 1051a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch trigger = current_id; 1052a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 1053a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 1054a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) substring_match_triggers_[trigger].insert(condition_set_iter->second->id()); 10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateConditionFactory() { 10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::set<StringPattern::ID> used_patterns; 10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSets::const_iterator condition_set_iter = 10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matcher_condition_sets_.begin(); 10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter != url_matcher_condition_sets_.end(); 10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_set_iter) { 10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const URLMatcherConditionSet::Conditions& conditions = 10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_set_iter->second->conditions(); 10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (URLMatcherConditionSet::Conditions::const_iterator condition_iter = 10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) conditions.begin(); condition_iter != conditions.end(); 10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++condition_iter) { 10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) used_patterns.insert(condition_iter->string_pattern()->id()); 10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1072a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch const URLMatcherConditionSet::QueryConditions& query_conditions = 1073a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch condition_set_iter->second->query_conditions(); 1074a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch for (URLMatcherConditionSet::QueryConditions::const_iterator 1075a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter = query_conditions.begin(); 1076a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch query_condition_iter != query_conditions.end(); 1077a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch ++query_condition_iter) { 1078a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch used_patterns.insert(query_condition_iter->string_pattern()->id()); 1079a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) condition_factory_.ForgetUnusedPatterns(used_patterns); 10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateInternalDatastructures() { 10851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // TODO(vadimt): Remove ScopedProfile below once crbug.com/417106 is fixed. 10861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci tracked_objects::ScopedProfile tracking_profile( 10871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci FROM_HERE_WITH_EXPLICIT_FUNCTION( 10881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "URLMatcher_UpdateInternalDatastructures")); 10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateSubstringSetMatcher(false); 10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateSubstringSetMatcher(true); 10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateRegexSetMatcher(); 10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateTriggers(); 10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UpdateConditionFactory(); 10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} // namespace url_matcher 1097