15d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "components/url_matcher/url_matcher.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <iterator>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/profiler/scoped_profile.h"
127dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/gurl.h"
137dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/url_canon.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace url_matcher {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This set of classes implement a mapping of URL Component Patterns, such as
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_prefix, host_suffix, host_equals, ..., etc., to StringPatterns
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for use in substring comparisons.
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The idea of this mapping is to reduce the problem of comparing many
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URL Component Patterns against one URL to the problem of searching many
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// substrings in one string:
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------                    -----------------
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | URL Query operator | ----translate----> | StringPattern |
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------                    -----------------
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                                                   ^
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                                                   |
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                                                compare
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                                                   |
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                                                   v
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------                    -----------------
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | URL to compare     |                    |               |
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | to all URL Query   | ----translate----> | String        |
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// | operators          |                    |               |
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------                    -----------------
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The reason for this problem reduction is that there are efficient algorithms
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for searching many substrings in one string (see Aho-Corasick algorithm).
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Additionally, some of the same pieces are reused to implement regular
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// expression comparisons. The FilteredRE2 implementation for matching many
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expressions against one string uses prefiltering, in which a set
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of substrings (derived from the regexes) are first searched for, to reduce
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the number of regular expressions to test; the prefiltering step also
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// uses Aho-Corasick.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 1: {host,path,query}_{prefix,suffix,equals} searches.
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ==========================================================
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For searches in this class, we normalize URLs as follows:
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 1:
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Remove scheme, port and segment from URL:
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:8080/index.html?search=foo#first_match becomes
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    www.example.com/index.html?search=foo
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We remove the scheme and port number because they can be checked later
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in a secondary filter step. We remove the segment (the #... part) because
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this is not guaranteed to be ASCII-7 encoded.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 2:
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL to String and add the following position markers:
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - BU = Beginning of URL
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - ED = End of Domain
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EP = End of Path
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EU = End of URL
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Furthermore, the hostname is canonicalized to start with a ".".
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Position markers are represented as characters >127, which are therefore
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// guaranteed not to be part of the ASCII-7 encoded URL character set.
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> www.example.com/index.html?search=foo becomes
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU .www.example.com ED /index.html EP ?search=foo EU
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> www.example.com/index.html becomes
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU .www.example.com ED /index.html EP EU
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 3:
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL Component Patterns as follows:
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_prefix(prefix) = BU add_missing_dot_prefix(prefix)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_prefix("www.example") = BU .www.example
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_suffix(suffix) = suffix ED
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_suffix("example.com") = example.com ED
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_suffix(".example.com") = .example.com ED
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_equals(domain) = BU add_missing_dot_prefix(domain) ED
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_equals("www.example.com") = BU .www.example.com ED
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// With this, we can search the StringPatterns in the normalized URL.
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 2: url_{prefix,suffix,equals,contains} searches.
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// =====================================================
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 1: as above, except that
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - the scheme is not removed
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - the port is not removed if it is specified and does not match the default
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   port for the given scheme.
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Step 2:
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Translate URL to String and add the following position markers:
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - BU = Beginning of URL
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - EU = End of URL
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:8080/index.html?search=foo#first_match becomes
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU http://www.example.com:8080/index.html?search=foo EU
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> http://www.example.com:80/index.html?search=foo#first_match becomes
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// BU http://www.example.com/index.html?search=foo EU
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// url_prefix(prefix) = BU prefix
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> url_prefix("http://www.example") = BU http://www.example
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// url_contains(substring) = substring
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> url_contains("index") = index
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Case 3: {host,path,query}_contains searches.
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ============================================
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These kinds of searches are not supported directly but can be derived
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by a combination of a url_contains() query followed by an explicit test:
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// host_contains(str) = url_contains(str) followed by test whether str occurs
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   in host component of original URL.
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -> host_contains("example.co") = example.co
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//    followed by gurl.host().find("example.co");
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// [similarly for path_contains and query_contains].
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Regular expression matching (url_matches searches)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ==================================================
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This class also supports matching regular expressions (RE2 syntax)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// against full URLs, which are transformed as in case 2.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsRegexCriterion(URLMatcherCondition::Criterion criterion) {
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return criterion == URLMatcherCondition::URL_MATCHES;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
149c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool IsOriginAndPathRegexCriterion(URLMatcherCondition::Criterion criterion) {
150c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return criterion == URLMatcherCondition::ORIGIN_AND_PATH_MATCHES;
151c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
152c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherCondition
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition()
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : criterion_(HOST_PREFIX),
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string_pattern_(NULL) {}
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::~URLMatcherCondition() {}
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition(
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Criterion criterion,
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const StringPattern* string_pattern)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : criterion_(criterion),
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string_pattern_(string_pattern) {}
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : criterion_(rhs.criterion_),
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string_pattern_(rhs.string_pattern_) {}
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition& URLMatcherCondition::operator=(
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherCondition& rhs) {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  criterion_ = rhs.criterion_;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string_pattern_ = rhs.string_pattern_;
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return *this;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (criterion_ < rhs.criterion_) return true;
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (criterion_ > rhs.criterion_) return false;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return *string_pattern_ < *rhs.string_pattern_;
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (string_pattern_ == NULL && rhs.string_pattern_ != NULL) return true;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // or both are NULL.
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsFullURLCondition() const {
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For these criteria the SubstringMatcher needs to be executed on the
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // GURL that is canonicalized with
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (criterion_) {
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case HOST_CONTAINS:
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case PATH_CONTAINS:
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case QUERY_CONTAINS:
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case URL_PREFIX:
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case URL_SUFFIX:
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case URL_CONTAINS:
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case URL_EQUALS:
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsRegexCondition() const {
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return IsRegexCriterion(criterion_);
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
216c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool URLMatcherCondition::IsOriginAndPathRegexCondition() const {
217c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return IsOriginAndPathRegexCriterion(criterion_);
218c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
219c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherCondition::IsMatch(
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::set<StringPattern::ID>& matching_patterns,
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const GURL& url) const {
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(string_pattern_);
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ContainsKey(matching_patterns, string_pattern_->id()))
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a substring match on the raw URL. In case of a match, we need to verify
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that the match was found in the correct component of the URL.
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (criterion_) {
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case HOST_CONTAINS:
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return url.host().find(string_pattern_->pattern()) !=
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          std::string::npos;
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case PATH_CONTAINS:
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return url.path().find(string_pattern_->pattern()) !=
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          std::string::npos;
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case QUERY_CONTAINS:
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return url.query().find(string_pattern_->pattern()) !=
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          std::string::npos;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherConditionFactory
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These are symbols that are not contained in 7-bit ASCII used in GURLs.
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kEndOfDomain[] = {static_cast<char>(-2), 0};
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char kEndOfPath[] = {static_cast<char>(-3), 0};
254a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kQueryComponentDelimiter[] = {static_cast<char>(-4), 0};
255a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kEndOfURL[] = {static_cast<char>(-5), 0};
256a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
257a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// The delimiter for query parameters
258a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochconst char kQuerySeparator = '&';
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::~URLMatcherConditionFactory() {
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  STLDeleteElements(&substring_pattern_singletons_);
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  STLDeleteElements(&regex_pattern_singletons_);
266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  STLDeleteElements(&origin_and_path_regex_pattern_singletons_);
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
2702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const GURL& url) const {
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
272c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         url.path() + kEndOfPath +
273a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         (url.has_query() ? CanonicalizeQuery(url.query(), true, true)
274a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch                          : std::string()) +
275a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         kEndOfURL;
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& prefix) {
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_PREFIX,
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kBeginningOfURL + CanonicalizeHostname(prefix));
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition(
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& suffix) {
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_SUFFIX,
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      suffix + kEndOfDomain);
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition(
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str);
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition(
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_EQUALS,
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain);
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition(
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& prefix) {
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::PATH_PREFIX,
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kEndOfDomain + prefix);
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition(
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& suffix) {
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::PATH_SUFFIX, suffix + kEndOfPath);
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition(
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition(
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::PATH_EQUALS,
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kEndOfDomain + str + kEndOfPath);
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition(
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& prefix) {
3252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::string pattern;
3262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!prefix.empty() && prefix[0] == '?')
327a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    pattern = kEndOfPath + CanonicalizeQuery(prefix.substr(1), true, false);
3282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  else
329a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    pattern = kEndOfPath + CanonicalizeQuery(prefix, true, false);
3302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return CreateCondition(URLMatcherCondition::QUERY_PREFIX, pattern);
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition(
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& suffix) {
3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!suffix.empty() && suffix[0] == '?') {
3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return CreateQueryEqualsCondition(suffix);
3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  } else {
3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return CreateCondition(URLMatcherCondition::QUERY_SUFFIX,
340a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch                           CanonicalizeQuery(suffix, false, true) + kEndOfURL);
3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition(
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!str.empty() && str[0] == '?')
3472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return CreateQueryPrefixCondition(str);
3482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  else
3492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str);
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition(
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::string pattern;
3552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!str.empty() && str[0] == '?')
356a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    pattern =
357a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        kEndOfPath + CanonicalizeQuery(str.substr(1), true, true) + kEndOfURL;
3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  else
359a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    pattern = kEndOfPath + CanonicalizeQuery(str, true, true) + kEndOfURL;
3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return CreateCondition(URLMatcherCondition::QUERY_EQUALS, pattern);
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition(
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& host_suffix,
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& path_prefix) {
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX,
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      host_suffix + kEndOfDomain + path_prefix);
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionFactory::CreateHostEqualsPathPrefixCondition(
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& host,
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& path_prefix) {
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::HOST_EQUALS_PATH_PREFIX,
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kBeginningOfURL + CanonicalizeHostname(host) + kEndOfDomain +
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      path_prefix);
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const GURL& url) const {
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL::Replacements replacements;
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearPassword();
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearUsername();
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearRef();
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Clear port if it is implicit from scheme.
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (url.has_port()) {
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& port = url.scheme();
390010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (url::DefaultPortForScheme(port.c_str(), port.size()) ==
391010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)        url.EffectiveIntPort()) {
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      replacements.ClearPort();
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return kBeginningOfURL + url.ReplaceComponents(replacements).spec() +
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kEndOfURL;
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
399c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)static std::string CanonicalizeURLForRegexSearchesHelper(
400c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const GURL& url,
401c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    bool clear_query) {
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL::Replacements replacements;
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearPassword();
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearUsername();
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  replacements.ClearRef();
406c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (clear_query)
407c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    replacements.ClearQuery();
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Clear port if it is implicit from scheme.
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (url.has_port()) {
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& port = url.scheme();
411010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (url::DefaultPortForScheme(port.c_str(), port.size()) ==
412010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)        url.EffectiveIntPort()) {
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      replacements.ClearPort();
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return url.ReplaceComponents(replacements).spec();
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
419c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeURLForRegexSearches(
420c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const GURL& url) const {
421c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return CanonicalizeURLForRegexSearchesHelper(url, false);
422c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
423c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
424c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)std::string
425c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherConditionFactory::CanonicalizeURLForOriginAndPathRegexSearches(
426c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const GURL& url) const {
427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return CanonicalizeURLForRegexSearchesHelper(url, true);
428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
429c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& prefix) {
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::URL_PREFIX,
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kBeginningOfURL + prefix);
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& suffix) {
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& str) {
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::URL_EQUALS,
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kBeginningOfURL + str + kEndOfURL);
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateURLMatchesCondition(
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& regex) {
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return CreateCondition(URLMatcherCondition::URL_MATCHES, regex);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
457c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherCondition
458c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)URLMatcherConditionFactory::CreateOriginAndPathMatchesCondition(
459c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    const std::string& regex) {
460c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return CreateCondition(URLMatcherCondition::ORIGIN_AND_PATH_MATCHES, regex);
461c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
462c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcherConditionFactory::ForgetUnusedPatterns(
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const std::set<StringPattern::ID>& used_patterns) {
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PatternSingletons::iterator i = substring_pattern_singletons_.begin();
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (i != substring_pattern_singletons_.end()) {
467c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (ContainsKey(used_patterns, (*i)->id())) {
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++i;
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete *i;
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      substring_pattern_singletons_.erase(i++);
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  i = regex_pattern_singletons_.begin();
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (i != regex_pattern_singletons_.end()) {
476c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (ContainsKey(used_patterns, (*i)->id())) {
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++i;
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      delete *i;
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      regex_pattern_singletons_.erase(i++);
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
483c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  i = origin_and_path_regex_pattern_singletons_.begin();
484c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  while (i != origin_and_path_regex_pattern_singletons_.end()) {
485c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (ContainsKey(used_patterns, (*i)->id())) {
486c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      ++i;
487c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    } else {
488c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      delete *i;
489c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      origin_and_path_regex_pattern_singletons_.erase(i++);
490c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    }
491c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionFactory::IsEmpty() const {
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return substring_pattern_singletons_.empty() &&
496c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      regex_pattern_singletons_.empty() &&
497c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      origin_and_path_regex_pattern_singletons_.empty();
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    URLMatcherCondition::Criterion criterion,
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& pattern) {
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPattern search_pattern(pattern, 0);
504c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  PatternSingletons* pattern_singletons = NULL;
505c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (IsRegexCriterion(criterion))
506c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    pattern_singletons = &regex_pattern_singletons_;
507c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  else if (IsOriginAndPathRegexCriterion(criterion))
508c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    pattern_singletons = &origin_and_path_regex_pattern_singletons_;
509c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  else
510c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    pattern_singletons = &substring_pattern_singletons_;
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  PatternSingletons::const_iterator iter =
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      pattern_singletons->find(&search_pattern);
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (iter != pattern_singletons->end()) {
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return URLMatcherCondition(criterion, *iter);
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringPattern* new_pattern =
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        new StringPattern(pattern, id_counter_++);
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pattern_singletons->insert(new_pattern);
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return URLMatcherCondition(criterion, new_pattern);
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string URLMatcherConditionFactory::CanonicalizeHostname(
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::string& hostname) const {
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!hostname.empty() && hostname[0] == '.')
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return hostname;
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "." + hostname;
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
533a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// This function prepares the query string by replacing query separator with a
534a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// magic value (|kQueryComponentDelimiter|). When the boolean
535a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// |prepend_beginning_of_query_component| is true the function prepends the
536a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// query with the same magic. This is done to locate the start of a key value
537a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// pair in the query string. The parameter |query| is passed by value
538a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// intentionally, since it is locally modified.
539a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochstd::string URLMatcherConditionFactory::CanonicalizeQuery(
540a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    std::string query,
541a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    bool prepend_beginning_of_query_component,
542a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    bool append_end_of_query_component) const {
543a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  for (std::string::iterator it = query.begin(); it != query.end(); ++it) {
544a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    if (*it == kQuerySeparator)
545a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      *it = kQueryComponentDelimiter[0];
546a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  }
547a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (prepend_beginning_of_query_component)
548a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    query = kQueryComponentDelimiter + query;
549a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (append_end_of_query_component)
550a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    query += kQueryComponentDelimiter;
551a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return query;
552a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
553a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionFactory::StringPatternPointerCompare::operator()(
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringPattern* lhs,
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringPattern* rhs) const {
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (lhs == NULL && rhs != NULL) return true;
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (lhs != NULL && rhs != NULL)
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return lhs->pattern() < rhs->pattern();
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Either both are NULL or only rhs is NULL.
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
565a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// URLQueryElementMatcherCondition
566a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch//
567a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
568a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLQueryElementMatcherCondition::URLQueryElementMatcherCondition(
569a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const std::string& key,
570a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const std::string& value,
571a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    QueryValueMatchType query_value_match_type,
572a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    QueryElementType query_element_type,
573a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    Type match_type,
574a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    URLMatcherConditionFactory* factory) {
575a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  match_type_ = match_type;
576a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
577a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (query_element_type == ELEMENT_TYPE_KEY_VALUE) {
578a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    key_ = kQueryComponentDelimiter + key + "=";
579a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    value_ = value;
580a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  } else {
581a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    key_ = kQueryComponentDelimiter + key;
582a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    value_ = std::string();
583a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  }
584a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
585a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (query_value_match_type == QUERY_VALUE_MATCH_EXACT)
586a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    value_ += kQueryComponentDelimiter;
587a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
588a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // If |value_| is empty no need to find the |key_| and verify if the value
589a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // matches. Simply checking the presence of key is sufficient, which is done
590a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // by MATCH_ANY
591a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (value_.empty())
592a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    match_type_ = MATCH_ANY;
593a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
594a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  URLMatcherCondition condition;
595a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // If |match_type_| is MATCH_ANY, then we could simply look for the
596a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // combination of |key_| + |value_|, which can be efficiently done by
597a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // SubstringMatcher
598a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (match_type_ == MATCH_ANY)
599a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    condition = factory->CreateQueryContainsCondition(key_ + value_);
600a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  else
601a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    condition = factory->CreateQueryContainsCondition(key_);
602a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  string_pattern_ = condition.string_pattern();
603a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
604a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  key_length_ = key_.length();
605a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  value_length_ = value_.length();
606a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
607a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
608a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLQueryElementMatcherCondition::~URLQueryElementMatcherCondition() {}
609a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
610a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLQueryElementMatcherCondition::operator<(
611a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const URLQueryElementMatcherCondition& rhs) const {
612a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (match_type_ != rhs.match_type_)
613a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    return match_type_ < rhs.match_type_;
614a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (string_pattern_ != NULL && rhs.string_pattern_ != NULL)
615a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    return *string_pattern_ < *rhs.string_pattern_;
616a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (string_pattern_ == NULL && rhs.string_pattern_ != NULL)
617a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    return true;
618a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // Either string_pattern_ != NULL && rhs.string_pattern_ == NULL,
619a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // or both are NULL.
620a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return false;
621a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
622a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
623a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLQueryElementMatcherCondition::IsMatch(
624a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const std::string& url_for_component_searches) const {
625a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  switch (match_type_) {
626a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    case MATCH_ANY: {
627a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      // For MATCH_ANY, no additional verification step is needed. We can trust
628a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      // the SubstringMatcher to do the verification.
629a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return true;
630a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
631a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    case MATCH_ALL: {
632a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      size_t start = 0;
633a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      int found = 0;
634a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      size_t offset;
635a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      while ((offset = url_for_component_searches.find(key_, start)) !=
636a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch             std::string::npos) {
637a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        if (url_for_component_searches.compare(
638a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch                offset + key_length_, value_length_, value_) != 0) {
639a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch          return false;
640a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        } else {
641a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch          ++found;
642a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        }
643a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        start = offset + key_length_ + value_length_ - 1;
644a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      }
645a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return !!found;
646a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
647a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    case MATCH_FIRST: {
648a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      size_t offset = url_for_component_searches.find(key_);
649a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return url_for_component_searches.compare(
650a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch                 offset + key_length_, value_length_, value_) == 0;
651a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
652a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    case MATCH_LAST: {
653a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      size_t offset = url_for_component_searches.rfind(key_);
654a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return url_for_component_searches.compare(
655a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch                 offset + key_length_, value_length_, value_) == 0;
656a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
657a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  }
658a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  NOTREACHED();
659a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return false;
660a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
661a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
662a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch//
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherSchemeFilter
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter)
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : filters_(1) {
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  filters_.push_back(filter);
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::URLMatcherSchemeFilter(
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::vector<std::string>& filters)
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : filters_(filters) {}
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {}
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const {
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return std::find(filters_.begin(), filters_.end(), url.scheme()) !=
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      filters_.end();
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherPortFilter
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::URLMatcherPortFilter(
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::vector<URLMatcherPortFilter::Range>& ranges)
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : ranges_(ranges) {}
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::~URLMatcherPortFilter() {}
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherPortFilter::IsMatch(const GURL& url) const {
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int port = url.EffectiveIntPort();
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::vector<Range>::const_iterator i = ranges_.begin();
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       i != ranges_.end(); ++i) {
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (i->first <= port && port <= i->second)
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int from,
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                              int to) {
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return Range(from, to);
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherPortFilter::Range URLMatcherPortFilter::CreateRange(int port) {
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return Range(port, port);
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcherConditionSet
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::~URLMatcherConditionSet() {}
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::URLMatcherConditionSet(
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ID id,
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Conditions& conditions)
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : id_(id),
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      conditions_(conditions) {}
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcherConditionSet::URLMatcherConditionSet(
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ID id,
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Conditions& conditions,
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    scoped_ptr<URLMatcherPortFilter> port_filter)
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : id_(id),
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      conditions_(conditions),
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      scheme_filter_(scheme_filter.Pass()),
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      port_filter_(port_filter.Pass()) {}
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochURLMatcherConditionSet::URLMatcherConditionSet(
736a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    ID id,
737a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const Conditions& conditions,
738a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const QueryConditions& query_conditions,
739a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    scoped_ptr<URLMatcherSchemeFilter> scheme_filter,
740a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    scoped_ptr<URLMatcherPortFilter> port_filter)
741a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    : id_(id),
742a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      conditions_(conditions),
743a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      query_conditions_(query_conditions),
744a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      scheme_filter_(scheme_filter.Pass()),
745a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      port_filter_(port_filter.Pass()) {}
746a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcherConditionSet::IsMatch(
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::set<StringPattern::ID>& matching_patterns,
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const GURL& url) const {
750a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return IsMatch(matching_patterns, url, std::string());
751a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
752a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
753a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool URLMatcherConditionSet::IsMatch(
754a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const std::set<StringPattern::ID>& matching_patterns,
755a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const GURL& url,
756a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const std::string& url_for_component_searches) const {
7575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Conditions::const_iterator i = conditions_.begin();
7585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       i != conditions_.end(); ++i) {
7595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!i->IsMatch(matching_patterns, url))
7605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
7615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
7635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
7645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (port_filter_.get() && !port_filter_->IsMatch(url))
7655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
766a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  if (query_conditions_.empty())
767a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    return true;
768a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // The loop is duplicated below for performance reasons. If not all query
769a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // elements are found, no need to verify match that is expected to take more
770a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  // cycles.
771a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  for (QueryConditions::const_iterator i = query_conditions_.begin();
772a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch       i != query_conditions_.end();
773a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch       ++i) {
774a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    if (!ContainsKey(matching_patterns, i->string_pattern()->id()))
775a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return false;
776a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  }
777a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  for (QueryConditions::const_iterator i = query_conditions_.begin();
778a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch       i != query_conditions_.end();
779a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch       ++i) {
780a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    if (!i->IsMatch(url_for_component_searches))
781a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      return false;
782a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  }
7835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
7845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
7875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// URLMatcher
7885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
7895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcher::URLMatcher() {}
7915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)URLMatcher::~URLMatcher() {}
7935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::AddConditionSets(
7955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Vector& condition_sets) {
7965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSet::Vector::const_iterator i =
7975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       condition_sets.begin(); i != condition_sets.end(); ++i) {
7985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(url_matcher_condition_sets_.find((*i)->id()) ==
7995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        url_matcher_condition_sets_.end());
8005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    url_matcher_condition_sets_[(*i)->id()] = *i;
8015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateInternalDatastructures();
8035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::RemoveConditionSets(
8065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) {
8075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::vector<URLMatcherConditionSet::ID>::const_iterator i =
8085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       condition_set_ids.begin(); i != condition_set_ids.end(); ++i) {
8095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(url_matcher_condition_sets_.find(*i) !=
8105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        url_matcher_condition_sets_.end());
8115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    url_matcher_condition_sets_.erase(*i);
8125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateInternalDatastructures();
8145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::ClearUnusedConditionSets() {
8175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateConditionFactory();
8185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(
8212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const GURL& url) const {
8225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Find all IDs of StringPatterns that match |url|.
8235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // See URLMatcherConditionFactory for the canonicalization of URLs and the
8245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // distinction between full url searches and url component searches.
8255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<StringPattern::ID> matches;
826a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  std::string url_for_component_searches;
827a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
828c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!full_url_matcher_.IsEmpty()) {
829c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    full_url_matcher_.Match(
830c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
831c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
832c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!url_component_matcher_.IsEmpty()) {
833a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    url_for_component_searches =
834a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        condition_factory_.CanonicalizeURLForComponentSearches(url);
835a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    url_component_matcher_.Match(url_for_component_searches, &matches);
836c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
837c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!regex_set_matcher_.IsEmpty()) {
838c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    regex_set_matcher_.Match(
839c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        condition_factory_.CanonicalizeURLForRegexSearches(url), &matches);
840c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
841c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!origin_and_path_regex_set_matcher_.IsEmpty()) {
842c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    origin_and_path_regex_set_matcher_.Match(
843c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        condition_factory_.CanonicalizeURLForOriginAndPathRegexSearches(url),
844c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        &matches);
845c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
8465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
8485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // were fulfilled.
8495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<URLMatcherConditionSet::ID> result;
8505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::set<StringPattern::ID>::const_iterator i = matches.begin();
8515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       i != matches.end(); ++i) {
8525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // For each URLMatcherConditionSet there is exactly one condition
8535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // registered in substring_match_triggers_. This means that the following
8545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // logic tests each URLMatcherConditionSet exactly once if it can be
8555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // completely fulfilled.
8562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    StringPatternTriggers::const_iterator triggered_condition_sets_iter =
8572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        substring_match_triggers_.find(*i);
8582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (triggered_condition_sets_iter == substring_match_triggers_.end())
8592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;  // Not all substring matches are triggers for a condition set.
8602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const std::set<URLMatcherConditionSet::ID>& condition_sets =
8612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        triggered_condition_sets_iter->second;
8625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
8635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         condition_sets.begin(); j != condition_sets.end(); ++j) {
8642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      URLMatcherConditionSets::const_iterator condition_set_iter =
8652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          url_matcher_condition_sets_.find(*j);
8662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      DCHECK(condition_set_iter != url_matcher_condition_sets_.end());
867a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      if (condition_set_iter->second->IsMatch(
868a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch              matches, url, url_for_component_searches))
8695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        result.insert(*j);
8705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
8715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
8725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return result;
8745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool URLMatcher::IsEmpty() const {
8775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return condition_factory_.IsEmpty() &&
8785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.empty() &&
8795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      substring_match_triggers_.empty() &&
8805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      full_url_matcher_.IsEmpty() &&
8815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_component_matcher_.IsEmpty() &&
882c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      regex_set_matcher_.IsEmpty() &&
883c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      origin_and_path_regex_set_matcher_.IsEmpty() &&
8845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      registered_full_url_patterns_.empty() &&
8855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      registered_url_component_patterns_.empty();
8865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
8875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
8895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The purpose of |full_url_conditions| is just that we need to execute
8905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the same logic once for Full URL searches and once for URL Component
8915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // searches (see URLMatcherConditionFactory).
8925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
8935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Determine which patterns need to be registered when this function
8945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // terminates.
8955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<const StringPattern*> new_patterns;
8965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSets::const_iterator condition_set_iter =
8975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.begin();
8985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      condition_set_iter != url_matcher_condition_sets_.end();
8995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++condition_set_iter) {
9005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Conditions& conditions =
9015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        condition_set_iter->second->conditions();
9025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
9035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         conditions.begin(); condition_iter != conditions.end();
9045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++condition_iter) {
9055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If we are called to process Full URL searches, ignore others, and
9065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // vice versa. (Regex conditions are updated in UpdateRegexSetMatcher.)
9075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!condition_iter->IsRegexCondition() &&
908c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          !condition_iter->IsOriginAndPathRegexCondition() &&
9095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          full_url_conditions == condition_iter->IsFullURLCondition())
9105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        new_patterns.insert(condition_iter->string_pattern());
9115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
912a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
913a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    if (full_url_conditions)
914a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      continue;
915a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
916a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const URLMatcherConditionSet::QueryConditions& query_conditions =
917a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        condition_set_iter->second->query_conditions();
918a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    for (URLMatcherConditionSet::QueryConditions::const_iterator
919a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch             query_condition_iter = query_conditions.begin();
920a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         query_condition_iter != query_conditions.end();
921a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         ++query_condition_iter) {
922a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      new_patterns.insert(query_condition_iter->string_pattern());
923a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
9245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is the set of patterns that were registered before this function
9275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is called.
9285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<const StringPattern*>& registered_patterns =
9295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      full_url_conditions ? registered_full_url_patterns_
9305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          : registered_url_component_patterns_;
9315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add all patterns that are in new_patterns but not in registered_patterns.
9333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  std::vector<const StringPattern*> patterns_to_register =
9343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      base::STLSetDifference<std::vector<const StringPattern*> >(
9353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)          new_patterns, registered_patterns);
9365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Remove all patterns that are in registered_patterns but not in
9385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // new_patterns.
9393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  std::vector<const StringPattern*> patterns_to_unregister =
9403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      base::STLSetDifference<std::vector<const StringPattern*> >(
9413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)           registered_patterns, new_patterns);
9425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Update the SubstringSetMatcher.
9445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SubstringSetMatcher& url_matcher =
9455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      full_url_conditions ? full_url_matcher_ : url_component_matcher_;
9465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
9475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                            patterns_to_unregister);
9485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Update the set of registered_patterns for the next time this function
9505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is being called.
9515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  registered_patterns.swap(new_patterns);
9525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateRegexSetMatcher() {
9555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<const StringPattern*> new_patterns;
956c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  std::vector<const StringPattern*> new_origin_and_path_patterns;
9575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSets::const_iterator condition_set_iter =
9595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.begin();
9605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      condition_set_iter != url_matcher_condition_sets_.end();
9615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++condition_set_iter) {
9625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Conditions& conditions =
9635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        condition_set_iter->second->conditions();
9645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
9655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         conditions.begin(); condition_iter != conditions.end();
9665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++condition_iter) {
967c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (condition_iter->IsRegexCondition()) {
9685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        new_patterns.push_back(condition_iter->string_pattern());
969c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      } else if (condition_iter->IsOriginAndPathRegexCondition()) {
970c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        new_origin_and_path_patterns.push_back(
971c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)            condition_iter->string_pattern());
972c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      }
9735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
9745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
9755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Start over from scratch. We can't really do better than this, since the
9775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // FilteredRE2 backend doesn't support incremental updates.
9785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  regex_set_matcher_.ClearPatterns();
9795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  regex_set_matcher_.AddPatterns(new_patterns);
980c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  origin_and_path_regex_set_matcher_.ClearPatterns();
981c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  origin_and_path_regex_set_matcher_.AddPatterns(new_origin_and_path_patterns);
9825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
9835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
9845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateTriggers() {
9855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Count substring pattern frequencies.
9865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::map<StringPattern::ID, size_t> substring_pattern_frequencies;
9875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSets::const_iterator condition_set_iter =
9885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.begin();
9895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      condition_set_iter != url_matcher_condition_sets_.end();
9905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++condition_set_iter) {
9915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Conditions& conditions =
9925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        condition_set_iter->second->conditions();
9935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
9945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         conditions.begin(); condition_iter != conditions.end();
9955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++condition_iter) {
9965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const StringPattern* pattern = condition_iter->string_pattern();
9975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      substring_pattern_frequencies[pattern->id()]++;
9985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
999a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
1000a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const URLMatcherConditionSet::QueryConditions& query_conditions =
1001a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        condition_set_iter->second->query_conditions();
1002a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    for (URLMatcherConditionSet::QueryConditions::const_iterator
1003a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch             query_condition_iter = query_conditions.begin();
1004a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         query_condition_iter != query_conditions.end();
1005a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         ++query_condition_iter) {
1006a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      const StringPattern* pattern = query_condition_iter->string_pattern();
1007a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      substring_pattern_frequencies[pattern->id()]++;
1008a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
10095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Update trigger conditions: Determine for each URLMatcherConditionSet which
10125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLMatcherCondition contains a StringPattern that occurs least
10135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // frequently in this URLMatcher. We assume that this condition is very
10145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // specific and occurs rarely in URLs. If a match occurs for this
10155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLMatcherCondition, we want to test all other URLMatcherCondition in the
10165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // respective URLMatcherConditionSet as well to see whether the entire
10175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLMatcherConditionSet is considered matching.
10185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  substring_match_triggers_.clear();
10195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSets::const_iterator condition_set_iter =
10205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.begin();
10215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      condition_set_iter != url_matcher_condition_sets_.end();
10225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++condition_set_iter) {
10235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Conditions& conditions =
10245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        condition_set_iter->second->conditions();
10255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (conditions.empty())
10265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
10275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    URLMatcherConditionSet::Conditions::const_iterator condition_iter =
10285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        conditions.begin();
10295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringPattern::ID trigger = condition_iter->string_pattern()->id();
10305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We skip the first element in the following loop.
10315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++condition_iter;
10325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (; condition_iter != conditions.end(); ++condition_iter) {
10335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      StringPattern::ID current_id =
10345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          condition_iter->string_pattern()->id();
10355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (substring_pattern_frequencies[trigger] >
10365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          substring_pattern_frequencies[current_id]) {
10375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        trigger = current_id;
10385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
10395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1040a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
1041a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const URLMatcherConditionSet::QueryConditions& query_conditions =
1042a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        condition_set_iter->second->query_conditions();
1043a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    for (URLMatcherConditionSet::QueryConditions::const_iterator
1044a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch             query_condition_iter = query_conditions.begin();
1045a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         query_condition_iter != query_conditions.end();
1046a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         ++query_condition_iter) {
1047a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      StringPattern::ID current_id =
1048a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch          query_condition_iter->string_pattern()->id();
1049a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      if (substring_pattern_frequencies[trigger] >
1050a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch          substring_pattern_frequencies[current_id]) {
1051a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        trigger = current_id;
1052a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      }
1053a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
1054a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch
10555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
10565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateConditionFactory() {
10605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<StringPattern::ID> used_patterns;
10615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (URLMatcherConditionSets::const_iterator condition_set_iter =
10625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url_matcher_condition_sets_.begin();
10635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      condition_set_iter != url_matcher_condition_sets_.end();
10645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++condition_set_iter) {
10655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const URLMatcherConditionSet::Conditions& conditions =
10665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        condition_set_iter->second->conditions();
10675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
10685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         conditions.begin(); condition_iter != conditions.end();
10695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++condition_iter) {
10705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      used_patterns.insert(condition_iter->string_pattern()->id());
10715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1072a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    const URLMatcherConditionSet::QueryConditions& query_conditions =
1073a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch        condition_set_iter->second->query_conditions();
1074a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    for (URLMatcherConditionSet::QueryConditions::const_iterator
1075a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch             query_condition_iter = query_conditions.begin();
1076a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         query_condition_iter != query_conditions.end();
1077a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch         ++query_condition_iter) {
1078a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch      used_patterns.insert(query_condition_iter->string_pattern()->id());
1079a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    }
10805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
10815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  condition_factory_.ForgetUnusedPatterns(used_patterns);
10825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void URLMatcher::UpdateInternalDatastructures() {
10851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // TODO(vadimt): Remove ScopedProfile below once crbug.com/417106 is fixed.
10861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  tracked_objects::ScopedProfile tracking_profile(
10871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      FROM_HERE_WITH_EXPLICIT_FUNCTION(
10881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          "URLMatcher_UpdateInternalDatastructures"));
10895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateSubstringSetMatcher(false);
10905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateSubstringSetMatcher(true);
10915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateRegexSetMatcher();
10925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateTriggers();
10935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UpdateConditionFactory();
10945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
10955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
10965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}  // namespace url_matcher
1097