1/*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "platform/URLPatternMatcher.h"
28
29#include "platform/weborigin/KURL.h"
30#include "wtf/StdLibExtras.h"
31
32namespace blink {
33
34bool URLPatternMatcher::matchesPatterns(const KURL& url, const Vector<String>& whitelist)
35{
36    // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
37    if (whitelist.isEmpty())
38        return true;
39
40    for (size_t i = 0; i < whitelist.size(); ++i) {
41        URLPatternMatcher contentPattern(whitelist[i]);
42        if (contentPattern.matches(url))
43            return true;
44    }
45
46    return false;
47}
48
49bool URLPatternMatcher::parse(const String& pattern)
50{
51    DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
52
53    size_t schemeEndPos = pattern.find(schemeSeparator);
54    if (schemeEndPos == kNotFound)
55        return false;
56
57    m_scheme = pattern.left(schemeEndPos);
58
59    unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
60    if (hostStartPos >= pattern.length())
61        return false;
62
63    int pathStartPos = 0;
64
65    if (equalIgnoringCase(m_scheme, "file")) {
66        pathStartPos = hostStartPos;
67    } else {
68        size_t hostEndPos = pattern.find("/", hostStartPos);
69        if (hostEndPos == kNotFound)
70            return false;
71
72        m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
73        m_matchSubdomains = false;
74
75        if (m_host == "*") {
76            // The pattern can be just '*', which means match all domains.
77            m_host = "";
78            m_matchSubdomains = true;
79        } else if (m_host.startsWith("*.")) {
80            // The first component can be '*', which means to match all subdomains.
81            m_host = m_host.substring(2); // Length of "*."
82            m_matchSubdomains = true;
83        }
84
85        // No other '*' can occur in the host.
86        if (m_host.find("*") != kNotFound)
87            return false;
88
89        pathStartPos = hostEndPos;
90    }
91
92    m_path = pattern.right(pattern.length() - pathStartPos);
93
94    return true;
95}
96
97bool URLPatternMatcher::matches(const KURL& test) const
98{
99    if (m_invalid)
100        return false;
101
102    if (!equalIgnoringCase(test.protocol(), m_scheme))
103        return false;
104
105    if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
106        return false;
107
108    return matchesPath(test);
109}
110
111bool URLPatternMatcher::matchesHost(const KURL& test) const
112{
113    const String& host = test.host();
114    if (equalIgnoringCase(host, m_host))
115        return true;
116
117    if (!m_matchSubdomains)
118        return false;
119
120    // If we're matching subdomains, and we have no host, that means the pattern
121    // was <scheme>://*/<whatever>, so we match anything.
122    if (!m_host.length())
123        return true;
124
125    // Check if the domain is a subdomain of our host.
126    if (!host.endsWith(m_host, false))
127        return false;
128
129    ASSERT(host.length() > m_host.length());
130
131    // Check that the character before the suffix is a period.
132    return host[host.length() - m_host.length() - 1] == '.';
133}
134
// Glob-style matcher used for URL paths. A '*' in the pattern matches any run
// of characters (including an empty run); every other pattern character must
// equal the corresponding test character exactly.
//
// The struct keeps a cursor into each string, so an instance is meant to be
// constructed and then asked test() once; the cursor positions after test()
// returns are not meaningful to callers.
struct MatchTester {
    const String m_pattern;
    unsigned m_patternIndex; // Next pattern character to consume.

    const String m_test;
    unsigned m_testIndex; // Next test character to consume.

    MatchTester(const String& pattern, const String& test)
        : m_pattern(pattern)
        , m_patternIndex(0)
        , m_test(test)
        , m_testIndex(0)
    {
    }

    bool testStringFinished() const { return m_testIndex >= m_test.length(); }
    bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }

    // Advances the pattern cursor past a run of consecutive '*' characters.
    void eatWildcard()
    {
        while (!patternStringFinished() && m_pattern[m_patternIndex] == '*')
            m_patternIndex++;
    }

    // Advances both cursors while the pattern holds a literal (non-'*')
    // character equal to the current test character.
    void eatSameChars()
    {
        while (!patternStringFinished() && !testStringFinished()) {
            if (m_pattern[m_patternIndex] == '*')
                return;
            if (m_pattern[m_patternIndex] != m_test[m_testIndex])
                return;
            m_patternIndex++;
            m_testIndex++;
        }
    }

    // Returns true when the whole test string is matched by the whole pattern.
    //
    // Implemented iteratively: on a mismatch we only ever back up to the most
    // recent '*' and let it absorb one more test character. For patterns whose
    // sole wildcard is '*' this yields the same answer as trying every split
    // at every '*', but it is bounded by O(pattern length * test length)
    // instead of growing exponentially with the number of '*' in the pattern.
    bool test()
    {
        // Resume point established by the most recent '*': the pattern
        // position just after it and the test position it currently extends to.
        bool haveResumePoint = false;
        unsigned resumePatternIndex = 0;
        unsigned resumeTestIndex = 0;

        for (;;) {
            // Consume as many literal matches as possible.
            eatSameChars();

            if (testStringFinished())
                break;

            if (!patternStringFinished() && m_pattern[m_patternIndex] == '*') {
                eatWildcard();

                // A trailing wildcard swallows the rest of the test string.
                if (patternStringFinished())
                    return true;

                haveResumePoint = true;
                resumePatternIndex = m_patternIndex;
                resumeTestIndex = m_testIndex;
                continue;
            }

            // Literal mismatch, or the pattern ran out while test characters
            // remain. Without an earlier '*' there is nothing left to try.
            if (!haveResumePoint)
                return false;

            // Let the last '*' absorb one more test character and retry the
            // literals that follow it.
            m_patternIndex = resumePatternIndex;
            m_testIndex = ++resumeTestIndex;
        }

        // The test string is exhausted: whatever remains of the pattern must
        // consist solely of wildcards.
        eatWildcard();
        return patternStringFinished();
    }
};
209
210bool URLPatternMatcher::matchesPath(const KURL& test) const
211{
212    MatchTester match(m_path, test.path());
213    return match.test();
214}
215
216} // namespace blink
217