1/*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "RegularExpression.h"
30
31#include <wtf/BumpPointerAllocator.h>
32#include <yarr/Yarr.h>
33#include "Logging.h"
34
35namespace WebCore {
36
37class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
38public:
39    static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity)
40    {
41        return adoptRef(new Private(pattern, caseSensitivity));
42    }
43
44    int lastMatchLength;
45
46    unsigned m_numSubpatterns;
47    OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
48
49private:
50    Private(const String& pattern, TextCaseSensitivity caseSensitivity)
51        : lastMatchLength(-1)
52        , m_regExpByteCode(compile(pattern, caseSensitivity))
53        , m_constructionError(0)
54    {
55    }
56
57    PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity)
58    {
59        JSC::Yarr::YarrPattern pattern(JSC::UString(patternString.impl()), (caseSensitivity == TextCaseInsensitive), false, &m_constructionError);
60        if (m_constructionError) {
61            LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
62            return PassOwnPtr<JSC::Yarr::BytecodePattern>();
63        }
64
65        m_numSubpatterns = pattern.m_numSubpatterns;
66
67        return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
68    }
69
70    BumpPointerAllocator m_regexAllocator;
71    const char* m_constructionError;
72};
73
74RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
75    : d(Private::create(pattern, caseSensitivity))
76{
77}
78
79RegularExpression::RegularExpression(const RegularExpression& re)
80    : d(re.d)
81{
82}
83
84RegularExpression::~RegularExpression()
85{
86}
87
88RegularExpression& RegularExpression::operator=(const RegularExpression& re)
89{
90    d = re.d;
91    return *this;
92}
93
94int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
95{
96    if (!d->m_regExpByteCode)
97        return -1;
98
99    if (str.isNull())
100        return -1;
101
102    int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
103    int* offsetVector;
104    Vector<int, 32> nonReturnedOvector;
105
106    nonReturnedOvector.resize(offsetVectorSize);
107    offsetVector = nonReturnedOvector.data();
108
109    ASSERT(offsetVector);
110    for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
111        offsetVector[j] = -1;
112
113    int result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str.characters(), startFrom, str.length(), offsetVector);
114    ASSERT(result >= -1);
115
116    if (result < 0) {
117        d->lastMatchLength = -1;
118        return -1;
119    }
120
121    // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
122    d->lastMatchLength = offsetVector[1] - offsetVector[0];
123    if (matchLength)
124        *matchLength = d->lastMatchLength;
125    return offsetVector[0];
126}
127
128int RegularExpression::searchRev(const String& str) const
129{
130    // FIXME: This could be faster if it actually searched backwards.
131    // Instead, it just searches forwards, multiple times until it finds the last match.
132
133    int start = 0;
134    int pos;
135    int lastPos = -1;
136    int lastMatchLength = -1;
137    do {
138        int matchLength;
139        pos = match(str, start, &matchLength);
140        if (pos >= 0) {
141            if (pos + matchLength > lastPos + lastMatchLength) {
142                // replace last match if this one is later and not a subset of the last match
143                lastPos = pos;
144                lastMatchLength = matchLength;
145            }
146            start = pos + 1;
147        }
148    } while (pos != -1);
149    d->lastMatchLength = lastMatchLength;
150    return lastPos;
151}
152
153int RegularExpression::matchedLength() const
154{
155    return d->lastMatchLength;
156}
157
158void replace(String& string, const RegularExpression& target, const String& replacement)
159{
160    int index = 0;
161    while (index < static_cast<int>(string.length())) {
162        int matchLength;
163        index = target.match(string, index, &matchLength);
164        if (index < 0)
165            break;
166        string.replace(index, matchLength, replacement);
167        index += replacement.length();
168        if (!matchLength)
169            break;  // Avoid infinite loop on 0-length matches, e.g. [a-z]*
170    }
171}
172
173} // namespace WebCore
174